Пример #1
0
def tosrt(sub):
    """Convert each ASS subtitle file in *sub* to SRT and delete the source.

    The SRT file is written next to the source, with the source's extension
    replaced by ``.srt``.

    :param sub: iterable of paths to ASS subtitle files.
    :return: None
    """
    for file in sub:
        with open(file, 'r') as f:
            srtfile = asstosrt.convert(f)
        # splitext only looks at the last path component, so dots in directory
        # names and extension-less files no longer produce a wrong target.
        new_name = os.path.splitext(file)[0] + '.srt'
        with open(new_name, 'w') as f:
            f.write(srtfile)
        # If the source already ends in ".srt" the target IS the source;
        # removing it would delete the output that was just written.
        if new_name != file:
            os.remove(file)
Пример #2
0
def duplicate_srt_sub(ass_filename):
    """Write an SRT copy of an ASS subtitle file next to the original.

    The target name is the source name with its extension replaced by
    ``.srt``.  Both files are handled as UTF-8.  The function is best-effort:
    any error is printed instead of raised, and the output file is only
    created once the conversion has succeeded.

    :param ass_filename: path of the ASS file to convert.
    :return: None
    """
    # Local import: nothing else in this block's original code needed ``os``.
    import os

    # Replace only the real extension; str.replace(".ass", ".srt") would also
    # rewrite ".ass" inside directory names and, when nothing matched, make
    # the output path equal to the input path.
    srt_filename = os.path.splitext(ass_filename)[0] + ".srt"
    try:
        # Convert first so a failure never leaves an empty .srt behind.
        with open(ass_filename, encoding="utf8") as ass_file:
            srt_text = asstosrt.convert(ass_file)
        with open(srt_filename, "w", encoding="utf8") as srt_file:
            srt_file.write(srt_text)
        print("duplicate srt {}".format(srt_filename))
    except Exception as e:  # deliberate best-effort: report and carry on
        print(e)
Пример #3
0
def _convert_files(files, args):
    """Convert every ASS file in *files* to SRT, printing per-file progress.

    ``args`` supplies the options read here: ``encoding``, ``srt_encoding``,
    ``output_dir``, ``force``, ``translator``, ``no_effact`` and
    ``only_first_line``.  When no input encoding is given it is detected for
    each file, and the output encoding follows the input unless
    ``srt_encoding`` is set.  Outputs that already exist are skipped unless
    ``args.force`` is true.  Codec, format and I/O errors are printed to
    stderr and counted; they do not stop the run.
    """
    in_codec = codecs.lookup(args.encoding) if args.encoding else None
    out_codec = (codecs.lookup(args.srt_encoding)
                 if args.srt_encoding else in_codec)

    total = len(files)
    done = fail = ignore = 0
    print("Found {} file(s), converting...".format(total))

    for in_path, out_path in _combine_output_file_path(files, args.output_dir):
        position = done + fail + ignore + 1
        print("\t({:02d}/{:02d}) is converting... ".format(position, total),
              end='')
        if not args.force and os.path.exists(out_path):
            print('[ignore] (SRT exists)')
            ignore += 1
            continue

        # Filled in by whichever handler fires; reported once below.
        failure = None
        try:
            with open(in_path, 'rb') as in_file:
                if args.encoding is None:  # Detect file charset.
                    in_codec = codecs.lookup(_detect_charset(in_file))
                    if args.srt_encoding is None:
                        out_codec = in_codec

                reader = in_codec.streamreader(in_file)
                out_str = asstosrt.convert(reader, args.translator,
                                           args.no_effact,
                                           args.only_first_line)

            with open(out_path, 'wb') as out_file:
                out_file.write(get_bom(out_codec))
                out_file.write(out_codec.encode(out_str)[0])
            done += 1
            print('[done]')
        # The Unicode errors are ValueError subclasses, so this handler has
        # to come before the plain ValueError one.
        except (UnicodeDecodeError, UnicodeEncodeError, LookupError) as e:
            failure = ('[fail] (codec error)', e)
        except ValueError as e:
            failure = ('[fail] (irregular format)', e)
        except IOError as e:
            failure = ('[fail] (IO error)', e)

        if failure is not None:
            status, error = failure
            print(status)
            print(error, file=sys.stderr)
            fail += 1

    summary = "All done:\n\t{} success, {} ignore, {} fail."
    print(summary.format(done, ignore, total - done - ignore))
Пример #4
0
def _convert_files(files, args):
    """Run the ASS -> SRT conversion over *files* and print a summary.

    Options are read from ``args`` (``encoding``, ``srt_encoding``,
    ``output_dir``, ``force``, ``translator``, ``no_effact``,
    ``only_first_line``).  Without an explicit input encoding the charset is
    detected per file; without an explicit SRT encoding the output reuses the
    input codec.  Existing targets are skipped unless ``args.force`` is set.
    Failures are written to stderr and counted without aborting the batch.
    """
    def report(status, error):
        # Print the status tag on stdout and the details on stderr.
        print(status)
        print(error, file=sys.stderr)
        return 1

    in_codec = None
    if args.encoding:
        in_codec = codecs.lookup(args.encoding)
    out_codec = in_codec
    if args.srt_encoding:
        out_codec = codecs.lookup(args.srt_encoding)

    total = len(files)
    done, fail, ignore = 0, 0, 0
    print("Found {} file(s), converting...".format(total))

    pairs = _combine_output_file_path(files, args.output_dir)
    for in_path, out_path in pairs:
        print("\t({:02d}/{:02d}) is converting... "
              .format(done + fail + ignore + 1, total), end='')

        if not args.force and os.path.exists(out_path):
            print('[ignore] (SRT exists)')
            ignore += 1
            continue

        try:
            with open(in_path, 'rb') as in_file:
                if args.encoding is None:  # Detect file charset.
                    in_codec = codecs.lookup(_detect_charset(in_file))
                    if args.srt_encoding is None:
                        out_codec = in_codec

                out_str = asstosrt.convert(
                    in_codec.streamreader(in_file),
                    args.translator,
                    args.no_effact,
                    args.only_first_line,
                )

            with open(out_path, 'wb') as out_file:
                out_file.write(get_bom(out_codec))
                encoded, _ = out_codec.encode(out_str)
                out_file.write(encoded)
            done += 1
            print('[done]')

        # Order matters: the Unicode errors derive from ValueError.
        except (UnicodeDecodeError, UnicodeEncodeError, LookupError) as e:
            fail += report('[fail] (codec error)', e)
        except ValueError as e:
            fail += report('[fail] (irregular format)', e)
        except IOError as e:
            fail += report('[fail] (IO error)', e)

    print("All done:\n\t{} success, {} ignore, {} fail."
          .format(done, ignore, total - done - ignore))
Пример #5
0
def convert_ass_to_srt(file_string):
    """Convert an ``.ass`` subtitle file to ``<name>_converted_ass.srt``.

    Nothing happens unless *file_string* is an existing file whose extension
    is exactly ``.ass`` (compared case-insensitively).  The source is read as
    UTF-8 with undecodable bytes dropped, converted with effects stripped
    (``no_effect=True``), and written as UTF-8 with ``\\n`` line endings.  An
    existing target file is replaced.

    :param file_string: path of the candidate subtitle file.
    :return: None
    """
    file_name, file_extension = os.path.splitext(file_string)
    # Compare the whole extension: endswith('ass') also accepted ".class",
    # ".bass" and similar.
    if file_extension.lower() != '.ass' or not os.path.isfile(file_string):
        return

    # https://www.zhihu.com/question/36368902
    with open(file_string, 'r', encoding='utf-8',
              errors='ignore') as ass_file:
        srt_str = asstosrt.convert(ass_file, no_effect=True)

    srt_file_name = file_name + '_converted_ass.srt'
    if os.path.isfile(srt_file_name):
        os.remove(srt_file_name)
    with open(srt_file_name, "w", encoding='utf-8',
              newline='\n') as srt_file:
        srt_file.write(srt_str)
Пример #6
0
def main(path):
    """Convert one ASS file, or every ``.ass`` file under a directory, to SRT.

    Each source is decoded with the charset reported by ``chardet``, converted
    with ``asstosrt``, passed through ``convert(..., 'zh-cn')`` and written as
    UTF-8 next to the source with the extension replaced by ``.srt``.

    :param path: a directory to walk recursively, or a single subtitle file
        (a single file is converted regardless of its extension).
    :return: None
    """
    subs = []

    if os.path.isdir(path):
        # Directory: collect every file whose extension is ".ass".  The former
        # regex ".*(ass)$" also picked up names such as "notes.class".
        for root, _, files in os.walk(path):
            for name in files:
                if os.path.splitext(name)[1].lower() == ".ass":
                    subs.append(os.path.join(root, name))
    else:
        # Single file.
        subs = [path]

    for sub_file in subs:
        with open(sub_file, "rb") as ass_file:
            detected = chardet.detect(ass_file.read())["encoding"]
        # chardet reports None when it cannot guess (e.g. an empty file);
        # fall back to UTF-8 instead of failing on None.lower().
        charset = detected.lower() if detected else "utf-8"

        srt_file_name = os.path.splitext(sub_file)[0] + '.srt'
        # Convert before opening the target so a failure leaves no empty file.
        with open(sub_file, "r", encoding=charset) as ass_file:
            srt_text = convert(asstosrt.convert(ass_file), 'zh-cn')
        with open(srt_file_name, "wb") as srt_file:
            srt_file.write(srt_text.encode('utf8'))
        print(srt_file_name, "done")
Пример #7
0
    def run(self, fold, output_path=None):
        """Parse subtitle files and append their content to a text file.

        :param fold: path of a single subtitle file, or of a folder whose
            files are obtained from ``list_file``.
        :param output_path: text file the parsed content is appended to as
            UTF-8.  Defaults to the location that used to be hard-coded.
        :return: None
        """
        if output_path is None:
            output_path = r'C:\Users\Administrator\Desktop\vietnam_speaking.txt'

        if os.path.isdir(fold):
            files = list_file(folder=fold)
        elif os.path.isfile(fold):
            files = [fold]
        else:
            print("不是文件夹不是文件, 你到底是啥")
            files = []

        for file in files:
            coding_msg = self.check_char(file)
            encoding_format = coding_msg.get("encoding")
            # NOTE(review): presumably works around UTF-8 files being
            # misdetected as Windows-1254 -- confirm against check_char.
            if encoding_format == "Windows-1254":
                encoding_format = "utf8"

            with open(file, 'r', encoding=encoding_format) as f:
                # ASS files are converted to SRT text first; every other file
                # (srt, txt) is expected to already hold SRT content.
                if file.endswith("ass"):
                    srt_lines = asstosrt.convert(f).split('\n')
                else:
                    srt_lines = f
                subtitle_content = self.parse_srt(srt_lines)

            with open(output_path, 'a', encoding='utf8') as out_file:
                out_file.write(subtitle_content + "\n")
Пример #8
0
# -*- coding: utf-8 -*-
import glob
import chardet
import asstosrt

# Convert every .ass file in the current directory to a UTF-8 .srt file.
for ass_path in glob.iglob('*.ass'):
    # Sniff the charset from the raw bytes before decoding the file as text.
    with open(ass_path, 'rb') as raw_file:
        detected = chardet.detect(raw_file.read())

    with open(ass_path, 'r', encoding=detected['encoding']) as ass_file:
        srt_text = asstosrt.convert(ass_file)

    # The '*.ass' pattern guarantees a four-character suffix to replace.
    with open(ass_path[:-4] + '.srt', 'w', encoding='utf-8') as srt_file:
        srt_file.write(srt_text)
Пример #9
0
    def baks_mkv(self):
        """Create a ``.srt.bak`` subtitle backup next to each known video.

        ``self.vediotypes`` lists the video extensions to handle and
        ``self.files`` maps an extension to entries whose first two items are
        joined into the video path (presumably directory and file name --
        confirm against the code that fills ``self.files``).

        For every video the method:

        1. skips it when ``<video>.srt.bak`` already exists;
        2. runs ``ffmpeg -i`` and parses the stream list from its stderr;
        3. looks for a stream whose title marks it as bilingual
           English/Chinese; an ``ass`` stream is converted from the
           ``<video>.ass`` file expected next to the video, a ``subrip``
           stream is extracted with ``mkvextract``;
        4. otherwise only reports whether an English subtitle stream exists.

        Videos for which no backup could be produced are printed at the end.
        The process exits with status 2 when a ``Stream`` line of the ffmpeg
        output cannot be parsed.
        """
        failedfiles = []
        print("------------")
        print('get srt.bak file from vedio!')

        for postfix in self.vediotypes:
            if postfix not in self.files:
                continue
            for fi, f in enumerate(self.files[postfix]):
                mkvfile = os.path.join(f[0], f[1])
                print("------------")
                print(fi, mkvfile)
                base = os.path.splitext(mkvfile)[0]
                srtbak_path = base + ".srt.bak"
                if os.path.exists(srtbak_path):
                    print(srtbak_path, "exists")
                    print("------------")
                    continue

                print("1. Check streams in mkv!")
                # "ffmpeg -i <file>" without an output prints the stream list
                # on stderr.
                res = subprocess.run(["ffmpeg", "-i", mkvfile],
                                     stderr=subprocess.PIPE)
                # errors="replace": a stray non-UTF-8 byte in the metadata
                # must not abort the whole batch.
                streams = self._parse_ffmpeg_streams(
                    res.stderr.decode("utf-8", errors="replace"))

                streams_df = pd.DataFrame(streams)
                print("streams_df:\n", streams_df)
                if len(streams_df) < 2 or len(streams_df[streams_df["type"] == "Subtitle"]) <= 0:
                    print("not have enough stream")
                    failedfiles.append(mkvfile)
                    continue

                print("2. get srt.bak file from mkv!")

                # Streams whose title marks them as English/Chinese subtitles.
                engchs = []
                if 'title' in streams_df:
                    criterion = streams_df['title'].map(
                        lambda t: re.match(r"(英.中)|(英中)|(中英字幕)", str(t)) is not None)
                    engchs = streams_df[criterion]

                if len(engchs) > 0:
                    # todo: deal engchs
                    print("engchs:\n", engchs)
                    chosen = engchs.iloc[0]
                    filetype = str(chosen["filetype"])

                    if 'ass' == filetype:
                        # The .ass file is expected to already sit next to the
                        # video; extracting it with mkvextract is disabled.
                        ass = base + ".ass"
                        srt = base + ".srt"
                        try:
                            with open(ass) as ass_file:
                                srt_str = asstosrt.convert(ass_file)
                            with open(srt, "w") as srt_file:
                                srt_file.write(srt_str)
                        except OSError as err:
                            # todo: log err
                            print("error: {} failed to create!".format(srt))
                            print(err)
                            failedfiles.append(mkvfile)
                        else:
                            shutil.move(srt, srtbak_path)
                    elif 'subrip' == filetype:
                        print("filetype:", filetype)
                        subprocess.run(
                            ["mkvextract", "tracks",
                             mkvfile,
                             "{0}:{1}".format(str(chosen["id"]), srtbak_path)])
                else:
                    # No bilingual stream: look for an English subtitle stream.
                    # The "subhead" column only exists when at least one stream
                    # carried a language tag.
                    eng = []
                    if 'subhead' in streams_df:
                        # NOTE(review): subhead keeps its parentheses, e.g.
                        # "(eng)", so re.match(r"eng", ...) looks like it can
                        # never match -- confirm before relying on this branch.
                        criterion = streams_df['subhead'].map(
                            lambda t: re.match(r"eng", str(t)) is not None)
                        eng = streams_df[criterion & (streams_df["type"] == "Subtitle")]
                    if len(eng) > 0:
                        # todo: deal eng
                        print("eng:\n", eng)
                    else:
                        print('-------------')
                        print(f[1], "has no suitable subtitle stream!")
                        print(streams_df[streams_df["type"] == "Subtitle"])
                        failedfiles.append(mkvfile)
                        print('-------------')

        print("files failed to create .srt.bak:\n")
        for f in failedfiles:
            print(f)
        print('-------------')

    def _parse_ffmpeg_streams(self, ffmpeg_output):
        """Return one dict per ``Stream #...`` entry found in ffmpeg's output.

        Each dict has ``id``, ``type`` and ``filetype``, plus ``subhead`` (the
        parenthesised language tag) and ``title`` when present.  Exits the
        process with status 2 if a line containing ``Stream`` does not match
        the expected layout.
        """
        streams = []
        stream = None
        for line in ffmpeg_output.split("\n"):
            if "Stream" in line:
                m = re.match(r" *Stream #(\d+):(\d+)(\(\w+\))?: (\w+): (\w+)", line)
                if not m:
                    print("err: reg not work on this sentence")
                    print(line)
                    sys.exit(2)
                if stream is not None:
                    streams.append(stream)
                stream = {"id": m.group(2), "type": m.group(4), "filetype": m.group(5)}
                if m.group(3) is not None:
                    # subhead: the language tag, parentheses included
                    stream['subhead'] = m.group(3)
            elif stream is not None:
                # Metadata lines following a stream header belong to it.
                m = re.match(r" *title *: (.*)", line)
                if m:
                    stream['title'] = m.group(1)
        # The last stream has no following header to flush it, so add it
        # here; previously it was silently dropped.
        if stream is not None:
            streams.append(stream)
        return streams