Exemplo n.º 1
0
    def write_file(self, fname):
        """Serialize the resolved subtitles and write them to a file or stdout.

        The output format is the extension of ``fname``; when ``fname`` is
        ``None`` the input format (``self._sub_format``) is reused.

        :param fname: destination path; a falsy value writes to stdout.
        :raises NotImplementedError: if no branch below handles the
            input/output format combination.
        """
        # TODO: converter to go between self.subs_format and out_format
        if fname is None:
            out_format = self._sub_format
        else:
            out_format = os.path.splitext(fname)[-1][1:]
        subs = list(self.gen_raw_resolved_subs())
        if self._sub_format in ('ssa', 'ass'):
            ssaf = pysubs2.SSAFile()
            ssaf.events = subs
            ssaf.styles = self.styles
            if self.info is not None:
                ssaf.info = self.info
            to_write = ssaf.to_string(out_format)
        elif self._sub_format == 'srt' and out_format in ('ssa', 'ass'):
            to_write = pysubs2.SSAFile.from_string(
                srt.compose(subs)).to_string(out_format)
        elif out_format == 'srt':
            to_write = srt.compose(subs)
        else:
            raise NotImplementedError('unsupported output format: %s' %
                                      out_format)

        to_write = to_write.encode(self.encoding)
        if fname:
            with open(fname, 'wb') as f:
                f.write(to_write)
        elif six.PY3:
            # closefd=False keeps the process-wide stdout descriptor open
            # once the temporary binary wrapper is closed.
            with open(sys.stdout.fileno(), 'wb', closefd=False) as f:
                f.write(to_write)
        else:
            # Python 2: write the bytes directly instead of using stdout as
            # a context manager, which would close it for the whole process.
            sys.stdout.write(to_write)
            sys.stdout.flush()
Exemplo n.º 2
0
def write_srt_to_file(fname, subs):
    """Compose ``subs`` to SRT text and write it UTF-8 encoded.

    :param fname: destination path; a falsy value writes to stdout.
    :param subs: subtitles accepted by ``srt.compose``.
    :return: whatever the underlying ``write`` call returns.
    """
    if sys.version_info[0] > 2:
        # closefd must be True for a real path; for the stdout descriptor it
        # is False so leaving the ``with`` block does not close stdout.
        with open(fname or sys.stdout.fileno(), 'w', encoding='utf-8',
                  closefd=bool(fname)) as f:
            return f.write(srt.compose(subs))
    if fname:
        with open(fname, 'w') as f:
            return f.write(srt.compose(subs).encode('utf-8'))
    # Python 2 stdout: write directly; wrapping sys.stdout in ``with`` would
    # close it for the rest of the process.
    return sys.stdout.write(srt.compose(subs).encode('utf-8'))
Exemplo n.º 3
0
    def write_file(self, fname: str) -> None:
        """Serialize the resolved subtitles and write them to a file or stdout.

        The output format is the extension of ``fname``; when ``fname`` is
        ``None`` the input format (``self._sub_format``) is reused.

        :param fname: destination path; a falsy value writes to stdout.
        :raises NotImplementedError: if no branch below handles the
            input/output format combination.
        """
        # TODO: converter to go between self.subs_format and out_format
        if fname is None:
            out_format = self._sub_format
        else:
            out_format = os.path.splitext(fname)[-1][1:]
        subs = list(self.gen_raw_resolved_subs())
        if self._sub_format in ("ssa", "ass"):
            ssaf = pysubs2.SSAFile()
            ssaf.events = subs
            # Only override the SSAFile defaults with metadata we have.
            if self._styles is not None:
                ssaf.styles = self._styles
            if self._info is not None:
                ssaf.info = self._info
            if self._fonts_opaque is not None:
                ssaf.fonts_opaque = self._fonts_opaque
            to_write = ssaf.to_string(out_format)
        elif self._sub_format == "srt" and out_format in ("ssa", "ass"):
            to_write = pysubs2.SSAFile.from_string(
                srt.compose(subs)).to_string(out_format)
        elif out_format == "srt":
            to_write = srt.compose(subs)
        else:
            raise NotImplementedError("unsupported output format: %s" %
                                      out_format)

        to_write = to_write.encode(self._encoding)
        if fname:
            with open(fname, "wb") as f:
                f.write(to_write)
        elif six.PY3:
            # closefd=False keeps the process-wide stdout descriptor open
            # once the temporary binary wrapper is closed.
            with open(sys.stdout.fileno(), "wb", closefd=False) as f:
                f.write(to_write)
        else:
            # Python 2: write the bytes directly instead of using stdout as
            # a context manager, which would close it for the whole process.
            sys.stdout.write(to_write)
            sys.stdout.flush()
Exemplo n.º 4
0
 def write_file(self, fname):
     """Compose this object to SRT text and write it using ``self.encoding``.

     :param fname: destination path; a falsy value writes to stdout.
     :return: whatever the underlying ``write`` call returns.
     """
     if sys.version_info[0] > 2:
         # closefd must be True for a real path; for the stdout descriptor
         # it is False so leaving the ``with`` block does not close stdout.
         with open(fname or sys.stdout.fileno(), 'w',
                   encoding=self.encoding, closefd=bool(fname)) as f:
             return f.write(srt.compose(self))
     if fname:
         with open(fname, 'w') as f:
             return f.write(srt.compose(self).encode(self.encoding))
     # Python 2 stdout: write directly; wrapping sys.stdout in ``with`` would
     # close it for the rest of the process.
     return sys.stdout.write(srt.compose(self).encode(self.encoding))
Exemplo n.º 5
0
    def print_for_align(self, count=4):
        """Print the leading and trailing subtitles of the tracks side by side.

        For every entry in ``self.subs`` the first ``count`` subtitles are
        composed to SRT text, wrapped to ``COLUMN_WIDTH`` and printed in
        columns (only the first two columns are shown).  After a separator
        line the same is done for the last ``count`` subtitles, this time
        composed with ``reindex=False``.
        """

        def wrapped_columns(pick, **compose_kwargs):
            # One list of wrapped text lines per subtitle track.
            columns = []
            for track in self.subs:
                composed = srt.compose(pick(track.sub), **compose_kwargs)
                wrapped = []
                for text_line in composed.splitlines():
                    wrapped.extend(textwrap.wrap(text_line, COLUMN_WIDTH))
                columns.append(wrapped)
            return columns

        def show(columns):
            # Shorter columns are padded with empty strings by zip_longest.
            for row in itertools.zip_longest(*columns, fillvalue=""):
                print("{}  |  {}".format(row[0].ljust(COLUMN_WIDTH), row[1]))

        show(wrapped_columns(lambda items: items[:count]))

        print("----------------------------------------------------")

        show(wrapped_columns(lambda items: items[-count:], reindex=False))
Exemplo n.º 6
0
Arquivo: utils.py Projeto: vdt/subsync
def write_srt_to_file(fname, subs):
    """Compose ``subs`` to SRT text and write it to ``fname`` or stdout.

    When ``fname`` is ``None`` the text goes to ``sys.stdout``; otherwise the
    file is written UTF-8 encoded.  Returns the result of the ``write`` call.
    """
    if fname is None:
        return sys.stdout.write(srt.compose(subs))

    running_py3 = sys.version_info[0] > 2
    if not running_py3:
        # Python 2 file objects take bytes, so encode explicitly.
        with open(fname, 'w') as out:
            return out.write(srt.compose(subs).encode('utf-8'))

    with open(fname, 'w', encoding='utf-8') as out:
        return out.write(srt.compose(subs))
Exemplo n.º 7
0
def generate_srt(ctx, result, **kwargs):
    """Accumulate per-frame subtitles and write the SRT files near the end.

    A one-frame subtitle summarising ``result['detection_classes']`` is added
    to the module-level ``object_srt`` list (or the previous entry is
    extended when its text is unchanged).  When ``ctx.build_caption`` is set
    the first caption is handled the same way in ``caption_srt``.  Once
    ``current_time + 2 >= duration`` both lists are written to the paths in
    ``PARAMS``.
    """
    global object_srt
    global caption_srt

    metadata = kwargs['metadata']
    fps = metadata['output_fps']
    frame = metadata['frame_num']
    current_time = float(frame) / fps
    # Length of a single frame.
    step = datetime.timedelta(milliseconds=1. / fps * 1000)
    duration = metadata['duration']

    class_counts = collections.Counter(result['detection_classes'])
    classes_string = ', '.join(
        f'{name}: {count}' for name, count in class_counts.items())

    # The very first subtitle starts at zero; later ones start one frame
    # before the current timestamp.
    if object_srt:
        start = datetime.timedelta(seconds=current_time) - step
    else:
        start = datetime.timedelta(milliseconds=0)
    end = start + step

    sub = srt.Subtitle(index=len(object_srt) + 1,
                       start=start,
                       end=end,
                       content=classes_string)
    if object_srt and object_srt[-1].content == classes_string:
        # Same text as the previous frame: stretch the existing subtitle.
        object_srt[-1].end = end
    else:
        object_srt.append(sub)

    if ctx.build_caption:
        captions = result['captions']
        if len(captions) > 0:
            caption = captions[0]
            if caption_srt:
                start = datetime.timedelta(seconds=current_time) - step
            else:
                start = datetime.timedelta(milliseconds=0)
            end = start + step

            sub = srt.Subtitle(index=len(caption_srt) + 1,
                               start=start,
                               end=end,
                               content=caption)
            if caption_srt and caption_srt[-1].content == caption:
                caption_srt[-1].end = end
            else:
                caption_srt.append(sub)

    if current_time + 2 >= duration:
        for params_key, entries in (('objects_srt_file', object_srt),
                                    ('captions_srt_file', caption_srt)):
            with open(PARAMS[params_key], 'w') as sw:
                sw.write(srt.compose(entries))
def cleanLine(text_in):
    """Trim subtitle texts and drop subtitles whose text is empty.

    Returns ``(0, srt_text)`` when nothing had to be changed, otherwise
    ``(number_of_remaining_subtitles, srt_text)``.  Returns ``None`` when no
    subtitles are found or when an unexpected error is logged.
    """
    try:
        subs = list(srt.parse(text_in, ignore_errors=True))
        if not subs:
            logger.debug('No subtitles found.')
            return None

        # Strip surrounding whitespace, remembering the srt index of every
        # subtitle that was actually altered.
        text_stripped = []
        for entry in subs:
            trimmed = entry.content.strip()
            if entry.content != trimmed:
                text_stripped.append(entry.index)
                entry.content = trimmed

        # List positions (0-based, unlike the 1-based srt index) of empty
        # subtitles, highest first so deleting does not shift later targets.
        to_delete = sorted(
            (pos for pos, entry in enumerate(subs) if not entry.content),
            reverse=True)
        for pos in to_delete:
            del subs[pos]

        # Renumber what is left.
        for number, entry in enumerate(subs, start=1):
            entry.index = number

        if not text_stripped and not to_delete:
            logger.debug("CleanLine, Subtitle clean. No changes made.")
            return 0, srt.compose(subs)

        logger.debug("Index of subtitles deleted: {0}".format(
            [i + 1 for i in to_delete]))
        logger.debug(
            "Index of subtitles trimmed: {0}".format(text_stripped))
        logger.debug('{0} deleted, {1} trimmed'.format(
            len(to_delete), len(text_stripped)))
        return len(subs), srt.compose(subs)
    except Exception as e:
        logger.debug(f"CleanSubtitle_CL, unexpected error: {e}")
Exemplo n.º 9
0
 def write_srt(self, srt_filename):
     """Write ``self.subtitles`` as SRT text to ``srt_filename``.

     If the file cannot be opened because of missing permissions, a Windows
     message box is shown and nothing is written.
     """
     try:
         srt_file = open(srt_filename, 'w')
     except PermissionError:
         # open() signals failure by raising, never by returning None, so the
         # dialog has to be triggered from the exception handler.
         ctypes.windll.user32.MessageBoxW(0, "cannot write %s permission denied" % srt_filename, "Text 2 subtitles", 1 + 0x30)
         return
     # ``with`` closes the file even if composing or printing fails.
     with srt_file:
         print(srt.compose(self.subtitles), file=srt_file)
Exemplo n.º 10
0
def make_subs(filename,sub_data):
    filename = filename.replace('.mov','.srt')
    print filename, sub_data
    subtitles = []

    for item in sorted(sub_data,key=int):
        print item, sub_data[item]

        seconds = int(item)

        index = 1
        start = timedelta(0,seconds,0)
        end = timedelta(0,seconds+10,0)
        content = sub_data[item]

        s = Subtitle(index, start, end, content, proprietary='')

        subtitles.append(s)

    # print srt.compose(subtitles)

    print len(subtitles)

    file = open(path + filename, "wb")
    file.write(srt.compose(subtitles).encode('utf-8'))
    file.close()
    print "output to:",path+filename
Exemplo n.º 11
0
def test_parsing_no_content(subs):
    """Subtitles with empty content survive a non-strict compose/parse cycle."""
    for entry in subs:
        entry.content = ""

    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)
Exemplo n.º 12
0
def cleanUp(text_in):
    """Strip bracketed text, speaker labels and stray punctuation from SRT text.

    ``text_in`` is parsed with ``ignore_errors=True`` and re-composed, then a
    fixed sequence of regular-expression substitutions is applied.

    Returns the cleaned text, or ``None`` if a substitution raises (the error
    is logged at debug level).
    """
    # Pattern catalogue:
    # round brackets                          '(\([^\)]*\))'
    # square brackets                         '(\[[^]]*\])'
    # curly braces                            '(\{[^}]*\})'
    # dashes at the start of an empty line    '^\s*?\-+\s*?(?<=$)'
    # period only, otherwise empty line       '(^\s*?\.+)$'
    # comma only, otherwise empty line        '(^\s*?,+)$'
    # semicolon only, otherwise empty line    '(^\s*?;+)$'
    # space at the end of a line              '(\s*?)$'
    # exclamation marks                       '(^\s*?!+\s*?)$'
    # question marks                          '(^\s*?\?+\s*?)$'
    # first empty line                        '(?<=,\d\d\d)\n\n(?=\w)'
    #                                         '(?<=,\d\d\d)\n\n(?=\s*\S*?)'
    reg_4 = re.compile(
        r"^\s*\-\.+\s+|(([A-Z ]*){1,3}(?=\:)(?<![0-9a-z])\:\s)|^[ \t]*", re.M)
    reg_P6 = re.compile(
        r"(\([^\)]*\))|(\[[^]]*\])|(\{[^}]*\})|(<i>\s*<\/i>)|^\s*?\-+\s*?(?<=$)",
        re.M)
    # "MAN 1:" style label followed by a digit; compiled but not applied below.
    reg4n = re.compile(r'([A-Z ]*) [0-3](?=\:)')
    reg_P8 = re.compile(
        r"(\s*?)$|(^\s*?\.+)$|(^\s*?,+)$|(^\s*?;+)$|(^\s*?!+\s*?)$|(^\s*?\?+\s*?)$",
        re.M,
    )
    # Raw string: the former plain literal relied on invalid escape sequences
    # (\d, \w, \s, \S).  The regex engine interprets \n itself, so the
    # compiled pattern matches the same text.
    reg_S9 = re.compile(r"(?<=,\d\d\d)\n\n(?=\w)|(?<=,\d\d\d)\n\n(?=\s*\S*?)",
                        re.M)
    # Space at the start of a line, and a dash at the start of an empty line.
    reg8a = re.compile(
        r'^\s*(?<=.)|^-(?<=$)',
        re.M)
    regN = re.compile(r'(?<=^-)\:\s*', re.M)  # colon right after a dash
    regColon = re.compile(r"^\s*: *", re.M)
    # Index + timing header that is directly followed by the next index.
    RL = re.compile(
        r"\d+\n\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}(?=\n\s*\d+\n*?)"
    )

    textis = srt.parse(text_in, ignore_errors=True)
    text_subs = srt.compose(textis)

    try:
        fp3 = reg_4.sub("", text_subs)

        fp5 = reg_P6.sub("", fp3)

        rf1 = regColon.sub("", fp5)

        fp11 = reg_P8.sub("", rf1)
        fp13 = reg_S9.sub("\n", fp11)
        fp13 = RL.sub("\n", fp13)
        fp14 = regN.sub('', fp13)
        fp15 = reg8a.sub('', fp14)

        return fp15

    except Exception as e:
        logger.debug(f"CleanSubtitle proc, unexpected error: {e}")
Exemplo n.º 13
0
def saveSubtitle(videoFile, duration, thresh, language):
    """Ask the user for a destination and save the generated subtitles there.

    Nothing is written when the dialog returns a falsy value.
    """
    files = [('Subtitle Files', '*.srt'), ('Text Document', '*.txt')]
    # NOTE(review): assumes asksaveasfile is tkinter.filedialog's, which
    # returns a file already opened for writing - confirm against the imports.
    # defaultextension takes a single extension string, not the filetypes
    # list.
    fileName = asksaveasfile(filetypes=files, defaultextension='.srt')
    if fileName:
        # Write through the handle the dialog opened rather than opening the
        # same path a second time and leaking the first handle.
        with fileName:
            fileName.write(
                srt.compose(Subtitle(videoFile, duration, thresh, language)))
Exemplo n.º 14
0
def buildVTT(bookInfoFileName, outputDir):
    """Write one WebVTT subtitle file per chapter described in a JSON file.

    The JSON document provides a ``chapter`` list and a ``split`` list.  For
    each chapter the splits from ``start`` to ``end`` (inclusive) become
    subtitles whose times are relative to the chapter's first split.
    """
    # Read the segmentation info from the JSON file.
    with open(bookInfoFileName, 'r', encoding='UTF-8') as info_file:
        info = json.load(info_file)

    print(info['chapter'])
    for chapter in info['chapter']:
        first, last = chapter['start'], chapter['end']
        offset = info['split'][first]['start']

        subs = []
        for index, position in enumerate(range(first, last + 1)):
            split = info['split'][position]
            subs.append(srt.Subtitle(
                index,
                timedelta(milliseconds=(split['start'] - offset)),
                timedelta(milliseconds=(split['end'] - offset)),
                split['texc']))

        # Save the VTT subtitle file.
        title = validateTitle('%02d %s' %
                              (chapter['index'] + 1, chapter['title']))
        vttfilename = '%s/%s.vtt' % (outputDir, title)
        with open(vttfilename, 'w', encoding='UTF-8') as vtt_file:
            # Convert the SRT text to WebVTT.
            strVTT = srt2vtt(srt.compose(subs))
            print(strVTT)
            vtt_file.write(strVTT)
            print('%s done.' % vttfilename)
Exemplo n.º 15
0
 def shiftSubsNegative(self):
     """Shift every subtitle in the selected files earlier, in place.

     Each file in ``self.filenames[0]`` is parsed, every start and end time
     is reduced by ``self.timeShift`` milliseconds, and the file is
     overwritten.  A confirmation dialog is shown afterwards.
     """
     for filename in self.filenames[0]:
         with open(filename, "r", encoding="utf8") as file:
             text = file.read()
         shift = datetime.timedelta(milliseconds=self.timeShift)
         retimed = []
         for sub in list(srt.parse(text)):
             sub.start = sub.start - shift
             sub.end = sub.end - shift
             retimed.append(sub)
         final = srt.compose(retimed)
         # One write call instead of one per character of the composed text.
         with open(filename, "w", encoding="utf8") as file:
             file.write(final)
     dlg = QDialog(self)
     dlg.setWindowTitle("Retiming done!")
     layout = QVBoxLayout()
     dlg.setLayout(layout)
     dlg.layout().addWidget(QLabel("Retiming successful!"))
     dlg.exec_()
Exemplo n.º 16
0
 def cleanSubs(self):
     """Remove parenthesised text from the selected subtitle files, in place.

     Each file in ``self.filenames[0]`` has every ``(...)`` span removed line
     by line, is re-parsed as SRT and overwritten.  A confirmation dialog is
     shown afterwards.
     """
     for filename in self.filenames[0]:
         with open(filename, "r", encoding="utf8") as file:
             lines = file.readlines()
         # The parentheses must be escaped: an unescaped "(.*)" is a capture
         # group that matches the whole line, which blanked every line and
         # left the file empty after re-composing.
         validLines = [re.sub(r"\(.*\)", "", line) for line in lines]
         subtitles = list(srt.parse(''.join(validLines)))
         clean = []
         for sub in subtitles:
             if sub.content is not None:
                 clean.append(sub)
         final = srt.compose(clean)
         # One write call instead of one per character of the composed text.
         with open(filename, "w", encoding="utf8") as file:
             file.write(final)
     dlg = QDialog(self)
     dlg.setWindowTitle("Done!")
     layout = QVBoxLayout()
     dlg.setLayout(layout)
     dlg.layout().addWidget(QLabel("Success!"))
     dlg.exec_()
Exemplo n.º 17
0
def merge_srt(zh_file_path, en_file_path, new_file_path):
    """Merge two SRT files into one bilingual SRT file.

    Subtitles are paired by position; each merged subtitle keeps the timing
    of the first file and carries the first file's text followed by the
    second file's text on a new line.

    :return: ``False`` (nothing written) when the two files contain a
        different number of subtitles, ``True`` otherwise.
    """
    # ``with`` closes each file even if reading or parsing fails.
    with open(zh_file_path, mode='r', encoding='utf-8') as zh_file:
        zh_srt_s = zh_file.read()
    with open(en_file_path, mode='r', encoding='utf-8') as en_file:
        en_srt_s = en_file.read()

    zh_subs = list(srt.parse(zh_srt_s))
    en_subs = list(srt.parse(en_srt_s))
    if len(zh_subs) != len(en_subs):
        return False

    for zh_sub, en_sub in zip(zh_subs, en_subs):
        zh_sub.content = srt.make_legal_content(
            zh_sub.content + '\n' + en_sub.content)

    srt_s = srt.compose(zh_subs)

    # Write the merged text to the new file.
    with open(new_file_path, 'w+', encoding='utf-8') as new_file:
        new_file.write(srt_s)

    return True
Exemplo n.º 18
0
 def run(self) -> None:
     """
     Create the translated srt file.

     Every subtitle of the source file is translated, echoed to stdout and
     collected; the result is written under the destination directory using
     the source file's name.
     :return: None
     """
     with self.__src_file.open('rt') as src:
         file_contents = src.read()
     filename = self.__src_file.name
     google_trans = google_translator()
     subtitles = []
     for original in srt.parse(file_contents):
         translated = srt.Subtitle(
             index=original.index,
             start=original.start,
             end=original.end,
             content=google_trans.translate(
                 original.content,
                 lang_src=self.__lang_src,
                 lang_tgt=self.__lang_tgt),
             proprietary=original.proprietary)
         sys.stdout.write('[{0}][{1}]: {2}\n'.format(
             filename, translated.index, translated.content))
         subtitles.append(translated)
     # Write the translated srt file.
     dst_file = self.__dst_dirpath / self.__src_file.name
     with dst_file.open('wt') as dst:
         dst.write(srt.compose(subtitles))
Exemplo n.º 19
0
async def run_test(uri):
    """Stream the WAV file named by ``sys.argv[1]`` to a server and print SRT.

    The audio is sent in 0.2-second chunks; one reply is collected per chunk
    plus one after the final ``eof`` message.  Words from replies that carry
    a ``result`` list are grouped ``WORDS_PER_LINE`` at a time into subtitles.
    """
    async with websockets.connect(uri) as websocket:
        results = []
        # ``with`` closes the audio file even if a send or receive fails.
        with wave.open(sys.argv[1], "rb") as wf:
            await websocket.send('{ "config" : { "sample_rate" : %d } }' % (wf.getframerate()))

            buffer_size = int(wf.getframerate() * 0.2)  # 0.2 seconds of audio
            while True:
                data = wf.readframes(buffer_size)

                if len(data) == 0:
                    break

                await websocket.send(data)
                results.append(await websocket.recv())

        await websocket.send('{"eof" : 1}')
        results.append(await websocket.recv())

        subs = []
        for res in results:
            jres = json.loads(res)
            if 'result' not in jres:
                continue
            words = jres['result']
            for j in range(0, len(words), WORDS_PER_LINE):
                line = words[j : j + WORDS_PER_LINE]
                # The subtitle spans from the first word's start to the last
                # word's end.
                s = srt.Subtitle(index=len(subs),
                                 content=" ".join([l['word'] for l in line]),
                                 start=datetime.timedelta(seconds=line[0]['start']),
                                 end=datetime.timedelta(seconds=line[-1]['end']))
                subs.append(s)

        print(srt.compose(subs))
Exemplo n.º 20
0
def write_srt(args, subs):
    """Write ``subs`` as SRT text to ``args.out_file`` + ".srt"."""
    srt_file = args.out_file + ".srt"
    print("Writing {} subtitles to: {}".format(args.language_code, srt_file))
    # ``with`` closes the file on errors too; write() emits the composed
    # string in one call, where writelines() iterated it character by
    # character.
    with open(srt_file, 'w') as f:
        f.write(srt.compose(subs))
    return
Exemplo n.º 21
0
def zameniImena(text_in):
    """Replace dictionary keys found in ``text_in`` with their mapped values.

    Six whole-word substitution passes are chained, one for each of
    ``dictionary_1``, ``dictionary_2``, ``dictionary_0``, ``dict1_n``,
    ``dict2_n`` and ``dict0_n``.

    Returns a tuple ``(much, text)``: the total number of substitutions made
    by those six passes and the text after the last pass.
    """

    # NOTE(review): this first parse does not pass ignore_errors, unlike the
    # re-compose below - confirm that is intended.
    if len(list(srt.parse(text_in))) == 0:
        logger.debug(f"Transkrib, No subtitles found.")
    else:
        # Normalise the input by parsing and re-composing it.
        text_in = srt.compose(srt.parse(text_in, ignore_errors=True))

    # One whole-word alternation pattern per dictionary, built from its keys.
    robj1 = re.compile(r'\b(' + '|'.join(map(re.escape, dictionary_1.keys())) +
                       r')\b')
    robj2 = re.compile(r'\b(' + '|'.join(map(re.escape, dictionary_2.keys())) +
                       r')\b')
    robj3 = re.compile(r'\b(' + '|'.join(map(re.escape, dictionary_0.keys())) +
                       r')\b')

    robjN1 = re.compile(r'\b(' + '|'.join(map(re.escape, dict1_n.keys())) +
                        r')\b')
    robjN2 = re.compile(r'\b(' + '|'.join(map(re.escape, dict2_n.keys())) +
                        r')\b')
    robjN0 = re.compile(r'\b(' + '|'.join(map(re.escape, dict0_n.keys())) +
                        r')\b')

    robjL0 = re.compile(r'\b(' + '|'.join(map(re.escape, dict0_n2.keys())) +
                        r')\b')
    robjL1 = re.compile(r'\b(' + '|'.join(map(re.escape, dict1_n2.keys())) +
                        r')\b')
    robjL2 = re.compile(r'\b(' + '|'.join(map(re.escape, dict2_n2.keys())) +
                        r')\b')
    # subn returns (new_text, count); each pass consumes the text produced by
    # the previous one.
    # NOTE(review): if any pass raises, the error is only logged and the
    # remaining t_out* names stay unbound, so the code below would fail with
    # a NameError.
    try:
        t_out1 = robj1.subn(lambda x: dictionary_1[x.group(0)], text_in)
        t_out2 = robj2.subn(lambda x: dictionary_2[x.group(0)], t_out1[0])
        t_out3 = robj3.subn(lambda x: dictionary_0[x.group(0)], t_out2[0])

        t_out4 = robjN1.subn(lambda x: dict1_n[x.group(0)], t_out3[0])
        t_out5 = robjN2.subn(lambda x: dict2_n[x.group(0)], t_out4[0])
        t_out6 = robjN0.subn(lambda x: dict0_n[x.group(0)], t_out5[0])
    except Exception as e:
        logger.debug(F"Transkripcija, error: {e}")

    def doRepl(inobj, indict, text):
        # Run one substitution pass and return only the number of
        # replacements; returns None when an error was logged.
        try:
            out = inobj.subn(lambda x: indict[x.group(0)], text)
            return out[1]
        except IOError as e:
            logger.debug(f"Replace keys, I/O error: {e}")
        except Exception as e:
            logger.debug(f"Replace keys, unexpected error: {e}")

    # NOTE(review): the return values of these doRepl calls are discarded, so
    # the *_n2 dictionaries affect neither the returned text nor the count.
    if len(dict1_n2) != 0:
        doRepl(robjL1, dict1_n2, t_out6[0])
    if len(dict2_n2) != 0:
        doRepl(robjL2, dict2_n2, t_out6[0])
    if len(dict0_n2) != 0:
        doRepl(robjL0, dict0_n2, t_out6[0])

    # Total number of substitutions across the six chained passes.
    much = t_out1[1] + t_out2[1] + t_out3[1] + t_out4[1] + t_out5[1] + t_out6[1]
    logger.debug(
        'Transkripcija u toku.\n--------------------------------------')
    logger.debug(f'Zamenjeno ukupno {much} imena i pojmova')

    return much, t_out6[0]
Exemplo n.º 22
0
def main(args = argv[1:]):
  """Command-line entry point: merge word-level timings into sentence lines.

  Reads either an SRT file or LRC input (when the ``file`` argument is
  ``lrc``), groups consecutive items closer than ``-dist`` seconds, prints
  the intermediate data and the LRC result, and writes the SRT result to
  the ``-o`` path.
  """
  from argparse import ArgumentParser

  parser = ArgumentParser(
    "lrc_merge",
    description="merge simple timeline LRC into line-splited LRC",
    epilog="if the result is truncated, try to split your input in lines")
  parser.add_argument("-dist", type=float, default=0.8,
    help="max distance for words in same sentence")
  parser.add_argument("-min-len", type=float, default=0.0,
    help="min duration for last word in sentence (LRC only)")
  parser.add_argument("-o", type=str, default="a.srt", help="ouput SRT file")
  parser.add_argument("-sep", type=str, default=None,
    help="word seprator (or decided automatically from sentence)")
  parser.add_argument("file", type=str,
    help="input SRT file (or 'lrc' and input from stdin)")

  cfg = parser.parse_args(args)
  use_lrc = cfg.file == "lrc"

  def inSameLine(a, b):
    # LRC input is compared start-to-start, SRT input end-to-start.
    anchor = a.start if use_lrc else a.end
    return abs(anchor - b.start).total_seconds() < cfg.dist

  # regex findall has input size limitations...
  if use_lrc:
    data = list(flatMap(lambda t: fromLrc(t, cfg.min_len), readLines("lrc")))
  else:
    data = list(fromSrt(open(cfg.file).read()))
  print(" ".join([f"{item.start.total_seconds()};{item.content}" for item in data]))

  print("== lyrics")
  result = list(zipTakeWhile(inSameLine, data))
  print(intoLrc(result, cfg.sep))

  with open(cfg.o, "w+") as srtf:
    srtf.write(compose(intoSrt(result, cfg.sep)))
Exemplo n.º 23
0
def write_srt(lang, lang_subs):
    """Write ``lang_subs`` as strict SRT text to ``<lang>.srt``."""
    filename = lang + ".srt"
    # ``with`` closes the file even if composing the subtitles fails.
    with open(filename, "w") as f:
        f.write(srt.compose(lang_subs, strict=True))
    print("Wrote SRT file {}".format(filename))
    return
Exemplo n.º 24
0
def test_parsing_content_with_blank_lines(subs):
    """Content containing a blank line survives a non-strict round trip."""
    for entry in subs:
        # A blank line in the middle triggers the "special" content parsing
        # for erroneous SRT files that have blank lines.
        entry.content = "\n\n".join([entry.content, entry.content])

    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)
Exemplo n.º 25
0
def test_parsing_no_content(subs):
    """Subtitles with empty content survive a non-strict compose/parse cycle."""
    for entry in subs:
        entry.content = ''

    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)
Exemplo n.º 26
0
def test_parsing_content_with_blank_lines(subs):
    """Content containing a blank line survives a non-strict round trip."""
    for entry in subs:
        # A blank line in the middle triggers the "special" content parsing
        # for erroneous SRT files that have blank lines.
        entry.content = "\n\n".join([entry.content, entry.content])

    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)
Exemplo n.º 27
0
def test_parser_noncontiguous_ignore_errors(subs, fake_idx, garbage,
                                            fake_timedelta):
    """Garbage between blocks must not raise when ``ignore_errors`` is set."""
    composed = srt.compose(subs)
    timestamp = srt.timedelta_to_srt_timestamp(fake_timedelta)
    junk_block = "\n\n%d\n%s %s" % (fake_idx, timestamp, garbage)
    corrupted = composed.replace("\n\n", junk_block)
    # Should not raise, we have ignore_errors
    list(srt.parse(corrupted, ignore_errors=True))
Exemplo n.º 28
0
def compose_suggest_on_fail(subs, strict=True):
    """Compose ``subs`` to SRT text, logging an encoding hint on parse errors.

    ``srt.SRTParseError`` is re-raised after the hint has been logged.
    """
    try:
        composed = srt.compose(subs, strict=strict)
    except srt.SRTParseError:
        log.fatal(
            'Parsing failed, maybe you need to pass a different encoding '
            'with --encoding?')
        raise
    return composed
Exemplo n.º 29
0
 def write_to_file(self, file=None):
     """Write ``self.srt_list`` as UTF-8 SRT text.

     The destination is ``file`` or, when that is ``None``, ``self.file``.
     Nothing is written (a notice is printed) when ``self.srt_list`` is
     ``None``.
     """
     if self.srt_list is None:
         print('Skipping {} because of bad srt format.'.format(self.file))
         return
     target = self.file if file is None else file
     with open(target, "w", encoding="utf-8") as out:
         out.write(srt.compose(self.srt_list))
Exemplo n.º 30
0
def translate_and_compose(input_file, output_file, src_lang: str, target_lang: str, encoding='UTF-8', mode='split', both=True, space=False):
    """
    Translate an srt file and write the result as a UTF-8 srt file.

    Some supported language codes:
        Afrikaans af      Albanian sq      Amharic am      Arabic ar      Armenian hy      Azerbaijani az
        Basque eu         Belarusian be    Bengali bn      Bosnian bs     Bulgarian bg     Catalan ca
        Cebuano ceb       Chinese (Simplified) zh-CN       Chinese (Traditional) zh-TW
        Corsican co       Croatian hr      Czech cs        Danish da      Dutch nl         English en
        Esperanto eo      Estonian et      Finnish fi      French fr      Frisian fy       Galician gl
        Georgian ka       German de        Greek el        Gujarati gu    Haitian Creole ht    Hausa ha
        Hawaiian haw      Hebrew he        Hindi hi        Hmong hmn      Hungarian hu     Icelandic is
        Igbo ig           Indonesian id    Irish ga        Italian it     Japanese ja      Javanese jw
        ...
        For more languages supported by Google Translate see:
        https://cloud.google.com/translate/docs/languages

    English, French, German ... separate the words of a sentence by spaces.
    Chinese and Japanese do NOT separate the words of a sentence by spaces.

    If the number of translated entries does not match the number of
    subtitles, diagnostics are printed and the subtitles are written
    untranslated.

    :param input_file: input file path, only srt files are supported currently
    :param output_file: output file path
    :param src_lang: source language, the ISO-639-1 code of the input text
    :param target_lang: target language, the ISO-639-1 code of the output text
    :param encoding: encoding of the input file
    :param mode: 'naive' or 'split'
    :param both: if True keep the source text and append the translation,
        otherwise keep only the translation
    :param space: whether the target language separates words by spaces
    :return: None
    """
    # ``with`` closes the input file; it used to stay open for good.
    with open(input_file, encoding=encoding) as srt_file:
        subtitle = list(srt.parse(srt_file.read()))

    # filter out empty subs
    subtitle = [sub for sub in subtitle if sub.content.strip()]

    if mode == 'naive':
        translated_list = simple_translate_srt(subtitle, src_lang, target_lang)
    else:
        translated_list = translate_srt(subtitle, src_lang, target_lang, space=space)

    if len(subtitle) == len(translated_list):
        for sub, translated in zip(subtitle, translated_list):
            if both:
                # Source text collapsed onto one line, translation below it.
                sub.content = sub.content.replace('\n', ' ') + '\n' + translated
            else:
                sub.content = translated
    else:
        from pprint import pprint
        print('Error')
        pprint("Subtitles")
        pprint(subtitle)
        pprint("translated_list")
        pprint(translated_list)
        print('Error')

    with open(output_file, 'w', encoding='UTF-8') as f:
        f.write(srt.compose(subtitle))
Exemplo n.º 31
0
def test_can_compose_without_ending_blank_line(input_subs):
    '''
    Input that lacks the final blank line must still parse: many subtitle
    editors omit it and many accept it.
    '''
    without_last_char = srt.compose(input_subs, reindex=False)[:-1]
    subs_eq(srt.parse(without_last_char), input_subs)
Exemplo n.º 32
0
def compose_suggest_on_fail(subs, strict=True):
    """Compose ``subs`` to SRT text, logging an encoding hint on parse errors.

    ``srt.SRTParseError`` is re-raised after the hint has been logged.
    """
    try:
        composed = srt.compose(subs, strict=strict)
    except srt.SRTParseError:
        log.fatal(
            'Parsing failed, maybe you need to pass a different encoding '
            'with --encoding?'
        )
        raise
    return composed
Exemplo n.º 33
0
def test_compose_and_parse_strict_crlf(input_subs):
    """Composed output converted to CRLF line endings parses back equal."""
    crlf_text = srt.compose(input_subs, reindex=False).replace('\n', '\r\n')
    reparsed_subs = list(srt.parse(crlf_text))

    # Normalise the content line endings before comparing.
    for entry in reparsed_subs:
        entry.content = entry.content.replace('\r\n', '\n')

    subs_eq(reparsed_subs, input_subs)
Exemplo n.º 34
0
def test_compose_and_parse_strict_crlf(input_subs):
    """Composed output converted to CRLF line endings parses back equal."""
    crlf_text = srt.compose(input_subs, reindex=False).replace("\n", "\r\n")
    reparsed_subs = list(srt.parse(crlf_text))

    # Normalise the content line endings before comparing.
    for entry in reparsed_subs:
        entry.content = entry.content.replace("\r\n", "\n")

    subs_eq(reparsed_subs, input_subs)
Exemplo n.º 35
0
def test_can_compose_without_ending_blank_line(input_subs):
    """
    Input that lacks the final blank line must still parse: many subtitle
    editors omit it and many accept it.
    """
    without_last_char = srt.compose(input_subs, reindex=False)[:-1]
    subs_eq(srt.parse(without_last_char), input_subs)
Exemplo n.º 36
0
def test_parser_noncontiguous(subs, fake_idx, garbage, fake_timedelta):
    """Garbage between subtitle blocks makes strict parsing raise."""
    composed = srt.compose(subs)

    # The injected text has to look a little like an SRT block (index line,
    # then a timestamp), because the parser tries to detect blank lines that
    # do not really delimit subtitles.
    timestamp = srt.timedelta_to_srt_timestamp(fake_timedelta)
    junk_block = "\n\n%d\n%s %s" % (fake_idx, timestamp, garbage)
    corrupted = composed.replace("\n\n", junk_block)

    with assert_raises(srt.SRTParseError):
        list(srt.parse(corrupted))
Exemplo n.º 37
0
def test_compose_and_parse_from_file(input_subs):
    """Parsing from a file-like object yields the composed subtitles."""
    composed = srt.compose(input_subs, reindex=False)
    file_like = StringIO(composed)
    subs_eq(srt.parse(file_like), input_subs)
Exemplo n.º 38
0
def test_can_compose_without_eol_at_all(input_subs):
    """Input with every trailing line break stripped still parses."""
    stripped = srt.compose(input_subs, reindex=False).rstrip('\r\n')
    subs_eq(srt.parse(stripped), input_subs)
Exemplo n.º 39
0
def test_compose_and_parse_strict_custom_eol(input_subs, eol):
    """Output composed with a custom ``eol`` parses back to the input."""
    subs_eq(
        srt.parse(srt.compose(input_subs, reindex=False, eol=eol)),
        input_subs,
    )
Exemplo n.º 40
0
def test_compose_and_parse_strict(input_subs):
    """Composing without reindexing and parsing back yields the input."""
    subs_eq(srt.parse(srt.compose(input_subs, reindex=False)), input_subs)
Exemplo n.º 41
0
def test_parsing_spaced_arrow(subs):
    """A timestamp arrow written as "- >" is still parsed."""
    composed = srt.compose(subs, reindex=False, strict=False)
    spaced_block = composed.replace("-->", "- >")
    subs_eq(srt.parse(spaced_block), subs)