Exemplo n.º 1
0
def check_ass_tags(event: AssEvent) -> T.Iterable[BaseResult]:
    """Yield violations concerning the ASS tags used in an event.

    Reported problems: text that cannot be parsed, a tag list opened
    directly after another one was closed (unless the item before the
    closing is a karaoke tag), a tag list closed directly after being
    opened, legacy alignment tags and comment tags.

    :param event: event whose text gets inspected
    :return: iterator over the violations found
    """
    try:
        parsed = parse_ass(event.text)
    except ParseError as err:
        yield Violation(event, f"invalid syntax ({err})")
        return

    # Pass 1: problems that depend on the neighbouring items.
    # NOTE(review): get() is presumably a bounds-tolerant index lookup;
    # it is only called in the same situations as before.
    for position, current in enumerate(parsed):
        if isinstance(current, AssTagListOpening):
            reopened = isinstance(
                get(parsed, position - 1), AssTagListEnding
            )
            if reopened and not isinstance(
                get(parsed, position - 2), AssTagKaraoke
            ):
                yield Violation(event, "disjointed tags")

        if isinstance(current, AssTagListEnding):
            if isinstance(get(parsed, position - 1), AssTagListOpening):
                yield Violation(event, "pointless tag")

    # Pass 2: problems that can be judged from a single item.
    for current in parsed:
        if isinstance(current, AssTagAlignment) and current.legacy:
            message = "using legacy alignment tag"
        elif isinstance(current, AssTagComment):
            message = "use notes to make comments"
        else:
            continue
        yield Violation(event, message)
Exemplo n.º 2
0
def spell_check_ass_line(spell_checker: BaseSpellChecker,
                         text: str) -> T.Iterable[T.Tuple[int, int, str]]:
    """Collect the misspelled words found in an ASS line.

    Only the text items of the parsed line are examined. Effects such as
    text invisibility are not taken into account. A line that cannot be
    parsed produces no results.

    :param spell_checker: spell checker to validate the words with
    :param text: input ASS line
    :return: list of tuples with start, end and the offending word; the
        positions are the match offsets shifted by the start of the text
        item the word was found in
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        return []

    return [
        (
            chunk.meta.start + hit.start(),
            chunk.meta.start + hit.end(),
            hit.group(0),
        )
        for chunk in parsed
        if isinstance(chunk, ass_tag_parser.AssText)
        for hit in iter_words_ass_line(chunk.text)
        if not spell_checker.check(hit.group(0))
    ]
Exemplo n.º 3
0
def extract_text(text: str) -> str:
    """Return the bare text of an ASS line without decorative characters.

    The text items of the parsed line are concatenated, then the
    ellipsis, en dash, em dash, music note and hard line break markers
    are removed and the result is stripped of surrounding whitespace.
    If the line cannot be parsed, it is returned unchanged.

    :param text: input ASS line
    :return: cleaned-up text
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        return text

    plain = "".join(
        chunk.text
        for chunk in parsed
        if isinstance(chunk, ass_tag_parser.AssText)
    )
    # Removals are applied one after another, in this order.
    for unwanted in list("…–—♪") + ["\\N"]:
        plain = plain.replace(unwanted, "")
    return plain.strip()
Exemplo n.º 4
0
def collect_text_chunks(events: T.List[AssEvent]) -> T.Iterable[str]:
    """Yield the non-empty pieces of text stored in the events' notes.

    A note that parses as an ASS line contributes each of its non-empty
    text items separately; a note that does not parse is yielded whole,
    provided it is not empty.

    :param events: events whose notes get scanned
    :return: iterator over text chunks
    """
    for event in events:
        note = event.note
        try:
            parsed = ass_tag_parser.parse_ass(note)
        except ass_tag_parser.ParseError:
            # Unparsable note: hand it over as a single chunk.
            if note:
                yield note
            continue

        yield from (
            piece.text
            for piece in parsed
            if isinstance(piece, ass_tag_parser.AssText) and piece.text
        )
Exemplo n.º 5
0
def ass_to_plaintext(text: str) -> str:
    """Strip ASS tags from an ASS line.

    When the line cannot be parsed, anything enclosed in curly braces is
    removed with a regular expression instead. Afterwards hard spaces and
    soft line breaks become spaces and hard line breaks become newlines.

    :param text: input ASS line
    :return: plain text
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        # Best-effort fallback for malformed lines.
        plain = str(regex.sub("{[^}]*}", "", text))
    else:
        plain = "".join(
            chunk.text
            for chunk in parsed
            if isinstance(chunk, ass_tag_parser.AssText)
        )

    for token, replacement in (("\\h", " "), ("\\n", " "), ("\\N", "\n")):
        plain = plain.replace(token, replacement)
    return plain
Exemplo n.º 6
0
def fix_useless_ass_tags(text: str, style: T.Optional[AssStyle] = None) -> str:
    """Remove italic, bold and alignment tags that change nothing.

    A tag is dropped when it sets the state that is already in effect,
    either from the given style or from an earlier tag of the same kind
    in the line. A line that cannot be parsed is returned unchanged.

    :param text: input ASS line
    :param style: style providing the initial states, if known
    :return: ASS line without the redundant tags
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        return text

    def drop_repeats(
        items: T.Iterable[ass_tag_parser.AssItem],
        tag_type: T.Any,
        state: T.Any,
        state_of: T.Callable[[T.Any], T.Any],
    ) -> T.Iterable[ass_tag_parser.AssItem]:
        # Pass everything through except tags of `tag_type` that repeat
        # the state currently in effect.
        for entry in items:
            if isinstance(entry, tag_type):
                found = state_of(entry)
                if state == found:
                    continue
                state = found
            yield entry

    filtered = drop_repeats(
        parsed,
        ass_tag_parser.AssTagItalic,
        style.italic if style else None,
        lambda tag: tag.enabled,
    )
    filtered = drop_repeats(
        filtered,
        ass_tag_parser.AssTagBold,
        # Bold state is the pair (enabled, weight); styles carry no weight.
        (style.bold if style else None, None),
        lambda tag: (tag.enabled, tag.weight),
    )
    filtered = drop_repeats(
        filtered,
        ass_tag_parser.AssTagAlignment,
        style.alignment if style else None,
        lambda tag: tag.alignment,
    )

    return ass_tag_parser.compose_ass(filtered)
Exemplo n.º 7
0
def fix_whitespace(text: str) -> str:
    """Trim superfluous whitespace from an ASS line.

    Leading whitespace of the first text item and trailing whitespace of
    the last text item are removed, and spaces around newlines inside
    every text item are dropped. A line that cannot be parsed gets the
    same treatment applied to the raw string as a whole.

    :param text: input ASS line
    :return: ASS line with the whitespace fixed
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        # dumb replace
        return re.sub(" *\n *", "\n", text.strip(), flags=re.M)

    text_items = [
        chunk for chunk in parsed if isinstance(chunk, ass_tag_parser.AssText)
    ]
    if text_items:
        first, last = text_items[0], text_items[-1]
        first.text = first.text.lstrip()
        last.text = last.text.rstrip()

    for chunk in text_items:
        chunk.text = re.sub(" *\n *", "\n", chunk.text, flags=re.M)

    return ass_tag_parser.compose_ass(parsed)
Exemplo n.º 8
0
def fix_bad_dialogue_dashes(text: str) -> str:
    """Replace hyphens that open a dialogue line with en dashes.

    A text item starting with a hyphen and a space is fixed when no text
    came before it or the text so far ends with a newline. A line that
    cannot be parsed is fixed with a plain regular expression instead.

    :param text: input ASS line
    :return: ASS line with the dialogue dashes fixed
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        # dumb replace
        return re.sub("^- ", "\N{EN DASH} ", text, flags=re.M)

    seen_text = ""
    pieces = []
    for entry in parsed:
        raw = entry.meta.text
        if isinstance(entry, ass_tag_parser.AssText):
            at_line_start = not seen_text or seen_text.endswith("\n")
            if raw.startswith("- ") and at_line_start:
                raw = re.sub("^- ", "\N{EN DASH} ", raw, flags=re.M)
            # Only text items count towards the "text so far".
            seen_text += raw
        pieces.append(raw)
    return "".join(pieces)
Exemplo n.º 9
0
def put_text_chunks(events: T.List[AssEvent], chunks: T.List[str]) -> None:
    """Rebuild event texts from their notes, substituting the given chunks.

    Each non-empty text item of a parsed note is replaced by the next
    chunk, while every other item keeps its original source text. A note
    that cannot be parsed is replaced by the next chunk as a whole. The
    result is appended to the event text after a hard line break, or
    becomes the event text if there was none.

    :param events: events to update in place
    :param chunks: replacement texts; consumed from the front
    """
    for event in events:
        note = event.note
        try:
            parsed = ass_tag_parser.parse_ass(note)
        except ass_tag_parser.ParseError:
            rebuilt = chunks.pop(0)
        else:
            rebuilt = "".join(
                chunks.pop(0)
                if isinstance(piece, ass_tag_parser.AssText) and piece.text
                else piece.meta.text
                for piece in parsed
            )

        if not rebuilt:
            continue

        if event.text:
            event.text = event.text + ("\\N" + rebuilt)
        else:
            event.text = rebuilt
Exemplo n.º 10
0
def _rescale_ass_tags(api: Api, x_factor: float, y_factor: float) -> None:
    """Rescale the size and position tags of every event in place.

    Border and shadow sizes (including their X and Y variants) are
    multiplied by ``y_factor``. Position, rotation origin and move
    coordinates are scaled per axis and truncated to integers. Font sizes
    are scaled by ``y_factor`` and truncated to integers.

    Events whose text cannot be parsed are left untouched.

    :param api: core API giving access to the subtitle events
    :param x_factor: horizontal scaling factor
    :param y_factor: vertical scaling factor
    """
    size_tags = (
        ass_tag_parser.AssTagBorder,
        ass_tag_parser.AssTagXBorder,
        ass_tag_parser.AssTagYBorder,
        ass_tag_parser.AssTagShadow,
        ass_tag_parser.AssTagXShadow,
        ass_tag_parser.AssTagYShadow,
    )
    point_tags = (
        ass_tag_parser.AssTagPosition,
        ass_tag_parser.AssTagRotationOrigin,
    )

    for event in api.subs.events:
        try:
            ass_line = ass_tag_parser.parse_ass(event.text)
        except ass_tag_parser.ParseError:
            # Skip only this event. Returning here would leave every
            # following event unscaled while earlier ones were already
            # changed.
            continue

        for item in ass_line:
            if isinstance(item, size_tags):
                item.size *= y_factor

            elif isinstance(item, point_tags):
                item.x = int(item.x * x_factor)
                item.y = int(item.y * y_factor)

            elif isinstance(item, ass_tag_parser.AssTagMove):
                item.x1 = int(item.x1 * x_factor)
                item.y1 = int(item.y1 * y_factor)
                item.x2 = int(item.x2 * x_factor)
                item.y2 = int(item.y2 * y_factor)

            elif isinstance(item, ass_tag_parser.AssTagFontSize):
                item.size = int(item.size * y_factor)

        event.text = ass_tag_parser.compose_ass(ass_line)
Exemplo n.º 11
0
def get_used_font_styles(
    api: Api
) -> T.Dict[T.Tuple[str, bool, bool], T.Set[str]]:
    """Collect the glyphs used with each font variant in the subtitles.

    Comment events, events referring to an unknown style and events whose
    text cannot be parsed are skipped. Within an event, bold, italic and
    font name tags change the variant that subsequent text is counted
    under.

    :param api: core API giving access to the styles and events
    :return: mapping from (font family, is bold, is italic) to the set of
        characters rendered with that variant
    """
    glyphs_by_font = defaultdict(set)
    known_styles = {style.name: style for style in api.subs.styles}

    for event in api.subs.events:
        if event.is_comment or event.style not in known_styles:
            continue

        base = known_styles[event.style]
        family, bold, italic = base.font_name, base.bold, base.italic

        try:
            parsed = ass_tag_parser.parse_ass(event.text)
        except ass_tag_parser.ParseError:
            # ASS parsing errors are handled elsewhere
            continue

        for item in parsed:
            if isinstance(item, ass_tag_parser.AssTagBold):
                # An explicit weight takes precedence over the on/off flag.
                if item.weight is None:
                    bold = item.enabled
                else:
                    bold = item.weight > 100
            elif isinstance(item, ass_tag_parser.AssTagItalic):
                italic = item.enabled
            elif isinstance(item, ass_tag_parser.AssTagFontName):
                # An empty font name falls back to the style's font.
                family = item.name or base.font_name
            elif isinstance(item, ass_tag_parser.AssText):
                # Guarded so that empty text does not create an empty entry.
                if item.text:
                    glyphs_by_font[(family, bold, italic)].update(item.text)

    return glyphs_by_font
Exemplo n.º 12
0
    def parse(self, ass_string, sub_format="ass"):
        """Load subtitles and gather their text into one delimited string.

        The subtitles are read with pysubs2 and remembered on the
        instance together with their format. Every line is parsed,
        its tags are handed to ``readTagText`` along with the shared
        string holder, and a delimiter is appended after the line. Each
        parsed line is then re-composed and written back.

        A line that cannot be parsed contributes only a delimiter and
        keeps its original text.

        :param ass_string: subtitle file contents
        :param sub_format: pysubs2 format identifier of the input
        :return: the accumulated string held by the string holder
        """
        self.format = sub_format
        subs = pysubs2.SSAFile.from_string(ass_string, sub_format)
        self.subs = subs
        input_hold = StringHolder()
        for line in subs:
            try:
                subdoc = parse_ass(line.text)
            except errors.ParseError:
                input_hold.string += translation_delimeter
                # Without this, `subdoc` would be unbound (first line) or
                # still hold the previous line, whose text would then be
                # collected again and written into this line.
                continue

            foundTextTag = False
            for tag in subdoc:
                found_tag = readTagText(tag, input_hold)
                if found_tag is not None:
                    foundTextTag = found_tag
            # NOTE(review): foundTextTag starts as False and is never set
            # to None, so this test always passes and every parsed line
            # gets a delimiter. Kept as is; confirm whether lines without
            # text were meant to be skipped.
            if foundTextTag is not None:
                input_hold.string += translation_delimeter

            line.text = compose_ass(subdoc)
        return input_hold.string
Exemplo n.º 13
0
def check_ass_tags(event: AssEvent) -> T.Iterable[BaseResult]:
    """Yield violations concerning the ASS tags used in an event.

    Reported problems: text that cannot be parsed, a tag list opened
    directly after another one was closed (unless the item before the
    closing is a karaoke tag), a tag list closed directly after being
    opened, legacy alignment tags and comment tags.

    :param event: event whose text gets inspected
    :return: iterator over the violations found
    """
    try:
        items = parse_ass(event.text)
    except ParseError as err:
        yield Violation(event, f"invalid syntax ({err})")
        return

    # Structural checks that look at the items preceding the current one.
    # NOTE(review): get() is presumably a bounds-tolerant index lookup;
    # it is only called in the same situations as before.
    for index, entry in enumerate(items):
        if isinstance(entry, AssTagListOpening):
            follows_ending = isinstance(
                get(items, index - 1), AssTagListEnding
            )
            if follows_ending and not isinstance(
                get(items, index - 2), AssTagKaraoke
            ):
                yield Violation(event, "disjointed tags")

        if isinstance(entry, AssTagListEnding):
            if isinstance(get(items, index - 1), AssTagListOpening):
                yield Violation(event, "pointless tag")

    # Checks that only need the item itself.
    for entry in items:
        if isinstance(entry, AssTagAlignment) and entry.legacy:
            problem = "using legacy alignment tag"
        elif isinstance(entry, AssTagComment):
            problem = "use notes to make comments"
        else:
            continue
        yield Violation(event, problem)
Exemplo n.º 14
0
    def compose(self, translatedString, sub_format="ass"):
        """Put translated text back into the stored subtitles and serialise.

        The translated string is split on the translation delimiter. For
        every stored line the current piece is run through
        ``autoLineBreaks`` (with the line break token matching the stored
        format), and assigned to the first tag for which ``readTagText``
        returns a value. Only then does the cursor advance to the next
        piece. Each line is re-composed afterwards.

        :param translatedString: delimited translated text
        :param sub_format: pysubs2 format identifier for the output
        :return: the serialised subtitles
        """
        subs = self.subs
        pieces = translatedString.split(translation_delimeter)

        cursor = 0
        for entry in subs:
            replacement = pieces[cursor]
            if self.format == "ass":
                replacement = autoLineBreaks(replacement, '\\N')
            elif self.format == "srt":
                replacement = autoLineBreaks(replacement, '\n')

            subdoc = parse_ass(entry.text)
            # Lazily probe the tags and stop at the first usable one.
            text_tag = next(
                (
                    hit
                    for hit in (readTagText(tag, None) for tag in subdoc)
                    if hit is not None
                ),
                None,
            )
            if text_tag is not None:
                text_tag.text = replacement
                cursor += 1
            entry.text = compose_ass(subdoc)

        subs.info["Original Translation"] = "Líný překlad z dulik.net/aichan"
        return subs.to_string(format_=sub_format)
Exemplo n.º 15
0
def get_used_font_styles(
    api: Api, ) -> T.Dict[T.Tuple[str, bool, bool], T.Set[str]]:
    """Collect the glyphs used with each font variant in the subtitles.

    Comment events, events referring to an unknown style and events whose
    text cannot be parsed are skipped. Within an event, bold, italic and
    font name tags change the variant that subsequent text is counted
    under.

    :param api: core API giving access to the styles and events
    :return: mapping from (font family, is bold, is italic) to the set of
        characters rendered with that variant
    """
    usage = defaultdict(set)
    style_by_name = {style.name: style for style in api.subs.styles}

    for event in api.subs.events:
        if event.is_comment or event.style not in style_by_name:
            continue

        default = style_by_name[event.style]
        family, bold, italic = default.font_name, default.bold, default.italic

        try:
            parsed = ass_tag_parser.parse_ass(event.text)
        except ass_tag_parser.ParseError:
            # ASS parsing errors are handled elsewhere
            continue

        for item in parsed:
            if isinstance(item, ass_tag_parser.AssTagBold):
                # An explicit weight takes precedence over the on/off flag.
                if item.weight is None:
                    bold = item.enabled
                else:
                    bold = item.weight > 100
            elif isinstance(item, ass_tag_parser.AssTagItalic):
                italic = item.enabled
            elif isinstance(item, ass_tag_parser.AssTagFontName):
                # An empty font name falls back to the style's font.
                family = item.name or default.font_name
            elif isinstance(item, ass_tag_parser.AssText):
                # Guarded so that empty text does not create an empty entry.
                if item.text:
                    usage[(family, bold, italic)].update(item.text)

    return usage
Exemplo n.º 16
0
    def _get_syllables(self, text: str) -> T.Iterable[_Syllable]:
        """Split an ASS line into syllables at its karaoke tags.

        Every karaoke tag starts a new syllable carrying the tag's
        duration. Tag list opening and ending markers are discarded, and
        all other items are attached to the syllable in progress. Leading
        syllables that are empty and have zero duration are dropped.

        :param text: input ASS line
        :return: iterator over syllables with composed text and duration
        """
        list_markers = (
            ass_tag_parser.AssTagListOpening,
            ass_tag_parser.AssTagListEnding,
        )
        groups: T.List[T.List[ass_tag_parser.AssItem]] = [[]]
        lengths: T.List[int] = [0]

        for item in ass_tag_parser.parse_ass(text):
            if isinstance(item, ass_tag_parser.AssTagKaraoke):
                lengths.append(item.duration)
                groups.append([])
                continue
            if isinstance(item, list_markers):
                continue
            groups[-1].append(item)

        # The implicit first syllable is noise unless it holds something.
        while groups and lengths and not (groups[0] or lengths[0]):
            del groups[0]
            del lengths[0]

        for length, group in zip(lengths, groups):
            yield _Syllable(ass_tag_parser.compose_ass(group), length)
Exemplo n.º 17
0
                            'STYLE_NAME': event.style,
                            'STYLES_LIST': list(fl.styles.keys()),
                            'EVENT_TYPE': event.type,
                            'EVENT_CONTENT': event,
                            'FILE_NAME': full_file_path,
                        }))
                else:

                    # Prepare font data
                    current_font['fontname'] = fl.styles[event.style].fontname
                    current_font['bold'] = fl.styles[event.style].bold
                    current_font['italic'] = fl.styles[event.style].italic

                try:
                    # Parsing the Dialogue
                    ass_tags = ass_tag_parser.parse_ass(event.text)

                    # Parsing each tag individually
                    for tag in ass_tags:

                        # Check if tag is AssTagResetStyle, this tag does one thing of two:
                        # 1 - Use the style of Dialogue.
                        # 2 - Assign a style from Style list in the ass file.
                        if type(tag) == ass_tag_parser.AssTagResetStyle:
                            # If style name is not present, then use the Dialogue style
                            if (tag.style is None) or (type(tag.style) == str and len(tag.style) < 1):
                                current_font['fontname'] = fl.styles[event.style].fontname
                                current_font['bold'] = fl.styles[event.style].bold
                                current_font['italic'] = fl.styles[event.style].italic

                            # If style name is present, use it.