def check_ass_tags(event: AssEvent) -> T.Iterable[BaseResult]:
    """Yield violations for questionable ASS tag usage in an event.

    The event text is parsed first; if parsing fails, a single
    "invalid syntax" violation is yielded and nothing else is checked.

    :param event: event whose text is inspected
    :return: iterator over the violations found
    """
    try:
        parsed = parse_ass(event.text)
    except ParseError as ex:
        yield Violation(event, f"invalid syntax ({ex})")
        return

    # First pass: structural problems that depend on neighbouring items.
    for index, item in enumerate(parsed):
        if isinstance(item, AssTagListOpening):
            follows_ending = isinstance(
                get(parsed, index - 1), AssTagListEnding
            )
            # A tag list opened right after another one was closed is
            # reported unless the item two positions back is a karaoke tag.
            if follows_ending and not isinstance(
                get(parsed, index - 2), AssTagKaraoke
            ):
                yield Violation(event, "disjointed tags")

        # A tag list that is closed right after being opened.
        if isinstance(item, AssTagListEnding) and isinstance(
            get(parsed, index - 1), AssTagListOpening
        ):
            yield Violation(event, "pointless tag")

    # Second pass: problems that concern a single item only.
    for item in parsed:
        if isinstance(item, AssTagAlignment) and item.legacy:
            yield Violation(event, "using legacy alignment tag")
            continue
        if isinstance(item, AssTagComment):
            yield Violation(event, "use notes to make comments")
def spell_check_ass_line(spell_checker: BaseSpellChecker, text: str) -> T.Iterable[T.Tuple[int, int, str]]:
    """Find misspelled words in the text portions of an ASS line.

    Only plain text items are examined; effects such as text invisibility
    are not taken into account. A line that cannot be parsed produces no
    results.

    :param spell_checker: spell checker used to validate each word
    :param text: input ASS line
    :return: tuples of (start, end, word) for every rejected word, where
        the offsets are the text item's start plus the match position
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        return []

    misspelled: T.List[T.Tuple[int, int, str]] = []
    for item in parsed:
        if not isinstance(item, ass_tag_parser.AssText):
            continue
        for match in iter_words_ass_line(item.text):
            word = match.group(0)
            if spell_checker.check(word):
                continue
            misspelled.append(
                (
                    item.meta.start + match.start(),
                    item.meta.start + match.end(),
                    word,
                )
            )
    return misspelled
def extract_text(text: str) -> str:
    """Return the plain text of an ASS line with some symbols removed.

    The text items of the parsed line are concatenated, then the ellipsis,
    en dash, em dash and music note characters as well as the hard line
    break marker are deleted and the result is stripped. If the line cannot
    be parsed, the input is returned unchanged.

    :param text: input ASS line
    :return: cleaned-up plain text, or the original text on parse failure
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        return text

    plain = "".join(
        item.text
        for item in parsed
        if isinstance(item, ass_tag_parser.AssText)
    )
    # Same removal order as before: single characters first, then "\N".
    for unwanted in ("…", "–", "—", "♪", "\\N"):
        plain = plain.replace(unwanted, "")
    return plain.strip()
def collect_text_chunks(events: T.List[AssEvent]) -> T.Iterable[str]:
    """Yield the text chunks found in the notes of the given events.

    For a note that parses, every non-empty text item is yielded in order.
    For a note that does not parse, the whole note is yielded as a single
    chunk, provided it is non-empty.

    :param events: events whose notes are read
    :return: iterator over text chunks
    """
    for event in events:
        note = event.note
        try:
            parsed = ass_tag_parser.parse_ass(note)
        except ass_tag_parser.ParseError:
            if note:
                yield note
            continue

        for item in parsed:
            if not isinstance(item, ass_tag_parser.AssText):
                continue
            if item.text:
                yield item.text
def ass_to_plaintext(text: str) -> str:
    """Convert an ASS line to plain text by dropping its tags.

    When the line parses, only its text items are kept. When it does not,
    every brace-delimited span is removed with a regular expression
    instead. In both cases the hard space and soft line break markers
    become spaces and the hard line break marker becomes a newline.

    :param text: input ASS line
    :return: plain text
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        # Fallback for unparsable input: strip anything between braces.
        plain = str(regex.sub("{[^}]*}", "", text))
    else:
        plain = "".join(
            item.text
            for item in parsed
            if isinstance(item, ass_tag_parser.AssText)
        )

    plain = plain.replace("\\h", " ")
    plain = plain.replace("\\n", " ")
    return plain.replace("\\N", "\n")
def fix_useless_ass_tags(text: str, style: T.Optional[AssStyle] = None) -> str:
    """Remove italic, bold and alignment tags that do not change anything.

    A tag is dropped when it sets the same state as the most recent tag of
    its kind, or as the given style when no such tag has been seen yet.
    Lines that cannot be parsed are returned unchanged.

    :param text: input ASS line
    :param style: optional style providing the initial italic, bold and
        alignment state; without it the initial state is unknown
    :return: recomposed ASS line without the redundant tags
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        return text

    def drop_repeated(
        items: T.Iterable[ass_tag_parser.AssItem],
        tag_class: T.Any,
        read_state: T.Callable[[T.Any], T.Any],
        initial_state: T.Any,
    ) -> T.Iterable[ass_tag_parser.AssItem]:
        # Pass every item through, except tags of tag_class that repeat
        # the state that is already in effect.
        current = initial_state
        for item in items:
            if isinstance(item, tag_class):
                state = read_state(item)
                if current == state:
                    continue
                current = state
            yield item

    filtered = drop_repeated(
        parsed,
        ass_tag_parser.AssTagItalic,
        lambda tag: tag.enabled,
        style.italic if style else None,
    )
    # Bold state is the (enabled, weight) pair; the style only supplies
    # the enabled part.
    filtered = drop_repeated(
        filtered,
        ass_tag_parser.AssTagBold,
        lambda tag: (tag.enabled, tag.weight),
        ((style.bold if style else None), None),
    )
    filtered = drop_repeated(
        filtered,
        ass_tag_parser.AssTagAlignment,
        lambda tag: tag.alignment,
        style.alignment if style else None,
    )
    return ass_tag_parser.compose_ass(filtered)
def fix_whitespace(text: str) -> str:
    """Trim stray whitespace from the text of an ASS line.

    Leading whitespace of the first text item and trailing whitespace of
    the last text item are removed, and spaces around newlines inside every
    text item are dropped. If the line cannot be parsed, the same cleanup
    is applied to the raw string as a whole.

    :param text: input ASS line
    :return: ASS line with the whitespace cleaned up
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        # dumb replace
        return re.sub(" *\n *", "\n", text.strip(), flags=re.M)

    text_items = [
        item for item in parsed if isinstance(item, ass_tag_parser.AssText)
    ]
    if text_items:
        first, last = text_items[0], text_items[-1]
        first.text = first.text.lstrip()
        last.text = last.text.rstrip()

    for item in text_items:
        item.text = re.sub(" *\n *", "\n", item.text, flags=re.M)

    return ass_tag_parser.compose_ass(parsed)
def fix_bad_dialogue_dashes(text: str) -> str:
    """Replace hyphens that open a dialogue line with en dashes.

    A text item starting with a hyphen and a space is rewritten when it is
    the first plain text of the line or when the plain text seen so far
    ends with a newline. Non-text items are copied through verbatim. If the
    line cannot be parsed, the replacement is applied to the raw string.

    :param text: input ASS line
    :return: ASS line with the dialogue dashes fixed
    """
    try:
        parsed = ass_tag_parser.parse_ass(text)
    except ass_tag_parser.ParseError:
        # dumb replace
        return re.sub("^- ", "\N{EN DASH} ", text, flags=re.M)

    pieces: T.List[str] = []
    # Only text items contribute here; it tells whether the next text item
    # sits at the beginning of a line.
    seen_plain_text = ""
    for item in parsed:
        raw = item.meta.text
        if isinstance(item, ass_tag_parser.AssText):
            at_line_start = (
                not seen_plain_text or seen_plain_text.endswith("\n")
            )
            if raw.startswith("- ") and at_line_start:
                raw = re.sub("^- ", "\N{EN DASH} ", raw, flags=re.M)
            seen_plain_text += raw
        pieces.append(raw)
    return "".join(pieces)
def put_text_chunks(events: T.List[AssEvent], chunks: T.List[str]) -> None:
    """Rebuild each event's note from the given chunks and add it to the text.

    For a note that parses, every non-empty text item is replaced by the
    next chunk while all other items keep their original source text. For
    a note that does not parse, the next chunk is used as the whole result.
    A non-empty result is appended to the event text after a hard line
    break marker, or becomes the event text when that is empty.

    :param events: events to update in place
    :param chunks: replacement chunks; consumed from the front, so the
        caller's list is modified
    """
    for event in events:
        note = event.note
        try:
            parsed = ass_tag_parser.parse_ass(note)
        except ass_tag_parser.ParseError:
            rebuilt = chunks.pop(0)
        else:
            parts: T.List[str] = []
            for item in parsed:
                if isinstance(item, ass_tag_parser.AssText) and item.text:
                    parts.append(chunks.pop(0))
                else:
                    parts.append(item.meta.text)
            rebuilt = "".join(parts)

        if not rebuilt:
            continue

        if event.text:
            event.text = event.text + "\\N" + rebuilt
        else:
            event.text = rebuilt
def _rescale_ass_tags(api: Api, x_factor: float, y_factor: float) -> None:
    """Rescale size and position tags in the text of every event.

    Border and shadow sizes are multiplied by ``y_factor``. Position,
    rotation origin and move coordinates are multiplied by the factor of
    their axis and truncated with ``int``. Font sizes are multiplied by
    ``y_factor`` and truncated with ``int``. Each processed event gets its
    text recomposed from the modified items.

    Events whose text cannot be parsed are skipped and left untouched.

    :param api: API object whose ``subs.events`` are modified in place
    :param x_factor: horizontal scaling factor
    :param y_factor: vertical scaling factor
    """
    size_tags = (
        ass_tag_parser.AssTagBorder,
        ass_tag_parser.AssTagXBorder,
        ass_tag_parser.AssTagYBorder,
        ass_tag_parser.AssTagShadow,
        ass_tag_parser.AssTagXShadow,
        ass_tag_parser.AssTagYShadow,
    )
    point_tags = (
        ass_tag_parser.AssTagPosition,
        ass_tag_parser.AssTagRotationOrigin,
    )

    for event in api.subs.events:
        try:
            ass_line = ass_tag_parser.parse_ass(event.text)
        except ass_tag_parser.ParseError:
            # Skip only this event. Returning here would silently leave
            # every following event unscaled.
            continue

        for item in ass_line:
            if isinstance(item, size_tags):
                item.size *= y_factor
            elif isinstance(item, point_tags):
                item.x = int(item.x * x_factor)
                item.y = int(item.y * y_factor)
            elif isinstance(item, ass_tag_parser.AssTagMove):
                item.x1 = int(item.x1 * x_factor)
                item.y1 = int(item.y1 * y_factor)
                item.x2 = int(item.x2 * x_factor)
                item.y2 = int(item.y2 * y_factor)
            elif isinstance(item, ass_tag_parser.AssTagFontSize):
                item.size = int(item.size * y_factor)

        event.text = ass_tag_parser.compose_ass(ass_line)
def get_used_font_styles(
    api: Api
) -> T.Dict[T.Tuple[str, bool, bool], T.Set[str]]:
    """Collect the glyphs used per font family, bold and italic combination.

    Comment events and events that refer to an unknown style are ignored,
    as are events whose text cannot be parsed. Each event starts from its
    style's font name, bold and italic values, which are then updated by
    the bold, italic and font name tags met while walking the line.

    :param api: API object providing ``subs.styles`` and ``subs.events``
    :return: mapping of (family, is_bold, is_italic) to the set of glyphs
        rendered with that combination
    """
    results = defaultdict(set)
    styles = {style.name: style for style in api.subs.styles}

    for line in api.subs.events:
        if line.is_comment or line.style not in styles:
            continue

        base_style = styles[line.style]
        family = base_style.font_name
        is_bold = base_style.bold
        is_italic = base_style.italic

        try:
            parsed = ass_tag_parser.parse_ass(line.text)
        except ass_tag_parser.ParseError:
            # ASS parsing errors are handled elsewhere
            continue

        for item in parsed:
            if isinstance(item, ass_tag_parser.AssTagBold):
                if item.weight is None:
                    is_bold = item.enabled
                else:
                    is_bold = item.weight > 100
            elif isinstance(item, ass_tag_parser.AssTagItalic):
                is_italic = item.enabled
            elif isinstance(item, ass_tag_parser.AssTagFontName):
                # An empty font name falls back to the style's font.
                family = item.name or base_style.font_name
            elif isinstance(item, ass_tag_parser.AssText):
                for glyph in item.text:
                    results[(family, is_bold, is_italic)].add(glyph)

    return results
def parse(self, ass_string, sub_format="ass"):
    """Load subtitles and build the delimited string of their text.

    The subtitle file is stored on ``self.subs`` and its format on
    ``self.format``. Every line is parsed and each of its tags is passed to
    ``readTagText`` together with the shared string holder; a translation
    delimiter is appended after each line and the line text is recomposed
    from the parsed items.

    A line that cannot be parsed only contributes a delimiter and keeps its
    text unchanged.

    :param ass_string: subtitle file contents
    :param sub_format: format name handed to pysubs2
    :return: the accumulated string held by the string holder
    """
    self.format = sub_format
    subs = pysubs2.SSAFile.from_string(ass_string, sub_format)
    self.subs = subs
    input_hold = StringHolder()

    for line in subs:
        try:
            subdoc = parse_ass(line.text)
        except errors.ParseError:
            input_hold.string += translation_delimeter
            # Without this, the code below would run with ``subdoc``
            # unbound (first line) or left over from the previous line,
            # overwriting this line's text with the wrong content.
            continue

        found_text_tag = False
        for tag in subdoc:
            found_tag = readTagText(tag, input_hold)
            if found_tag is not None:
                found_text_tag = found_tag

        # NOTE(review): the flag starts as False and is only replaced by
        # non-None values, so this condition always holds and a delimiter
        # is appended for every parsed line. Confirm whether lines without
        # any text were meant to be skipped.
        if found_text_tag is not None:
            input_hold.string += translation_delimeter

        line.text = compose_ass(subdoc)

    return input_hold.string
def check_ass_tags(event: AssEvent) -> T.Iterable[BaseResult]:
    """Report problems with the ASS tags used in an event's text.

    If the text cannot be parsed, only an "invalid syntax" violation is
    produced. Otherwise the parsed items are scanned for adjacent tag
    lists, empty tag lists, legacy alignment tags and comment tags.

    :param event: event to check
    :return: iterator over the violations found
    """
    try:
        items = parse_ass(event.text)
    except ParseError as ex:
        yield Violation(event, f"invalid syntax ({ex})")
        return

    for position, item in enumerate(items):
        # Opening a tag list directly after closing one is reported,
        # except when the item two positions back is a karaoke tag.
        if isinstance(item, AssTagListOpening):
            if isinstance(get(items, position - 1), AssTagListEnding):
                if not isinstance(get(items, position - 2), AssTagKaraoke):
                    yield Violation(event, "disjointed tags")

        # Closing a tag list directly after opening it.
        if isinstance(item, AssTagListEnding):
            if isinstance(get(items, position - 1), AssTagListOpening):
                yield Violation(event, "pointless tag")

    for item in items:
        is_legacy_alignment = (
            isinstance(item, AssTagAlignment) and item.legacy
        )
        if is_legacy_alignment:
            yield Violation(event, "using legacy alignment tag")
        elif isinstance(item, AssTagComment):
            yield Violation(event, "use notes to make comments")
def compose(self, translatedString, sub_format="ass"):
    """Write translated text back into the stored subtitles.

    The translated string is split on the translation delimiter. For each
    subtitle, the current piece is run through ``autoLineBreaks`` (with a
    line break that depends on ``self.format``) and assigned to the first
    tag for which ``readTagText`` returns an object; only then does the
    piece index advance. Every subtitle's text is recomposed afterwards.

    :param translatedString: delimiter-separated translated text
    :param sub_format: output format handed to pysubs2
    :return: the serialized subtitle file
    """
    subs = self.subs
    pieces = translatedString.split(translation_delimeter)
    cursor = 0

    for sub in subs:
        replacement = pieces[cursor]

        # Pick the line break matching the format the file was parsed as;
        # other formats leave the piece untouched.
        if self.format == "ass":
            line_break = '\\N'
        elif self.format == "srt":
            line_break = '\n'
        else:
            line_break = None
        if line_break is not None:
            replacement = autoLineBreaks(replacement, line_break)

        subdoc = parse_ass(sub.text)
        for tag in subdoc:
            text_tag = readTagText(tag, None)
            if text_tag is None:
                continue
            # Only the first text-bearing tag receives the translation.
            text_tag.text = replacement
            cursor += 1
            break

        sub.text = compose_ass(subdoc)

    subs.info["Original Translation"] = "Líný překlad z dulik.net/aichan"
    return subs.to_string(format_=sub_format)
def get_used_font_styles(
    api: Api,
) -> T.Dict[T.Tuple[str, bool, bool], T.Set[str]]:
    """Map each font family, bold and italic combination to its glyphs.

    Comment events, events with a style that is not defined, and events
    whose text cannot be parsed are skipped. The font state of an event
    starts from its style and follows the bold, italic and font name tags
    found in the line.

    :param api: API object providing ``subs.styles`` and ``subs.events``
    :return: mapping of (family, is_bold, is_italic) to the glyphs used
    """
    glyphs_by_font = defaultdict(set)
    known_styles = {style.name: style for style in api.subs.styles}

    for event in api.subs.events:
        if event.is_comment:
            continue
        if event.style not in known_styles:
            continue

        event_style = known_styles[event.style]
        family = event_style.font_name
        is_bold = event_style.bold
        is_italic = event_style.italic

        try:
            items = ass_tag_parser.parse_ass(event.text)
        except ass_tag_parser.ParseError:
            # ASS parsing errors are handled elsewhere
            continue

        for item in items:
            if isinstance(item, ass_tag_parser.AssTagBold):
                # An explicit weight takes precedence over the on/off flag.
                is_bold = (
                    item.weight > 100
                    if item.weight is not None
                    else item.enabled
                )
            elif isinstance(item, ass_tag_parser.AssTagItalic):
                is_italic = item.enabled
            elif isinstance(item, ass_tag_parser.AssTagFontName):
                family = item.name if item.name else event_style.font_name
            elif isinstance(item, ass_tag_parser.AssText):
                font_key = (family, is_bold, is_italic)
                for glyph in item.text:
                    glyphs_by_font[font_key].add(glyph)

    return glyphs_by_font
def _get_syllables(self, text: str) -> T.Iterable[_Syllable]:
    """Split an ASS line into syllables at its karaoke tags.

    Every karaoke tag starts a new group with that tag's duration; items
    before the first karaoke tag form a group with duration zero. Tag list
    openings and endings are discarded. Leading groups that are both empty
    and of zero duration are dropped before the syllables are produced.

    :param text: input ASS line
    :return: iterator over syllables built from each group's composed
        text and its duration
    """
    list_markers = (
        ass_tag_parser.AssTagListOpening,
        ass_tag_parser.AssTagListEnding,
    )
    groups: T.List[T.List[ass_tag_parser.AssItem]] = [[]]
    lengths: T.List[int] = [0]

    for item in ass_tag_parser.parse_ass(text):
        if isinstance(item, ass_tag_parser.AssTagKaraoke):
            lengths.append(item.duration)
            groups.append([])
            continue
        if isinstance(item, list_markers):
            continue
        groups[-1].append(item)

    # Drop leading groups that carry neither content nor duration.
    while groups and lengths and not groups[0] and not lengths[0]:
        groups.pop(0)
        lengths.pop(0)

    for length, group in zip(lengths, groups):
        syllable_text = ass_tag_parser.compose_ass(group)
        yield _Syllable(syllable_text, length)
'STYLE_NAME': event.style, 'STYLES_LIST': list(fl.styles.keys()), 'EVENT_TYPE': event.type, 'EVENT_CONTENT': event, 'FILE_NAME': full_file_path, })) else: # Prepare font data current_font['fontname'] = fl.styles[event.style].fontname current_font['bold'] = fl.styles[event.style].bold current_font['italic'] = fl.styles[event.style].italic try: # Parsing the Dialogue ass_tags = ass_tag_parser.parse_ass(event.text) # Parsing each tag individually for tag in ass_tags: # Check if tag is AssTagResetStyle, this tag does one thing of two: # 1 - Use the style of Dialogue. # 2 - Assign a style from Style list in the ass file. if type(tag) == ass_tag_parser.AssTagResetStyle: # If style name is not present, then use the Dialogue style if (tag.style is None) or (type(tag.style) == str and len(tag.style) < 1): current_font['fontname'] = fl.styles[event.style].fontname current_font['bold'] = fl.styles[event.style].bold current_font['italic'] = fl.styles[event.style].italic # If style name is present, use it.