def check_caption(self, paragraph: P, caption_id: str, begin: str, end: str, *children):
    """Assert that a caption paragraph has the expected id, timing and children.

    Args:
        paragraph: the paragraph under test.
        caption_id: the expected paragraph identifier.
        begin: expected begin time, in a form accepted by ``SccTimeCode.parse``.
        end: expected end time, in a form accepted by ``SccTimeCode.parse``.
        *children: the expected children, in order. A ``str`` is compared with
            the text of the first child of the matching paragraph child. Any
            other value must be the ``Br`` class, in which case the matching
            paragraph child must be a ``Br`` instance.
    """
    self.assertEqual(caption_id, paragraph.get_id())
    self.assertEqual(SccTimeCode.parse(begin).to_temporal_offset(), paragraph.get_begin())
    self.assertEqual(SccTimeCode.parse(end).to_temporal_offset(), paragraph.get_end())

    p_children = list(paragraph)
    self.assertEqual(len(children), len(p_children))

    # Lengths are equal at this point, so zip() pairs every expectation
    for expected_child, child in zip(children, p_children):
        if isinstance(expected_child, str):
            texts = list(child)
            self.assertEqual(expected_child, texts[0].get_text())
        else:
            self.assertEqual(expected_child, Br)
            # Comparing only the expectation with Br would let any element
            # pass where a line break is expected: check the actual child too
            self.assertIsInstance(child, Br)
def test_process_element(self):
    """Check SupportedStylePropertiesFilter._process_element on a single P.

    The filter is configured with Color restricted to red and Extent mapped to
    an empty list. The assertions expect an initial-valued Color to be dropped,
    a red Color to be kept, and Extent to be kept in both cases.
    """
    color = StyleProperties.Color
    extent = StyleProperties.Extent
    red = NamedColors.red.value

    style_filter = SupportedStylePropertiesFilter({color: [red], extent: []})

    # Paragraph carrying the initial value of every known style property
    paragraph = P()
    for style_property in StyleProperties.ALL:
        paragraph.set_style(style_property, style_property.make_initial_value())

    self.assertEqual(len(StyleProperties.ALL), len(paragraph._styles))
    self.assertEqual(color.make_initial_value(), paragraph.get_style(color))
    self.assertEqual(extent.make_initial_value(), paragraph.get_style(extent))

    # First pass: only Extent is expected to remain
    style_filter._process_element(paragraph)

    self.assertIsNone(paragraph.get_style(color))
    self.assertEqual(extent.make_initial_value(), paragraph.get_style(extent))
    self.assertEqual(1, len(paragraph._styles))

    # Second pass: a supported Color value must not be removed
    paragraph.set_style(color, red)
    style_filter._process_element(paragraph)

    self.assertEqual(2, len(paragraph._styles))
    self.assertEqual(red, paragraph.get_style(color))
    self.assertEqual(extent.make_initial_value(), paragraph.get_style(extent))
def to_paragraph(self, doc: ContentDocument) -> P:
    """Converts and returns current caption paragraph into P instance

    The paragraph receives the caption id, the caption begin and end offsets
    (when set), the region returned by ``_SccParagraphRegion`` and the caption
    style properties. Each ``SccCaptionLineBreak`` content becomes a ``Br``
    child and each ``SccCaptionText`` content becomes a ``Span`` wrapping a
    ``Text``; any other content is skipped.

    For paint-on captions, span begin and end times are both expressed
    relative to the paragraph begin time.

    Args:
        doc: the document the created elements belong to.

    Returns:
        The newly created paragraph.
    """

    def span_offset(time_code):
        """Return the time code as the offset to set on a span."""
        offset = time_code.to_temporal_offset()
        if self.get_caption_style() is SccCaptionStyle.PaintOn:
            # Compute paragraph-relative time. The end time used to be shifted
            # by the paragraph end, which yields a non-positive offset; begin
            # and end now share the paragraph begin as their origin.
            offset -= self._begin.to_temporal_offset()
        return offset

    # Set up a new paragraph
    p = P()
    p.set_doc(doc)
    p.set_id(self._caption_id)

    if self._begin is not None:
        p.set_begin(self._begin.to_temporal_offset())
    if self._end is not None:
        p.set_end(self._end.to_temporal_offset())

    # Set the region to current caption
    region = _SccParagraphRegion(self, doc)
    p.set_region(region.get_region())

    # Set the paragraph style
    for (prop, value) in self.get_style_properties().items():
        p.set_style(prop, value)

    # Add caption content (text and line-breaks)
    for caption_content in self._caption_contents:

        if isinstance(caption_content, SccCaptionLineBreak):
            p.push_child(Br(doc))
            continue

        if isinstance(caption_content, SccCaptionText):
            span = Span(doc)

            text_begin = caption_content.get_begin()
            if text_begin is not None:
                span.set_begin(span_offset(text_begin))

            text_end = caption_content.get_end()
            if text_end is not None:
                span.set_end(span_offset(text_end))

            for (prop, value) in caption_content.get_style_properties().items():
                span.set_style(prop, value)

            span.push_child(Text(doc, caption_content.get_text()))
            p.push_child(span)

    return p
def _get_filled_body(isd: ISD, text: str) -> Body:
    """Build a Body holding a single Div > P > Span > Text chain.

    Args:
        isd: the ISD every created element is attached to.
        text: the text content of the innermost Text element.

    Returns:
        The Body at the top of the chain.
    """
    # Assemble the chain from the innermost element outwards
    span = Span(isd)
    span.push_child(Text(isd, text))

    paragraph = P(isd)
    paragraph.push_child(span)

    div = Div(isd)
    div.push_child(paragraph)

    body = Body(isd)
    body.push_child(div)
    return body
def _get_filled_body(isd: ISD, *text_contents: List[str]) -> Body:
    """Build a Body with one Div per argument and one P per string.

    Args:
        isd: the ISD every created element is attached to.
        *text_contents: one list of strings per Div; each string becomes a
            P > Span > Text chain inside that Div.

    Returns:
        The populated Body.
    """

    def make_paragraph(content: str) -> P:
        """Wrap `content` in a P > Span > Text chain."""
        span = Span(isd)
        span.push_child(Text(isd, content))
        paragraph = P(isd)
        paragraph.push_child(span)
        return paragraph

    body = Body(isd)

    for div_contents in text_contents:
        # The Div is attached to the Body before its paragraphs are added
        div = Div(isd)
        body.push_child(div)

        for content in div_contents:
            div.push_child(make_paragraph(content))

    return body
def test_process_element(self):
    """Check DefaultStylePropertyValuesFilter._process_element on a single P.

    The filter is configured with the initial Color value as the default
    Color. The assertions expect exactly that one property to be removed from
    a paragraph carrying the initial value of every style property.
    """
    color = StyleProperties.Color
    all_properties_count = len(StyleProperties.ALL)

    style_filter = DefaultStylePropertyValuesFilter({
        color: color.make_initial_value(),
    })

    # Paragraph carrying the initial value of every known style property
    paragraph = P()
    for style_property in StyleProperties.ALL:
        paragraph.set_style(style_property, style_property.make_initial_value())

    self.assertEqual(all_properties_count, len(paragraph._styles))
    self.assertEqual(color.make_initial_value(), paragraph.get_style(color))

    style_filter._process_element(paragraph)

    # Only Color matched a configured default
    self.assertIsNone(paragraph.get_style(color))
    self.assertEqual(all_properties_count - 1, len(paragraph._styles))
def process(self, isd: ISD):
    """Merge, in every region body of the ISD, all paragraphs into one.

    For each body whose divs yield more than one paragraph (as returned by
    ``self._get_paragraphs``), the original divs are removed and replaced by
    a single Div holding a single P. That P receives the children of every
    original paragraph, in order, with a Br inserted between the content of
    consecutive paragraphs. Bodies with at most one paragraph are untouched.

    Args:
        isd: the ISD to update in place.
    """
    LOGGER.debug("Apply paragraphs merging filter to ISD.")

    for region in isd.iter_regions():
        for body in region:
            merged_div = Div(isd)
            merged_paragraph = P(isd)
            merged_div.push_child(merged_paragraph)

            # Snapshot the divs: they are detached from the body further down
            body_divs = list(body)
            paragraphs = [
                paragraph
                for body_div in body_divs
                for paragraph in self._get_paragraphs(body_div)
            ]

            if len(paragraphs) < 2:
                # Nothing to merge
                continue

            LOGGER.warning("Merging ISD paragraphs.")

            for body_div in body_divs:
                body_div.remove()

            last_index = len(paragraphs) - 1
            for index, paragraph in enumerate(paragraphs):
                # Iterate over a copy since children are detached while moving
                for child in list(paragraph):
                    child.remove()
                    merged_paragraph.push_child(child)

                # Separate each merged paragraph by a Br element
                if index != last_index:
                    merged_paragraph.push_child(Br(isd))

            body.push_child(merged_div)
def _read_line(context: _SccContext, line: str):
    """Read one SCC line and append the matching paragraph to the context div.

    Empty lines and lines that do not match ``SCC_LINE_PATTERN`` are ignored.
    Otherwise the context caption counter is incremented and a paragraph
    identified as ``caption<count>`` is created, starting at the time code
    captured by the first group of the pattern.

    Args:
        context: reading context providing the caption counter (``count``) and
            the target ``div``.
        line: the raw line to read.
    """
    if line == "":
        return

    match = re.match(SCC_LINE_PATTERN, line)
    if match is None:
        return

    paragraph = P()

    context.count += 1
    paragraph.set_id(f"caption{context.count}")
    paragraph.set_begin(time_codes.parse(match.group(1)))

    # The hexadecimal words follow the first tab and are space-separated
    words_field = line.split('\t')[1]

    # NOTE(review): the decoded characters are collected but not used here
    chars = []
    for hex_word in words_field.split(' '):
        chars.extend(_read_word(hex_word))

    paragraph.set_doc(context.div.get_doc())
    context.div.push_child(paragraph)
def test_srt_writer(self):
    """Check the SRT output for three consecutive paragraphs in two regions.

    The first and third paragraphs use region r1 and carry their own timing.
    The second uses region r2 and has no timing of its own, while r2 is
    active from 2s to 4s.
    """
    doc = ContentDocument()

    untimed_region = Region("r1", doc)
    doc.put_region(untimed_region)

    timed_region = Region("r2", doc)
    timed_region.set_begin(Fraction(2))
    timed_region.set_end(Fraction(4))
    doc.put_region(timed_region)

    body = Body(doc)
    doc.set_body(body)

    div = Div(doc)
    body.push_child(div)

    def add_paragraph(text, region, begin=None, end=None):
        """Append a P > Span > Text chain to the div, with optional timing."""
        paragraph = P(doc)
        paragraph.set_region(region)
        if begin is not None:
            paragraph.set_begin(begin)
        if end is not None:
            paragraph.set_end(end)
        div.push_child(paragraph)

        span = Span(doc)
        span.push_child(Text(doc, text))
        paragraph.push_child(span)

    add_paragraph("Lorem ipsum dolor sit amet,", untimed_region, end=Fraction(2))
    add_paragraph("consectetur adipiscing elit.", timed_region)
    add_paragraph("Pellentesque interdum lacinia sollicitudin.", untimed_region,
                  begin=Fraction(4), end=Fraction(6))

    # NOTE(review): the literal is reproduced exactly as it appears in this
    # view, where it sits on a single line; confirm its line breaks against
    # the SRT layout expected from the writer.
    expected_srt = """1 00:00:00,000 --> 00:00:02,000 Lorem ipsum dolor sit amet, 2 00:00:02,000 --> 00:00:04,000 consectetur adipiscing elit. 3 00:00:04,000 --> 00:00:06,000 Pellentesque interdum lacinia sollicitudin. """

    self.assertEqual(expected_srt, srt_writer.from_model(doc))
def test_process_isd(self):
    """Check SupportedStylePropertiesFilter.process on a whole ISD.

    The filter is configured with BackgroundColor restricted to red and Extent
    mapped to an empty list. The assertions expect the region to keep only its
    red BackgroundColor and its Extent, the paragraph to lose every style, and
    the span to keep only its red BackgroundColor.
    """
    background_color = StyleProperties.BackgroundColor
    direction = StyleProperties.Direction
    font_style = StyleProperties.FontStyle
    red = NamedColors.red.value
    white = NamedColors.white.value

    style_filter = SupportedStylePropertiesFilter({
        background_color: [red],
        StyleProperties.Extent: [],
    })

    # Model: region r1 <- body (1s to 10s) > div > p > span > "hello"
    doc = ContentDocument()

    model_region = Region("r1", doc)
    model_region.set_style(background_color, red)
    model_region.set_style(StyleProperties.LuminanceGain, 2.0)
    doc.put_region(model_region)

    model_body = Body(doc)
    model_body.set_begin(Fraction(1))
    model_body.set_end(Fraction(10))
    doc.set_body(model_body)

    model_div = Div(doc)
    model_div.set_region(model_region)
    model_body.push_child(model_div)

    model_p = P(doc)
    model_p.set_style(background_color, white)
    model_p.set_style(direction, DirectionType.rtl)
    model_div.push_child(model_p)

    model_span = Span(doc)
    model_span.set_style(background_color, red)
    model_span.set_style(font_style, FontStyleType.italic)
    model_span.set_style(direction, DirectionType.ltr)
    model_p.push_child(model_span)

    model_span.push_child(Text(doc, "hello"))

    # Build the ISD at the second significant time
    significant_times = sorted(ISD.significant_times(doc))
    self.assertEqual(3, len(significant_times))

    isd = ISD.from_model(doc, significant_times[1])

    # Before filtering: every applicable style is present on each element
    isd_region = isd.get_region("r1")
    self.assertEqual(len(Region._applicableStyles), len(isd_region._styles))
    self.assertEqual(red, isd_region.get_style(background_color))
    self.assertEqual(2.0, isd_region.get_style(StyleProperties.LuminanceGain))

    isd_body = list(isd_region)[0]
    isd_div = list(isd_body)[0]
    isd_p = list(isd_div)[0]
    isd_span = list(isd_p)[0]

    self.assertEqual(len(P._applicableStyles), len(isd_p._styles))
    self.assertEqual(white, isd_p.get_style(background_color))
    self.assertEqual(DirectionType.rtl, isd_p.get_style(direction))

    self.assertEqual(len(Span._applicableStyles), len(isd_span._styles))
    self.assertEqual(red, isd_span.get_style(background_color))
    self.assertEqual(FontStyleType.italic, isd_span.get_style(font_style))
    self.assertEqual(DirectionType.ltr, isd_span.get_style(direction))

    style_filter.process(isd)

    # After filtering: only supported properties and values remain
    expected_extent = ExtentType(
        height=LengthType(value=0.0, units=LengthType.Units.rh),
        width=LengthType(value=0.0, units=LengthType.Units.rw),
    )
    self.assertEqual(2, len(isd_region._styles))
    self.assertEqual(red, isd_region.get_style(background_color))
    self.assertEqual(expected_extent, isd_region.get_style(StyleProperties.Extent))

    self.assertEqual(0, len(isd_p._styles))
    self.assertIsNone(isd_p.get_style(background_color))
    self.assertIsNone(isd_p.get_style(direction))

    self.assertEqual(1, len(isd_span._styles))
    self.assertEqual(red, isd_span.get_style(background_color))
    self.assertIsNone(isd_span.get_style(font_style))
    self.assertIsNone(isd_span.get_style(direction))
def to_paragraph(self, doc: ContentDocument) -> P:
    """Converts and returns current caption paragraph into P instance

    The paragraph receives the caption id, the caption begin and end offsets
    (when set), the region returned by ``_SccParagraphRegion`` and the caption
    style properties. Caption lines are visited by ascending row; between two
    consecutive lines, one ``Br`` is added per row of distance. Each non-empty
    text of a line becomes a ``Span`` wrapping a ``Text``, with a black
    background unless the text defines its own background color.

    For paint-on captions, span begin and end times are both expressed
    relative to the paragraph begin time.

    Args:
        doc: the document the created elements belong to.

    Returns:
        The newly created paragraph.
    """

    def span_offset(time_code):
        """Return the time code as the offset to set on a span."""
        offset = time_code.to_temporal_offset()
        if self.get_caption_style() is SccCaptionStyle.PaintOn:
            # Compute paragraph-relative time. The end time used to be shifted
            # by the paragraph end, which yields a non-positive offset; begin
            # and end now share the paragraph begin as their origin.
            offset -= self._begin.to_temporal_offset()
        return offset

    # Set up a new paragraph
    p = P()
    p.set_doc(doc)
    p.set_id(self._caption_id)

    if self._begin is not None:
        p.set_begin(self._begin.to_temporal_offset())
    if self._end is not None:
        p.set_end(self._end.to_temporal_offset())

    # Set the region to current caption
    region = _SccParagraphRegion(self, doc)
    p.set_region(region.get_region())

    # Set the paragraph style
    for (prop, value) in self.get_style_properties().items():
        p.set_style(prop, value)

    # Add caption content (text and line-breaks)
    last_row: Optional[int] = None
    for row, caption_line in sorted(self._caption_lines.items()):

        # One line-break per row separating this line from the previous one
        if last_row is not None:
            for _ in range(abs(last_row - row)):
                p.push_child(Br(doc))

        last_row = row

        for caption_text in caption_line.get_texts():

            # Skip empty texts
            if caption_text.is_empty():
                continue

            span = Span(doc)

            text_begin = caption_text.get_begin()
            if text_begin is not None:
                span.set_begin(span_offset(text_begin))

            text_end = caption_text.get_end()
            if text_end is not None:
                span.set_end(span_offset(text_end))

            style_properties = caption_text.get_style_properties()
            for (prop, value) in style_properties.items():
                span.set_style(prop, value)

            # Default to a black background when the text does not define one
            if StyleProperties.BackgroundColor not in style_properties:
                span.set_style(StyleProperties.BackgroundColor, NamedColors.black.value)

            span.push_child(Text(doc, caption_text.get_text()))
            p.push_child(span)

    return p
def test_process_isd(self):
    """Check DefaultStylePropertyValuesFilter.process on a whole ISD.

    The filter is configured with red as the default BackgroundColor and ltr
    as the default Direction. The assertions expect the red BackgroundColor
    to be removed from the region and from the span, the paragraph (white,
    rtl) to be left untouched, and the span to keep its ltr Direction.
    """
    background_color = StyleProperties.BackgroundColor
    direction = StyleProperties.Direction
    font_style = StyleProperties.FontStyle
    red = NamedColors.red.value
    white = NamedColors.white.value

    style_filter = DefaultStylePropertyValuesFilter({
        background_color: red,
        direction: DirectionType.ltr,
    })

    # Model: region r1 <- body (1s to 10s) > div > p > span > "hello"
    doc = ContentDocument()

    model_region = Region("r1", doc)
    model_region.set_style(background_color, red)
    model_region.set_style(StyleProperties.LuminanceGain, 2.0)
    doc.put_region(model_region)

    model_body = Body(doc)
    model_body.set_begin(Fraction(1))
    model_body.set_end(Fraction(10))
    doc.set_body(model_body)

    model_div = Div(doc)
    model_div.set_region(model_region)
    model_body.push_child(model_div)

    model_p = P(doc)
    model_p.set_style(background_color, white)
    model_p.set_style(direction, DirectionType.rtl)
    model_div.push_child(model_p)

    model_span = Span(doc)
    model_span.set_style(background_color, red)
    model_span.set_style(font_style, FontStyleType.italic)
    model_span.set_style(direction, DirectionType.ltr)
    model_p.push_child(model_span)

    model_span.push_child(Text(doc, "hello"))

    # Build the ISD at the second significant time
    significant_times = sorted(ISD.significant_times(doc))
    self.assertEqual(3, len(significant_times))

    isd = ISD.from_model(doc, significant_times[1])

    # Before filtering: every applicable style is present on each element
    isd_region = isd.get_region("r1")
    self.assertEqual(len(Region._applicableStyles), len(isd_region._styles))
    self.assertEqual(red, isd_region.get_style(background_color))
    self.assertEqual(2.0, isd_region.get_style(StyleProperties.LuminanceGain))

    isd_body = list(isd_region)[0]
    isd_div = list(isd_body)[0]
    isd_p = list(isd_div)[0]
    isd_span = list(isd_p)[0]

    self.assertEqual(len(P._applicableStyles), len(isd_p._styles))
    self.assertEqual(white, isd_p.get_style(background_color))
    self.assertEqual(DirectionType.rtl, isd_p.get_style(direction))

    self.assertEqual(len(Span._applicableStyles), len(isd_span._styles))
    self.assertEqual(red, isd_span.get_style(background_color))
    self.assertEqual(FontStyleType.italic, isd_span.get_style(font_style))
    self.assertEqual(DirectionType.ltr, isd_span.get_style(direction))

    style_filter.process(isd)

    # Region: the default-valued BackgroundColor is gone
    self.assertEqual(len(Region._applicableStyles) - 1, len(isd_region._styles))
    self.assertIsNone(isd_region.get_style(background_color))

    # Paragraph: neither value matches a configured default
    self.assertEqual(len(P._applicableStyles), len(isd_p._styles))
    self.assertEqual(white, isd_p.get_style(background_color))
    self.assertEqual(DirectionType.rtl, isd_p.get_style(direction))

    # Span: BackgroundColor is gone while the ltr Direction is kept
    # NOTE(review): presumably kept because the parent paragraph is rtl;
    # confirm against the filter implementation
    self.assertEqual(len(Span._applicableStyles) - 1, len(isd_span._styles))
    self.assertIsNone(isd_span.get_style(background_color))
    self.assertEqual(FontStyleType.italic, isd_span.get_style(font_style))
    self.assertEqual(DirectionType.ltr, isd_span.get_style(direction))