Exemplo n.º 1
0
 def _match_graphic_elements(
     self,
     semantic_graphic_list: Sequence[SemanticGraphic],
     candidate_semantic_content_list: Sequence[SemanticContentWrapper],
     unmatched_graphics_container: SemanticMixedContentWrapper
 ):
     """Attach graphics to their matching semantic content.

     A chain of graphic matchers is run over the given graphics and
     candidates. The chain always contains the bounding box distance and
     related block text matchers; the OCR matcher is appended only when
     `self.config.use_ocr_model` is truthy.

     Args:
         semantic_graphic_list: The graphics to find a home for.
         candidate_semantic_content_list: The semantic content items the
             graphics may be matched against.
         unmatched_graphics_container: Receives every graphic that the
             matcher chain reported as unmatched.
     """
     matchers: List[GraphicMatcher] = [
         BoundingBoxDistanceGraphicMatcher(),
         GraphicRelatedBlockTextGraphicMatcher(),
     ]
     if self.config.use_ocr_model:
         # The OCR matcher cannot be built without a loaded OCR model.
         assert self.fulltext_models.ocr_model
         ocr_matcher = OpticalCharacterRecognitionGraphicMatcher(
             ocr_model=self.fulltext_models.ocr_model
         )
         matchers.append(ocr_matcher)

     match_result = ChainedGraphicMatcher(matchers).get_graphic_matches(
         semantic_graphic_list=semantic_graphic_list,
         candidate_semantic_content_list=candidate_semantic_content_list
     )

     for match in match_result.graphic_matches:
         matched_content = match.candidate_semantic_content
         if not isinstance(matched_content, SemanticMixedContentWrapper):
             # Only mixed content wrappers are extended here; a match against
             # any other candidate type is skipped without further handling.
             continue
         matched_content.add_content(match.semantic_graphic)

     leftover_graphics = match_result.unmatched_graphics
     LOGGER.info('unmatched_graphics: %r', leftover_graphics)
     for leftover_graphic in leftover_graphics:
         unmatched_graphics_container.add_content(leftover_graphic)
Exemplo n.º 2
0
def append_semantic_markers_for_layout_block(
        parent_semantic_content: SemanticMixedContentWrapper,
        layout_block: LayoutBlock) -> None:
    """Add the semantic markers found for `layout_block` to the parent.

    Args:
        parent_semantic_content: The container that receives the markers,
            in the order they are produced.
        layout_block: The layout block passed to
            `iter_semantic_markers_for_layout_block`.
    """
    # The iterator is fully consumed before the first `add_content` call.
    collected_markers = [*iter_semantic_markers_for_layout_block(layout_block)]
    for marker in collected_markers:
        parent_semantic_content.add_content(marker)