def get_tei_children_for_semantic_content(
    self,
    semantic_content: SemanticContentWrapper,
    context: TeiElementFactoryContext
) -> List[etree.ElementBase]:
    """Render a `SemanticRawEquation` as a single-item list with a `formula` element.

    `SemanticRawEquationContent` children are rendered from their merged
    layout block. Every other child is handed to
    `context.append_tei_children_list_and_get_whitespace`, which appends to
    the shared children list and returns the whitespace to carry forward.

    The whitespace following a raw content block is held back and only
    emitted directly by this method if another raw content block follows.
    """
    LOGGER.debug('semantic_content: %s', semantic_content)
    assert isinstance(semantic_content, SemanticRawEquation)
    formula_children: T_ElementChildrenList = [
        context.get_default_attributes_for_semantic_content(semantic_content)
    ]
    carried_whitespace = ''
    for equation_part in semantic_content:
        if not isinstance(equation_part, SemanticRawEquationContent):
            carried_whitespace = context.append_tei_children_list_and_get_whitespace(
                formula_children,
                equation_part,
                pending_whitespace=carried_whitespace
            )
        else:
            block = equation_part.merged_block
            if carried_whitespace:
                formula_children.append(carried_whitespace)
            formula_children.extend(context.iter_layout_block_tei_children(block))
            carried_whitespace = block.whitespace
    return [TEI_E('formula', *formula_children)]
def get_tei_children_for_semantic_content(
    self,
    semantic_content: SemanticContentWrapper,
    context: TeiElementFactoryContext
) -> List[etree.ElementBase]:
    """Render a `SemanticParagraph` as a flat list of TEI elements.

    The paragraph is first flattened via `iter_flat_paragraph_formula`.
    Each `SemanticParagraph` it yields becomes its own `p` element; anything
    else it yields is rendered through the context and placed at the same
    level as the `p` elements, in iteration order.
    """
    LOGGER.debug('semantic_content: %s', semantic_content)
    assert isinstance(semantic_content, SemanticParagraph)
    tei_elements: List[etree.ElementBase] = []
    for flat_item in iter_flat_paragraph_formula(semantic_content):
        if isinstance(flat_item, SemanticParagraph):
            paragraph_children: T_ElementChildrenList = [
                context.get_default_attributes_for_semantic_content(flat_item)
            ]
            carried_whitespace = ''
            for paragraph_child in flat_item:
                carried_whitespace = context.append_tei_children_list_and_get_whitespace(
                    paragraph_children,
                    paragraph_child,
                    pending_whitespace=carried_whitespace
                )
            tei_elements.append(TEI_E('p', *paragraph_children))
        else:
            # non-paragraph items become siblings of the `p` elements
            tei_elements.extend(
                context.get_tei_child_elements_for_semantic_content(flat_item)
            )
    return tei_elements
def get_tei_element_for_semantic_content(
    self,
    semantic_content: SemanticContentWrapper,
    context: TeiElementFactoryContext
) -> etree.ElementBase:
    """Render a `SemanticTable` as a `figure` element with `type="table"`.

    A `SemanticLabel` child is emitted twice, as `head` followed by `label`,
    both built from the same merged layout block. A `SemanticCaption` child
    becomes `figDesc`. All remaining children are rendered through the
    context.
    """
    LOGGER.debug('semantic_content: %s', semantic_content)
    assert isinstance(semantic_content, SemanticTable)
    figure_children = [
        context.get_default_attributes_for_semantic_content(semantic_content)
    ]
    for table_child in semantic_content:
        if isinstance(table_child, SemanticLabel):
            label_block = table_child.merged_block
            # the children are generated separately for each of the two tags
            for tag_name in ('head', 'label'):
                figure_children.append(
                    TEI_E(
                        tag_name,
                        *context.iter_layout_block_tei_children(label_block)
                    )
                )
        elif isinstance(table_child, SemanticCaption):
            figure_children.append(
                TEI_E(
                    'figDesc',
                    *context.iter_layout_block_tei_children(table_child.merged_block)
                )
            )
        else:
            figure_children.extend(
                context.get_tei_child_elements_for_semantic_content(table_child)
            )
    return TEI_E('figure', {'type': 'table'}, *figure_children)
def get_tei_reference_element(  # pylint: disable=too-many-branches
    semantic_ref: SemanticReference,
    context: TeiElementFactoryContext
) -> etree.ElementBase:
    """Render a `SemanticReference` as a `biblStruct` element.

    For every child, the parent element is resolved (and created if needed)
    from the path returned by `context.get_parent_path_for_semantic_content`.
    Labels and raw reference text become note elements, titles become a main
    analytic-level `title`, authors are rendered via
    `get_tei_author_for_semantic_author_element`, and dates are always placed
    under `monogr/imprint`. Any other child is rendered through the context.
    """
    LOGGER.debug('semantic_ref: %s', semantic_ref)
    bibl_builder = TeiElementBuilder(
        TEI_E(
            'biblStruct',
            context.get_default_attributes_for_semantic_content(semantic_ref)
        )
    )
    seen_date = False
    for ref_child in semantic_ref:
        # resolved for every child, including dates which then use a fixed path
        target_parent = bibl_builder.get_or_create(
            context.get_parent_path_for_semantic_content(ref_child)
        )
        if isinstance(ref_child, SemanticLabel):
            target_parent.append(
                create_tei_note_element('label', ref_child.merged_block)
            )
        elif isinstance(ref_child, SemanticRawReferenceText):
            target_parent.append(
                create_tei_note_element('raw_reference', ref_child.merged_block)
            )
        elif isinstance(ref_child, SemanticTitle):
            target_parent.append(
                TEI_E(
                    'title',
                    {'level': 'a', 'type': 'main'},
                    *context.iter_layout_block_tei_children(ref_child.merged_block)
                )
            )
        elif isinstance(ref_child, SemanticAuthor):
            target_parent.append(
                get_tei_author_for_semantic_author_element(ref_child, context=context)
            )
        elif isinstance(ref_child, SemanticDate):
            imprint_parent = bibl_builder.get_or_create(['monogr', 'imprint'])
            date_attributes = {}
            if not seen_date:
                # assume first date is published date (more or less matches GROBID)
                date_attributes['type'] = 'published'
            if ref_child.year:
                date_attributes['when'] = str(ref_child.year)
            imprint_parent.append(
                TEI_E(
                    'date',
                    date_attributes,
                    *context.iter_layout_block_tei_children(
                        layout_block=ref_child.merged_block
                    )
                )
            )
            seen_date = True
        else:
            target_parent.extend(
                context.get_tei_child_elements_for_semantic_content(ref_child)
            )
    return bibl_builder.element
def get_tei_element_for_semantic_content(
    self,
    semantic_content: SemanticContentWrapper,
    context: TeiElementFactoryContext
) -> etree.ElementBase:
    """Render a `SemanticMixedNote` as a `note` element.

    The `type` attribute is the note's `note_type`, or `'other'` when that
    is empty. It is passed after the default attributes. All children are
    rendered through the context.
    """
    LOGGER.debug('semantic_content: %s', semantic_content)
    assert isinstance(semantic_content, SemanticMixedNote)
    note_type = semantic_content.note_type or 'other'
    note_children = [
        context.get_default_attributes_for_semantic_content(semantic_content),
        {'type': note_type}
    ]
    for note_child in semantic_content:
        note_children.extend(
            context.get_tei_child_elements_for_semantic_content(note_child)
        )
    return TEI_E('note', *note_children)
def _get_tei_raw_reference_element(
    semantic_raw_ref: SemanticRawReference,
    context: TeiElementFactoryContext
) -> etree.ElementBase:
    """Render a `SemanticRawReference` as a `biblStruct` element.

    `SemanticRawReferenceText` children become `raw_reference` note elements;
    all other children are rendered through the context.
    """
    LOGGER.debug('semantic_raw_ref: %s', semantic_raw_ref)
    bibl_children = []
    for ref_child in semantic_raw_ref:
        if not isinstance(ref_child, SemanticRawReferenceText):
            bibl_children.extend(
                context.get_tei_child_elements_for_semantic_content(ref_child)
            )
        else:
            bibl_children.append(
                create_tei_note_element('raw_reference', ref_child.merged_block)
            )
    # the default attributes are looked up only after the children were rendered
    default_attributes = context.get_default_attributes_for_semantic_content(
        semantic_raw_ref
    )
    return TEI_E('biblStruct', default_attributes, *bibl_children)
def get_tei_element_for_semantic_content(
    self,
    semantic_content: SemanticContentWrapper,
    context: TeiElementFactoryContext
) -> etree.ElementBase:
    """Render a `SemanticHeading` as a `head` element.

    A `SemanticLabel` child contributes its text as the `n` attribute
    instead of element content. Every other child is rendered from its
    merged layout block with coordinates disabled; the whitespace following
    one block is only emitted if another block follows.
    """
    LOGGER.debug('semantic_content: %s', semantic_content)
    assert isinstance(semantic_content, SemanticHeading)
    head_children: T_ElementChildrenList = [
        context.get_default_attributes_for_semantic_content(semantic_content)
    ]
    carried_whitespace = ''
    for heading_child in semantic_content:
        if isinstance(heading_child, SemanticLabel):
            # labels do not affect the carried whitespace
            head_children.append({'n': heading_child.get_text()})
        else:
            block = heading_child.merged_block
            if carried_whitespace:
                head_children.append(carried_whitespace)
            head_children.extend(
                context.iter_layout_block_tei_children(
                    layout_block=block,
                    enable_coordinates=False
                )
            )
            carried_whitespace = block.whitespace
    return TEI_E('head', *head_children)
def get_tei_element_for_semantic_content(
    self,
    semantic_content: SemanticContentWrapper,
    context: TeiElementFactoryContext
) -> etree.ElementBase:
    """Render a `SemanticCitation` as a `ref` element.

    The `type` attribute is looked up in `CITATION_TYPE_BY_SEMANTIC_CLASS`
    by the exact class of the citation and omitted when there is no (truthy)
    entry. The `target` attribute is `#` followed by the citation's
    `target_content_id`, and omitted when that id is empty.
    """
    assert isinstance(semantic_content, SemanticCitation)
    ref_attributes = {}
    citation_type = CITATION_TYPE_BY_SEMANTIC_CLASS.get(type(semantic_content))
    if citation_type:
        ref_attributes['type'] = citation_type
    target_content_id = semantic_content.target_content_id
    if target_content_id:
        ref_attributes['target'] = '#' + target_content_id
    ref_children = context.iter_layout_block_tei_children(
        semantic_content.merged_block
    )
    return TEI_E('ref', ref_attributes, *ref_children)
def get_tei_raw_reference_list_element(
    semantic_reference_list: SemanticReferenceList,
    context: TeiElementFactoryContext
) -> etree.ElementBase:
    """Render a `SemanticReferenceList` as a `listBibl` element.

    `SemanticRawReference` items are rendered by
    `_get_tei_raw_reference_element`, `SemanticReference` items by
    `get_tei_reference_element`, and anything else through the context.
    """
    list_builder = TeiElementBuilder(TEI_E('listBibl'))
    for list_item in semantic_reference_list:
        if isinstance(list_item, SemanticRawReference):
            list_builder.append(
                _get_tei_raw_reference_element(list_item, context=context)
            )
        elif isinstance(list_item, SemanticReference):
            list_builder.append(
                get_tei_reference_element(list_item, context=context)
            )
        else:
            list_builder.extend(
                context.get_tei_child_elements_for_semantic_content(list_item)
            )
    return list_builder.element
def get_tei_element_for_semantic_content(
    self,
    semantic_content: SemanticContentWrapper,
    context: TeiElementFactoryContext
) -> etree.ElementBase:
    """Render a `SemanticExternalIdentifier` as an `idno` element.

    The element content is the identifier's `value`. When the identifier has
    an `external_identifier_type`, it is added as the `type` attribute on a
    copy of the default attributes, leaving the mapping returned by the
    context unmodified.
    """
    assert isinstance(semantic_content, SemanticExternalIdentifier)
    # only build the debug arguments (including `get_text()`) when needed
    if LOGGER.isEnabledFor(logging.DEBUG):
        LOGGER.debug(
            'external_identifier: type=%r, value=%r, text=%r, content=%r',
            semantic_content.external_identifier_type,
            semantic_content.value,
            semantic_content.get_text(),
            semantic_content
        )
    default_attributes = context.get_default_attributes_for_semantic_content(
        semantic_content
    )
    identifier_type = semantic_content.external_identifier_type
    idno_attributes = (
        {**default_attributes, 'type': identifier_type}
        if identifier_type
        else default_attributes
    )
    return TEI_E('idno', idno_attributes, semantic_content.value)
def get_tei_element_for_semantic_content(
    self,
    semantic_content: SemanticContentWrapper,
    context: TeiElementFactoryContext
) -> etree.ElementBase:
    """Render a `SemanticGraphic` as a `graphic` element.

    The default attributes are requested with coordinates disabled. Instead,
    `coords` is taken from the layout graphic's own coordinates when present.
    `url` is set from the graphic's `relative_path` and `type` from the
    layout graphic's `graphic_type`, each only when non-empty.
    """
    LOGGER.debug('semantic_content: %s', semantic_content)
    assert isinstance(semantic_content, SemanticGraphic)
    layout_graphic = semantic_content.layout_graphic
    graphic_attributes = [
        context.get_default_attributes_for_semantic_content(
            semantic_content,
            enable_coordinates=False
        )
    ]
    relative_path = semantic_content.relative_path
    if relative_path:
        graphic_attributes.append({'url': relative_path})
    if layout_graphic:
        coordinates = layout_graphic.coordinates
        if coordinates:
            graphic_attributes.append({'coords': format_coordinates(coordinates)})
        graphic_type = layout_graphic.graphic_type
        if graphic_type:
            graphic_attributes.append({'type': graphic_type})
    return TEI_E('graphic', *graphic_attributes)
def get_tei_children_for_semantic_content(
    self,
    semantic_content: SemanticContentWrapper,
    context: TeiElementFactoryContext
) -> List[etree.ElementBase]:
    """Render a `SemanticSection` as a single-item list with a `div` element.

    Figures and tables inside the section are skipped here because they are
    rendered at parent level. Acknowledgement sections get
    `type="acknowledgement"`. An empty list is returned when the resulting
    `div` has no children.
    """
    LOGGER.debug('semantic_content: %s', semantic_content)
    assert isinstance(semantic_content, SemanticSection)
    div_builder = TeiElementBuilder(TEI_E('div'))
    for section_child in semantic_content:
        # figures and tables are rendered at parent level
        if not isinstance(section_child, (SemanticFigure, SemanticTable)):
            div_builder.extend(
                context.get_tei_child_elements_for_semantic_content(section_child)
            )
    div_element = div_builder.element
    if semantic_content.section_type == SemanticSectionTypes.ACKNOWLEDGEMENT:
        div_element.attrib['type'] = 'acknowledgement'
    return [div_element] if list(div_element) else []