Example #1
    def save(self, item: SymbolData, _: None) -> None:
        symbols_with_ids = item.symbols_with_ids
        boxes = item.boxes
        matches = item.matches
        symbol_contexts = item.symbol_contexts
        mathml_contexts = item.mathml_contexts
        symbol_formulas = item.symbol_formulas
        mathml_formulas = item.mathml_formulas

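        # Map each symbol object's identity (via id()) to its symbol ID. Object
        # identity is presumably used as the key because symbol objects are not
        # hashable, and because repeated, visually identical symbols must still
        # resolve to distinct IDs.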
        symbol_ids_by_symbol_object_ids = {}
        for symbol_with_id in symbols_with_ids:
            symbol_object_id = id(symbol_with_id.symbol)
            symbol_ids_by_symbol_object_ids[symbol_object_id] = symbol_with_id.symbol_id

        entity_infos = []

        for symbol_with_id in symbols_with_ids:
            symbol = symbol_with_id.symbol
            # TODO(andrewhead): move this filtering condition into 'parse_equation'
            if symbol.tex in ["$|$", "|"]:
                continue

            symbol_id = symbol_with_id.symbol_id

            # Get context and formula of the symbol, and other matching ones.
            context = symbol_contexts.get(symbol_id)
            matching_contexts = mathml_contexts.get(symbol.mathml, [])
            other_context_texs = []
            other_context_sentence_ids = []
            for c in matching_contexts:
                matching_sentence_id = f"{c.tex_path}-{c.sentence_id}"
                if (matching_sentence_id not in other_context_sentence_ids
                        # and c.sentence_id != context.sentence_id
                    ):
                    other_context_texs.append(c.snippet)
                    other_context_sentence_ids.append(matching_sentence_id)

            formula = symbol_formulas.get(symbol_id)
            matching_formulas = mathml_formulas.get(symbol.mathml, [])
            other_formula_texs = []
            other_formula_ids = []
            for f in matching_formulas:
                equation_id = f"{f.tex_path}-{f.equation_id}"
                if (equation_id not in other_formula_ids
                        # and (formula is None or equation_id != formula.equation_id)
                    ):
                    other_formula_texs.append(f.tex)
                    other_formula_ids.append(equation_id)

            box = boxes.get(symbol_id)
            if box is None:
                continue

            data: EntityData = {
                "tex": f"${symbol.tex}$",
                "tex_start": symbol.start,
                "tex_end": symbol.end,
                "mathml": symbol.mathml,
                "mathml_near_matches": [
                    m.matching_mathml for m in matches[symbol.mathml]
                ],
                # "snippet": context.snippet,
                "snippets": other_context_texs,
                "defining_formulas": other_formula_texs,
                "is_definition": symbol.is_definition or False,
            }
            # if formula is not None:
            #     data['formula'] = formula.tex

            def create_symbol_id_string(sid: SymbolId) -> str:
                return f"{sid.tex_path}-{sid.equation_index}-{sid.symbol_index}"

            sentence_id = (f"{context.tex_path}-{context.sentence_id}"
                           if context is not None else None)

            parent_id: Optional[str] = None
            for other_symbol_with_id in symbols_with_ids:
                other_symbol_id = other_symbol_with_id.symbol_id
                other_symbol = other_symbol_with_id.symbol
                # The parent is any symbol that lists this symbol among its children.
                if symbol in other_symbol.children:
                    parent_id = create_symbol_id_string(other_symbol_id)
                    break

            child_ids = []
            for child_symbol in symbol.children:
                child_symbol_id = symbol_ids_by_symbol_object_ids[id(child_symbol)]
                child_ids.append(create_symbol_id_string(child_symbol_id))

            relationships: EntityRelationships = {
                "equation": EntityReference(
                    type_="equation",
                    id_=f"{symbol_id.tex_path}-{symbol_id.equation_index}",
                ),
                "parent": EntityReference(type_="symbol", id_=parent_id),
                "children": [
                    EntityReference(type_="symbol", id_=id_) for id_ in child_ids
                ],
                "sentence": EntityReference(type_="sentence", id_=sentence_id)
                if sentence_id is not None
                else EntityReference(type_="sentence", id_=None),
                "defining_formula_equations": [
                    EntityReference(type_="equation", id_=id_)
                    for id_ in other_formula_ids
                ],
                "snippet_sentences": [
                    EntityReference(type_="sentence", id_=id_)
                    for id_ in other_context_sentence_ids
                ],
                # "snippet_sentence": EntityReference(
                #     type_="sentence", id_=f"{symbol_id.tex_path}-{context.sentence_id}"
                # )
                # if context is not None
                # else None,
                # "formula_equation": EntityReference(
                #     type_="equation",
                #     id_=f"{symbol_id.tex_path}-{formula.equation_id}"
                #     if formula is not None
                #     else None,
                # ),
            }

            entity_information = EntityInformation(
                id_=(
                    f"{symbol_id.tex_path}-{symbol_id.equation_index}"
                    f"-{symbol_id.symbol_index}"
                ),
                type_="symbol",
                bounding_boxes=[box],
                data=data,
                relationships=relationships,
            )
            entity_infos.append(entity_information)

        upload_entities(item.s2_id, item.arxiv_id, entity_infos,
                        self.args.data_version)
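
The snippet and formula lists above are deduplicated with linear membership tests on plain lists, which is quadratic in the number of matches. A set-backed, order-preserving variant is sketched below; dedup_ordered is a hypothetical helper, not part of this pipeline.

from typing import Iterable, List, Tuple


def dedup_ordered(pairs: Iterable[Tuple[str, str]]) -> Tuple[List[str], List[str]]:
    # Deduplicate (id, payload) pairs by ID, preserving first-seen order.
    # The set gives O(1) membership checks; the lists keep the ordering.
    seen = set()
    ids: List[str] = []
    payloads: List[str] = []
    for id_, payload in pairs:
        if id_ not in seen:
            seen.add(id_)
            ids.append(id_)
            payloads.append(payload)
    return ids, payloads


# Example: build other_context_sentence_ids / other_context_texs in one pass.
# other_context_sentence_ids, other_context_texs = dedup_ordered(
#     (f"{c.tex_path}-{c.sentence_id}", c.snippet) for c in matching_contexts
# )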
Example #2
def upload_symbols(
    processing_summary: PaperProcessingResult, data_version: Optional[int]
) -> None:

    arxiv_id = processing_summary.arxiv_id
    entities = [es.entity for es in processing_summary.entities]
    symbols = cast(List[SerializableSymbol], entities)
    symbols_by_id = {sid(s): s for s in symbols}

    entity_infos: List[EntityUploadInfo] = []

    # Load MathML matches for partial matching of symbols.
    matches: Matches = {}
    matches_path = os.path.join(
        directories.arxiv_subdir("symbol-matches", processing_summary.arxiv_id),
        "matches.csv",
    )
    if os.path.exists(matches_path):
        for match in file_utils.load_from_csv(matches_path, Match):
            if match.queried_mathml not in matches:
                matches[match.queried_mathml] = []
            matches[match.queried_mathml].append(match)
    else:
        logging.warning(
            "Could not find symbol matches information for paper %s.", arxiv_id,
        )

    # Load parent-child relationships for symbols.
    children: Dict[SymbolId, List[SymbolId]] = defaultdict(list)
    parents: Dict[SymbolId, SymbolId] = {}
    children_path = os.path.join(
        directories.arxiv_subdir("detected-symbols", arxiv_id), "symbol_children.csv"
    )
    if os.path.exists(children_path):
        for parent in file_utils.load_from_csv(children_path, SerializableChild):
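            # Symbol IDs are serialized as "{tex_path}-{equation_index}-{symbol_index}";
            # the child ID substitutes the child's symbol index into the same scheme.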
            pid = f"{parent.tex_path}-{parent.equation_index}-{parent.symbol_index}"
            cid = f"{parent.tex_path}-{parent.equation_index}-{parent.child_index}"
            parents[cid] = pid
            children[pid].append(cid)
    else:
        logging.warning(
            "Could not find file mapping from symbol to their children for paper %s.",
            arxiv_id,
        )

    # Load contexts that the symbols appear in. Sort them by the symbol MathML.
    context_data_missing = False
    contexts_path = os.path.join(
        directories.arxiv_subdir("contexts-for-symbols", arxiv_id), "contexts.csv",
    )
    if not os.path.exists(contexts_path):
        logging.warning(  # pylint: disable=logging-not-lazy
            "Contexts have not been found for symbols for arXiv paper %s. "
            + "Symbol data will be uploaded without contexts.",
            arxiv_id,
        )
        context_data_missing = True

    symbol_contexts = {}
    mathml_contexts = defaultdict(list)
    if not context_data_missing:
        for context in file_utils.load_from_csv(contexts_path, Context):
            tex_path = context.tex_path
            symbol_id = f"{tex_path}-{context.entity_id}"
            symbol_contexts[symbol_id] = context
            symbol = symbols_by_id[symbol_id]
            mathml_contexts[symbol.mathml].append(context)

    # Prepare collections of formulae that each symbol was found in.
    symbol_formulas = {}
    mathml_formulas: Dict[str, Set[DefiningFormula]] = defaultdict(set)
    for symbol in symbols:
        if (
            symbol.is_definition
            and symbol.equation is not None
            and symbol.relative_start is not None
            and symbol.relative_end is not None
        ):
            highlighted = wrap_span(
                symbol.equation,
                symbol.relative_start,
                symbol.relative_end,
                before=r"\htmlClass{match-highlight}{",
                after="}",
                braces=True,
            )
            formula = DefiningFormula(
                tex=highlighted,
                tex_path=symbol.tex_path,
                equation_id=str(symbol.equation_index),
            )
            symbol_formulas[sid(symbol)] = formula
            mathml_formulas[symbol.mathml].add(formula)

    for localized_entity in processing_summary.entities:

        symbol = cast(SerializableSymbol, localized_entity.entity)
        boxes = [
            BoundingBox(l.left, l.top, l.width, l.height, l.page)
            for l in localized_entity.locations
        ]

        # Get context and formula of the symbol, and other matching ones.
        symbol_context = symbol_contexts.get(sid(symbol))
        matching_contexts = mathml_contexts.get(symbol.mathml, [])
        other_context_texs = []
        other_context_sentence_ids = []
        for c in matching_contexts:
            matching_sentence_id = f"{c.tex_path}-{c.sentence_id}"
            if matching_sentence_id not in other_context_sentence_ids:
                other_context_texs.append(c.snippet)
                other_context_sentence_ids.append(matching_sentence_id)

        matching_formulas = mathml_formulas.get(symbol.mathml, set())
        other_formula_texs = []
        other_formula_ids = []
        for f in matching_formulas:
            equation_id = f"{f.tex_path}-{f.equation_id}"
            if equation_id not in other_formula_ids:
                other_formula_texs.append(f.tex)
                other_formula_ids.append(equation_id)

        # Package up data for the symbol.
        tags: List[str] = []
        MAX_BOX_HEIGHT = 0.1
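        # Box dimensions appear to be expressed as fractions of the page size (an
        # inference from the threshold's magnitude), so 0.1 flags boxes taller
        # than 10% of the page.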
        for b in boxes:
            if b.height > MAX_BOX_HEIGHT:
                logging.debug(  # pylint: disable=logging-not-lazy
                    "Detected large bounding box for symbol with height %f for entity %s of paper "
                    + "%s. Entity will be given a tag indicating it is unexpectedly large.",
                    b.height,
                    f"{localized_entity.entity.tex_path}-{localized_entity.entity.id_}",
                    arxiv_id,
                )
                tags.append("large")
                break

        data: EntityData = {
            "tex": f"${symbol.tex}$",
            "tex_start": symbol.start,
            "tex_end": symbol.end,
            "type": symbol.type_,
            "mathml": symbol.mathml,
            "mathml_near_matches": [m.matching_mathml for m in matches[symbol.mathml]],
            "snippets": other_context_texs,
            "defining_formulas": other_formula_texs,
            "is_definition": symbol.is_definition or False,
            "tags": tags,
        }

        # Create links between this symbol, its sentence, and related symbols.
        sentence_id = (
            f"{symbol_context.tex_path}-{symbol_context.sentence_id}"
            if symbol_context is not None
            else None
        )

        parent_id = parents.get(sid(symbol))
        child_ids = children.get(sid(symbol), [])

        relationships: EntityRelationships = {
            "equation": EntityReference(
                type_="equation", id_=f"{symbol.tex_path}-{symbol.equation_index}",
            ),
            "parent": EntityReference(type_="symbol", id_=parent_id),
            "children": [EntityReference(type_="symbol", id_=id_) for id_ in child_ids],
            "sentence": EntityReference(type_="sentence", id_=sentence_id)
            if sentence_id is not None
            else EntityReference(type_="sentence", id_=None),
            "defining_formula_equations": [
                EntityReference(type_="equation", id_=id_) for id_ in other_formula_ids
            ],
            "snippet_sentences": [
                EntityReference(type_="sentence", id_=id_)
                for id_ in other_context_sentence_ids
            ],
        }

        # Save all data for this symbol
        entity_information = EntityUploadInfo(
            id_=sid(symbol),
            type_="symbol",
            bounding_boxes=boxes,
            data=data,
            relationships=relationships,
        )
        entity_infos.append(entity_information)

    upload_entities(
        processing_summary.s2_id, arxiv_id, entity_infos, data_version,
    )
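
For context, wrap_span (used above to build the highlighted formula) inserts delimiter strings around a character span of the equation TeX. The following is a rough sketch of that behavior under stated assumptions; it ignores the braces option, whose exact semantics are an assumption here.

def wrap_span_sketch(tex: str, start: int, end: int, before: str, after: str) -> str:
    # Wrap tex[start:end] in the given delimiters, e.g. a KaTeX
    # \htmlClass{match-highlight}{...} wrapper. The real wrap_span also
    # handles brace balancing (braces=True), which this sketch omits.
    return tex[:start] + before + tex[start:end] + after + tex[end:]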
Example #3
def upload_terms(processing_summary: PaperProcessingResult,
                 data_version: Optional[int]) -> None:

    arxiv_id = processing_summary.arxiv_id
    contexts = file_utils.load_from_csv(
        os.path.join(
            directories.arxiv_subdir("contexts-for-glossary-terms", arxiv_id),
            "contexts.csv",
        ),
        Context,
    )
    contexts_by_entity = {(c.tex_path, c.entity_id): c for c in contexts}

    # Assemble contexts that should be shown for each term.
    contexts_by_term: Dict[str, List[Context]] = defaultdict(list)
    for entity_and_location in processing_summary.localized_entities:
        term = cast(Term, entity_and_location.entity)
        if (term.tex_path, term.id_) in contexts_by_entity:
            contexts_by_term[term.text].append(
                contexts_by_entity[(term.tex_path, term.id_)])

    entity_infos = []
    for entity_and_location in processing_summary.localized_entities:
        term = cast(Term, entity_and_location.entity)
        context = contexts_by_entity.get((term.tex_path, term.id_))
        boxes = [cast(BoundingBox, l) for l in entity_and_location.locations]

        # Cluster bounding boxes, in case any of these terms are defined as a macro
        # (in which case all appearances of that term on the same page will have
        # been lumped together).
        clusters = cluster_boxes(boxes, vertical_split=0.005)
        for i, cluster in enumerate(clusters):
            entity_info = EntityInformation(
                id_=f"{term.tex_path}-{term.id_}-{i}",
                type_="term",
                bounding_boxes=list(cluster),
                data={
                    "name": term.text,
                    "definitions": term.definitions,
                    "definition_texs": term.definitions,
                    "sources": term.sources,
                    "snippets": [
                        c.snippet for c in contexts_by_term.get(term.text, [])
                    ],
                },
                relationships={
                    "sentence": EntityReference(
                        type_="sentence",
                        id_=f"{context.tex_path}-{context.sentence_id}"
                        if context is not None
                        else None,
                    ),
                    "snippet_sentences": [
                        EntityReference(
                            type_="sentence", id_=f"{c.tex_path}-{c.sentence_id}"
                        )
                        for c in contexts_by_term.get(term.text, [])
                    ],
                },
            )
            entity_infos.append(entity_info)

    upload_entities(
        processing_summary.s2_id,
        processing_summary.arxiv_id,
        entity_infos,
        data_version,
    )
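
cluster_boxes itself is not shown here. The sketch below is a guess at its core idea, grouping boxes that are vertically close; the real implementation may differ (for example, in how it treats pages or horizontal position), and cluster_boxes_sketch is a hypothetical name.

from typing import Iterable, Iterator, List


def cluster_boxes_sketch(
    boxes: Iterable[BoundingBox], vertical_split: float
) -> Iterator[List[BoundingBox]]:
    # Assumes the BoundingBox type used above (left/top/width/height/page).
    # Sort boxes top-to-bottom and start a new cluster whenever the vertical
    # gap to the previous box exceeds the threshold.
    ordered = sorted(boxes, key=lambda b: (b.page, b.top))
    cluster: List[BoundingBox] = []
    for box in ordered:
        if cluster and (
            box.page != cluster[-1].page
            or box.top - cluster[-1].top > vertical_split
        ):
            yield cluster
            cluster = []
        cluster.append(box)
    if cluster:
        yield cluster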
Example #4
def upload_definitions(processing_summary: PaperProcessingResult,
                       data_version: Optional[int]) -> None:

    term_infos = []
    definition_infos = []
    for entity_and_location in processing_summary.localized_entities:
        boxes = [cast(BoundingBox, l) for l in entity_and_location.locations]
        entity = entity_and_location.entity

        if entity.id_.startswith("definition"):
            definition = cast(Definition, entity)
            definition_info = EntityInformation(
                id_=definition.id_,
                type_="definition",
                bounding_boxes=boxes,
                data={
                    "definiendum": definition.definiendum,
                    "definition": definition.text,
                    "tex": definition.tex,
                },
                relationships={
                    "sentence": EntityReference(
                        type_="sentence",
                        id_=f"{definition.tex_path}-{definition.sentence_id}"
                        if definition.sentence_id is not None
                        else None,
                    ),
                },
            )
            definition_infos.append(definition_info)

        if entity.id_.startswith("definiendum") or entity.id_.startswith(
                "term-reference"):
            term = cast(TermReference, entity)
            term_info = EntityInformation(
                id_=term.id_,
                type_="term",
                bounding_boxes=boxes,
                data={
                    "name": term.text,
                    "definitions": term.definitions,
                    "definition_texs": term.definition_texs,
                    "sources": term.sources,
                    "term_type": term.type_ or "unknown"
                },
                relationships={
                    "sentence": EntityReference(
                        type_="sentence",
                        id_=f"{term.tex_path}-{term.sentence_id}"
                        if term.sentence_id is not None
                        else None,
                    ),
                    "definitions": [
                        EntityReference(type_="definition", id_=d)
                        for d in term.definition_ids
                    ],
                },
            )
            term_infos.append(term_info)

    # Upload definitions before terms, because terms hold references to definitions that can
    # only be resolved once the definitions have been uploaded.
    upload_entities(
        processing_summary.s2_id,
        processing_summary.arxiv_id,
        definition_infos,
        data_version,
    )
    upload_entities(
        processing_summary.s2_id,
        processing_summary.arxiv_id,
        term_infos,
        data_version,
    )
Example #5
    def save(self, item: SymbolData, _: None) -> None:
        symbols_with_ids = item.symbols_with_ids
        boxes = item.boxes
        matches = item.matches
        symbol_sentences = item.symbol_sentences

        symbol_ids_by_symbol_object_ids = {}
        for symbol_with_id in symbols_with_ids:
            symbol_object_id = id(symbol_with_id.symbol)
            symbol_ids_by_symbol_object_ids[symbol_object_id] = symbol_with_id.symbol_id

        entity_infos = []

        for symbol_with_id in symbols_with_ids:
            symbol = symbol_with_id.symbol
            symbol_id = symbol_with_id.symbol_id

            box = boxes.get(symbol_id)
            if box is None:
                continue

            data: EntityData = {
                "tex": f"${symbol.tex}$",
                "tex_start": symbol.start,
                "tex_end": symbol.end,
                "mathml": symbol.mathml,
                "mathml_near_matches": [
                    m.matching_mathml for m in matches[symbol.mathml]
                ],
            }

            sentence_key = symbol_sentences.get(symbol_id)
            sentence_id = (
                f"{sentence_key.tex_path}-{sentence_key.sentence_id}"
                if sentence_key is not None else None)

            child_ids = []
            for child_symbol in symbol.children:
                child_symbol_id = symbol_ids_by_symbol_object_ids[id(child_symbol)]
                string_id = (
                    f"{child_symbol_id.tex_path}-{child_symbol_id.equation_index}"
                    f"-{child_symbol_id.symbol_index}"
                )
                child_ids.append(string_id)

            relationships: EntityRelationships = {
                "children": [
                    EntityReference(type_="symbol", id_=id_) for id_ in child_ids
                ],
                "sentence": EntityReference(type_="sentence", id_=None)
                if sentence_id is None
                else EntityReference(type_="sentence", id_=sentence_id),
            }

            entity_information = EntityInformation(
                id_=(
                    f"{symbol_id.tex_path}-{symbol_id.equation_index}"
                    f"-{symbol_id.symbol_index}"
                ),
                type_="symbol",
                bounding_boxes=[box],
                data=data,
                relationships=relationships,
            )
            entity_infos.append(entity_information)

        upload_entities(item.s2_id, item.arxiv_id, entity_infos,
                        self.args.data_version)
Example #6
def upload_term_definitions(
    processing_summary: PaperProcessingResult, data_version: Optional[int]
) -> None:
    " Upload textual terms and their definitions. "

    # Group contextual snippets for each term.
    term_infos = []
    contexts_by_term_name: Dict[TermName, List[Context]] = defaultdict(list)
    for entity_summary in processing_summary.entities:
        entity = entity_summary.entity
        context = entity_summary.context
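        # is_textual_term presumably checks that the entity is a term-like type
        # with a 'text' attribute; the 'type: ignore' below is needed because
        # mypy cannot narrow the entity's type through that helper.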
        if is_textual_term(entity) and context is not None:
            contexts_by_term_name[entity.text].append(context)  # type: ignore

    # Construct mapping from definitions to the sentences that contain them.
    contexts_by_definition: Dict[EntityId, Context] = {}
    for entity_summary in processing_summary.entities:
        entity_id = entity_summary.entity.id_
        context = entity_summary.context
        if (entity_id.startswith("definition")) and context is not None:
            contexts_by_definition[entity_id] = context

    # Upload information for each term.
    for entity_summary in processing_summary.entities:
        boxes = [cast(BoundingBox, l) for l in entity_summary.locations]
        entity = entity_summary.entity
        context = entity_summary.context

        if not is_textual_term(entity):
            continue

        term = cast(TermReference, entity)

        # Assemble list of snippets that include this term.
        contexts_matching_term = contexts_by_term_name.get(term.text, [])
        snippets = [c.snippet for c in contexts_matching_term]
        snippet_sentences = [
            f"{c.tex_path}-{c.sentence_id}" for c in contexts_matching_term
        ]

        # Create links to the sentences containing definitions for this term.
        definition_sentences: List[Optional[str]] = []
        for definition_id in term.definition_ids:
            if definition_id not in contexts_by_definition:
                definition_sentences.append(None)
                # Skip the lookup below, which would otherwise raise a KeyError.
                continue
            definition_context = contexts_by_definition[definition_id]
            definition_sentences.append(
                f"{definition_context.tex_path}-{definition_context.sentence_id}"
            )

        term_info = EntityUploadInfo(
            id_=term.id_,
            type_="term",
            bounding_boxes=boxes,
            data={
                "name": term.text,
                "term_type": term.type_ or "unknown",
                "definitions": term.definitions,
                "definition_texs": term.definition_texs,
                "sources": term.sources,
                # A list of all the other sentences in the paper in which the term appears.
                "snippets": snippets,
            },
            relationships={
                # Link the term to the sentence it belongs to. This link is necessary to enable
                # visual filtering in the UI where, when a term is clicked, the sentence is
                # highlighted and all others are lowlighted.
                "sentence": EntityReference(
                    type_="sentence",
                    id_=f"{context.tex_path}-{context.sentence_id}"
                    if context is not None
                    else None,
                ),
                # IDs of the sentences that contain each of the definitions for a term. These IDs
                # can be used to establish links that take a user to the site of a definition.
                "definition_sentences": [
                    EntityReference(type_="sentence", id_=id_)
                    for id_ in definition_sentences
                ],
                # The IDs of each sentence where the term appears elsewhere in the paper (i.e.,
                # for each of the 'snippets' in the entity data above). Used to link from a snippet
                # that is shown in a list of snippets to where that snippet appears in the paper.
                "snippet_sentences": [
                    EntityReference(type_="sentence", id_=id_)
                    for id_ in snippet_sentences
                ],
            },
        )
        term_infos.append(term_info)

    upload_entities(
        processing_summary.s2_id, processing_summary.arxiv_id, term_infos, data_version,
    )
Example #7
def upload_symbol_definitions(
    processing_summary: PaperProcessingResult, data_version: Optional[int]
) -> None:
    " Upload symbols and their definitions. "

    # Associate definitions with symbols as follows:
    # Definitions will be associated with entire equations as per the current implementation
    # of the definition detector. Conservatively, associate a definition for an equation
    # with a single symbol only if that symbol is the *only* top-level symbol in that equation.

    # Load symbols from files. Group symbols by equation to make it easy to detect whether a
    # symbol is the only top-level symbol in the equation.
    symbols_by_equation: Dict[
        Tuple[TexPath, EquationIndex], List[Symbol]
    ] = defaultdict(list)
    symbols: List[Symbol] = []

    symbols_with_ids = file_utils.load_symbols(processing_summary.arxiv_id)
    if symbols_with_ids is None:
        logging.info(  # pylint: disable=logging-not-lazy
            "No symbols were loaded for paper %s. Therefore, no definitions for symbols "
            + "will be uploaded for this paper.",
            processing_summary.arxiv_id,
        )
        return

    for _, symbol in symbols_with_ids:
        symbols_by_equation[symbol.tex_path, symbol.equation_index].append(symbol)
        symbols.append(symbol)

    # Group symbols by their MathML. These groups will be used to propagate definitions from
    # one defined symbol to all other appearances of that symbol.
    symbols_by_mathml: Dict[MathML, List[Symbol]] = defaultdict(list)
    for symbol in symbols:
        symbols_by_mathml[symbol.mathml].append(symbol)

    # Construct map from definitions to the sentences that contain them.
    contexts_by_definition: Dict[EntityId, Context] = {}
    for entity_summary in processing_summary.entities:
        entity_id = entity_summary.entity.id_
        context = entity_summary.context
        if (entity_id.startswith("definition")) and context is not None:
            contexts_by_definition[entity_id] = context

    # Fetch rows for all entities for this paper that have already been uploaded to the database.
    # This allows lookup of the row IDs for the sentences that contain definitions of symbols.
    entity_models = fetch_entity_models(processing_summary.s2_id, data_version)

    # Create a list of rows to insert into the database containing definition data.
    entity_data_models: List[EntityDataModel] = []
    for entity_summary in processing_summary.entities:
        entity = entity_summary.entity
        if not entity.id_.startswith("definiendum"):
            continue

        # Attempt to match definienda (defined terms) to symbols that are being defined.
        definiendum = cast(Definiendum, entity)
        defined_symbol = None
        for symbol in symbols:
            # Is the definiendum a symbol?
            if definiendum.type_ != "symbol":
                continue
            # Does the symbol fall within the range of characters being defined?
            if symbol.start < definiendum.start or symbol.end > definiendum.end:
                continue
            # Is the symbol a top-level symbol?
            if symbol.parent is not None:
                continue
            # Is it the *only* top-level symbol in its equation?
            top_level_symbols_in_equation = filter(
                # Top-level symbols are those without a parent.
                lambda s: s.parent is None,
                symbols_by_equation[(symbol.tex_path, symbol.equation_index)],
            )
            if len(list(top_level_symbols_in_equation)) > 1:
                continue

            defined_symbol = symbol
            logging.debug(  # pylint: disable=logging-not-lazy
                "Matched definiedum %s at position (%d, %d) to symbol %s at position "
                + "(%s, %s) for paper %s. A definition for this symbol will be uploaded.",
                definiendum.tex,
                definiendum.start,
                definiendum.end,
                symbol.tex,
                symbol.start,
                symbol.end,
                processing_summary.arxiv_id,
            )
            break

        if defined_symbol is None:
            continue

        # Assemble data about definitions for the symbol.
        definitions = definiendum.definitions
        definition_texs = definiendum.definition_texs
        sources = definiendum.sources
        definition_sentence_ids: List[Optional[str]] = []
        for definition_id in definiendum.definition_ids:
            context = contexts_by_definition.get(definition_id)
            if context is None:
                definition_sentence_ids.append(None)
            else:
                definition_sentence_ids.append(
                    f"{context.tex_path}-{context.sentence_id}"
                )

        # Find all symbols that are the same (i.e., that have the same MathML
        # representation), then save definition data so that it applies to all of
        # those symbols.
        matching_symbols = symbols_by_mathml.get(defined_symbol.mathml)
        if matching_symbols is not None:
            for s in matching_symbols:
                entity_model = entity_models.get(("symbol", sid(s)))
                data: EntityData = {
                    "definitions": definitions,
                    "definition_texs": definition_texs,
                    "sources": sources,
                }
                entity_data_models.extend(make_data_models(None, entity_model, data))

                relationships: EntityRelationships = {
                    "definition_sentences": [
                        EntityReference(type_="sentence", id_=id_)
                        for id_ in definition_sentence_ids
                    ],
                }
                entity_data_models.extend(
                    make_relationship_models(
                        ("symbol", sid(s)), relationships, entity_models
                    )
                )

    with output_database.atomic():
        EntityDataModel.bulk_create(entity_data_models, 200)
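
The propagation step above hinges on the symbols_by_mathml grouping: a definition matched to one occurrence of a symbol is written for every occurrence that shares its MathML. A toy illustration of that grouping, with Sym as a hypothetical stand-in for the pipeline's Symbol type:

from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List


@dataclass
class Sym:  # hypothetical stand-in for the pipeline's Symbol type
    mathml: str
    tex: str


occurrences = [Sym("<mi>x</mi>", "x"), Sym("<mi>y</mi>", "y"), Sym("<mi>x</mi>", "x")]
by_mathml: Dict[str, List[Sym]] = defaultdict(list)
for s in occurrences:
    by_mathml[s.mathml].append(s)

# A definition detected for either "x" occurrence applies to both:
assert len(by_mathml["<mi>x</mi>"]) == 2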