def create_entity_localization_command_sequence(
    entity_name: str,
    EntityExtractorType: Type[EntityExtractor],
    extract_contexts: bool = False,
    DetectedEntityType: Optional[Type[SerializableEntity]] = None,
    upload_func: Optional[EntityUploadCallable] = None,
    # NOTE(review): this default instance is created once at definition time and
    # shared across all calls — safe only if ColorizeOptions is immutable
    # (e.g., a frozen dataclass). TODO confirm.
    colorize_options: ColorizeOptions = ColorizeOptions(),
    colorize_func: Optional[ColorizeFunc] = None,
) -> List[Type[Command]]:  # type: ignore
    """
    Create a set of commands that can be used to locate a new type of entity.

    In the simplest case, all you have to provide is an 'entity_name', which is
    used both for naming output files and for filtering which commands are run
    when the full pipeline is invoked, and an 'EntityExtractorType' that locates
    all instances of that entity in the TeX. This function creates the commands
    necessary to colorize the entities, compile the LaTeX, raster the pages,
    and locate the colors in the pages.

    You may define additional parameters (e.g., 'colorize_options') to
    fine-tune the commands. To extract the contexts for an entity (i.e., the
    sentences in which the entities appear), set 'extract_contexts' to True.

    If you are trying to find the locations of a new type of entity, it is
    highly recommended that you use this convenience method instead of
    creating new commands yourself.
    """
    commands: CommandList = []

    # Detection: find every instance of the entity in the TeX sources.
    directories.register(f"detected-{entity_name}")
    commands.append(make_detect_entities_command(entity_name, EntityExtractorType))

    # Context extraction is opt-in: it collects the sentences in which each
    # detected entity appears.
    if extract_contexts:
        directories.register(f"contexts-for-{entity_name}")
        commands.append(make_extract_contexts_command(entity_name))

    # Localization: register the output directories for each stage (colorized
    # sources, compiled output, rastered pages, image diffs, final locations),
    # then add the single command that drives all of those stages.
    directories.register(f"sources-with-colorized-{entity_name}")
    directories.register(f"compiled-sources-with-colorized-{entity_name}")
    directories.register(f"paper-images-with-colorized-{entity_name}")
    directories.register(f"diffed-images-with-colorized-{entity_name}")
    directories.register(f"{entity_name}-locations")
    commands.append(
        make_locate_entities_command(
            entity_name, None, DetectedEntityType, colorize_options, colorize_func
        )
    )

    # Upload is optional: only added when a callback is supplied.
    if upload_func is not None:
        upload_command = make_upload_entities_command(
            entity_name, upload_func, DetectedEntityType=DetectedEntityType
        )
        commands.append(upload_command)

    return commands
def entity_key_for_contexts(entity: SerializableEntity) -> Any:
    """
    When constructing snippets for the contexts symbols appear in, determine
    whether two symbols should be highlighted as the 'same symbol' using their
    MathML.
    """
    # The pipeline passes generic entities; here they are known to be symbols.
    symbol = cast(SerializableSymbol, entity)
    return symbol.mathml


# Commands for the symbols pipeline: extract symbols, match them across the
# paper, collect the sentence contexts they appear in, then locate them on
# the rendered pages.
commands = [
    ExtractSymbols,
    FindSymbolMatches,
    # Context snippets: occurrences of the same symbol (keyed by MathML) are
    # wrapped so they can be highlighted in the rendered snippet.
    make_extract_contexts_command(
        "symbols",
        EntityType=SerializableSymbol,
        entity_key=entity_key_for_contexts,
        tex_wrapper=TexWrapper(
            before=r"\htmlClass{match-highlight}{", after="}", braces=True
        ),
    ),
    # Locate individual equation tokens; 'when' filters localization down to
    # atom tokens only.
    make_locate_entities_command(
        "equation-tokens",
        DetectedEntityType=SerializableToken,
        colorize_options=ColorizeOptions(
            adjust_color_positions=adjust_color_positions,
            braces=True,
            when=filter_atom_tokens,
        ),
    ),
    # NOTE(review): this call continues beyond the visible chunk; the argument
    # list is completed elsewhere in the file.
    make_locate_entities_command(
        "symbols-with-affixes",
        input_entity_name="symbols",
@dataclass(frozen=True)
class EntityWithType(SerializableEntity):
    # Discriminator for the kind of detected entity (e.g., "symbol", "term").
    type_: str


def exclude_symbols(entity: SerializableEntity) -> bool:
    """
    Colorization filter: return False for definienda / terms whose type is
    'symbol', True for everything else.
    """
    if entity.id_.startswith("definiendum") or entity.id_.startswith("term"):
        return cast(EntityWithType, entity).type_ != "symbol"
    return True


# Commands for the definitions pipeline: tokenize sentences, build annotation
# files, detect definitions, extract the contexts they appear in, and locate
# them on the rendered pages before uploading.
commands: CommandList = [
    TokenizeSentences,
    CreateAnnotationFiles,
    DetectDefinitions,
    make_extract_contexts_command(entity_name="definitions"),
    make_locate_entities_command(
        "definitions",
        DetectedEntityType=EntityWithType,
        # Do not locate terms that are symbols because these will already be
        # detected more robustly in dedicated commands for symbol localization.
        colorize_options=ColorizeOptions(when=exclude_symbols),
    ),
    # 'upload_command' is constructed earlier in this module.
    upload_command,
]

definitions_pipeline = EntityPipeline(
    "definitions", commands, depends_on=["symbols", "sentences"],
)
from .colorize import adjust_color_positions
from .extractor import GlossaryTermExtractor
from .upload import upload_terms

# Assemble the standard detect/colorize/locate/upload sequence for glossary
# terms.
commands = create_entity_localization_command_sequence(
    "glossary-terms",
    GlossaryTermExtractor,
    DetectedEntityType=Term,
    colorize_options=ColorizeOptions(adjust_color_positions=adjust_color_positions),
    upload_func=upload_terms,
)

# Before uploading entities, extract contexts that each term appeared in. The
# context-extraction command must therefore be inserted just ahead of the
# upload command; if no upload command is present, it goes at the end.
upload_command_index = len(commands)
for position, cmd in enumerate(commands):
    if cmd.get_name() == "upload-glossary-terms":
        upload_command_index = position

directories.register("contexts-for-glossary-terms")
commands.insert(
    upload_command_index,
    make_extract_contexts_command(
        "glossary-terms",
        EntityType=Term,
        tex_wrapper=TexWrapper(before="**", after="**"),
    ),
)

terms_pipeline = EntityPipeline("glossary-terms", commands)
register_entity_pipeline(terms_pipeline)