示例#1
0
def main(args):
    """Annotate the passages of every spec and write each annotated passage out.

    :param args: parsed options; the attributes read here are as_array, as_extra,
                 verbose, by_id and binary (args is also handed to read_specs)
    """
    # When neither output mode is requested, fall back to as_extra.
    if not (args.as_array or args.as_extra):
        args.as_extra = True

    for spec in read_specs(args, converters=FROM_FORMAT_NO_PLACEHOLDERS):
        # Options shared by every annotation call for this spec.
        options = {
            "as_array": args.as_array,
            "as_extra": args.as_extra,
            "verbose": args.verbose,
            "lang": spec.lang,
        }

        # At most one annotation source is applied, in this order of precedence:
        # conllu, then udpipe, then stanfordnlp.
        passages = spec.passages
        if spec.conllu:
            passages = copy_annotation(
                passages, spec.conllu, by_id=args.by_id, **options)
        elif spec.udpipe:
            passages = annotate_udpipe(passages, spec.udpipe, **options)
        elif spec.stanfordnlp:
            passages = annotate_stanfordnlp(
                passages, spec.stanfordnlp, **options)

        # A progress bar wraps the passages only when not running verbosely.
        if args.verbose:
            source = passages
        else:
            source = tqdm(passages,
                          unit=" passages",
                          desc="Annotating " + spec.out_dir)

        # Truthy when a conllu source is given, or when neither udpipe nor
        # stanfordnlp is given.
        replace = spec.conllu or not (spec.udpipe or spec.stanfordnlp)

        for passage in annotate_all(source, replace=replace, **options):
            if passage.extra.get("format") == "amr" and args.as_array:
                # Imported lazily: only needed for AMR passages in as_array mode.
                from semstr.conversion.amr import AmrConverter
                AmrConverter.introduce_placeholders(passage)
            write_passage(
                passage,
                outdir=spec.out_dir,
                verbose=args.verbose,
                binary=args.binary,
            )
示例#2
0
def to_amr(passage,
           metadata=True,
           wikification=True,
           use_original=True,
           verbose=False,
           default_label=None,
           **kwargs):
    """Export a Passage object as AMR text in PENMAN format.

    Thin wrapper that delegates to ``AmrConverter().to_format``.

    :param passage: the Passage object to convert
    :param metadata: whether to print ::id and ::tok lines
    :param wikification: whether to wikify named concepts, adding a :wiki triple
    :param use_original: whether to use original AMR text from passage.extra
    :param verbose: whether to print extra information
    :param default_label: label to use in case node has no label attribute
    :param kwargs: only the "format" entry is read and forwarded (None if absent);
                   any other keyword argument is ignored

    :return list of lines representing an AMR in PENMAN format, constructed from the passage
    """
    from semstr.conversion.amr import AmrConverter

    converter = AmrConverter()
    requested_format = kwargs.get("format")
    return converter.to_format(
        passage,
        metadata,
        wikification,
        verbose,
        use_original=use_original,
        default_label=default_label,
        format=requested_format,
    )
示例#3
0
def from_amr(lines, passage_id=None, return_original=False, save_original=True, wikification=False, placeholders=True,
             **kwargs):
    """Build Passage objects from parsed text in AMR PENMAN format.

    Thin wrapper that delegates to ``AmrConverter().from_format``.

    :param lines: iterable of lines in AMR PENMAN format, describing a single passage.
    :param passage_id: ID to set for passage, overriding the ID from the file
    :param return_original: return triple of (UCCA passage, AMR string, AMR ID)
    :param save_original: whether to save original AMR text in passage.extra
    :param wikification: whether to use wikification for replacing node labels with placeholders based on tokens
    :param placeholders: introduce placeholders into node labels when they include the terminal's text?
    :param kwargs: only the "format" entry is read and forwarded (None if absent);
                   any other keyword argument is ignored

    :return generator of Passage objects
    """
    from semstr.conversion.amr import AmrConverter

    converter = AmrConverter()
    return converter.from_format(
        lines,
        passage_id=passage_id,
        return_original=return_original,
        save_original=save_original,
        wikification=wikification,
        placeholders=placeholders,
        format=kwargs.get("format"),
    )
示例#4
0
def from_amr(lines,
             passage_id=None,
             return_original=False,
             save_original=True,
             *args,
             **kwargs):
    """Build Passage objects from parsed text in AMR PENMAN format.

    Thin wrapper that delegates to ``AmrConverter().from_format``.

    :param lines: iterable of lines in AMR PENMAN format, describing a single passage.
    :param passage_id: ID to set for passage, overriding the ID from the file
    :param return_original: return triple of (UCCA passage, AMR string, AMR ID)
    :param save_original: whether to save original AMR text in passage.extra
    :param args: extra positional arguments; accepted and discarded
    :param kwargs: extra keyword arguments; accepted and discarded

    :return generator of Passage objects
    """
    # Surplus arguments are deliberately dropped before doing anything else.
    del args
    del kwargs

    from semstr.conversion.amr import AmrConverter

    converter = AmrConverter()
    return converter.from_format(
        lines,
        passage_id,
        return_original=return_original,
        save_original=save_original,
    )
示例#5
0
def to_amr(passage,
           metadata=True,
           wikification=True,
           use_original=True,
           verbose=False,
           *args,
           **kwargs):
    """Export a Passage object as AMR text in PENMAN format.

    Thin wrapper that delegates to ``AmrConverter().to_format``.

    :param passage: the Passage object to convert
    :param metadata: whether to print ::id and ::tok lines
    :param wikification: whether to wikify named concepts, adding a :wiki triple
    :param use_original: whether to use original AMR text from passage.extra
    :param verbose: whether to print extra information
    :param args: extra positional arguments; accepted and discarded
    :param kwargs: extra keyword arguments; accepted and discarded

    :return list of lines representing an AMR in PENMAN format, constructed from the passage
    """
    # Surplus arguments are deliberately dropped before doing anything else.
    del args
    del kwargs

    from semstr.conversion.amr import AmrConverter

    converter = AmrConverter()
    return converter.to_format(
        passage,
        metadata,
        wikification,
        verbose,
        use_original=use_original,
    )