Python TimestampCache примеры использования

Язык программирования: Python

Пространство имен/Пакет: educe.stac.util.glozz

Класс/Тип: TimestampCache

Примеров на hotexamples.com: 5

Python TimestampCache - 5 примеров найдено. Это лучшие примеры Python кода для educe.stac.util.glozz.TimestampCache, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

TimestampCache(4)

reset(1)

Основные методы

TimestampCache (4)

reset (1)

Пример #1

Показать файл

Файл: clean_emoticons.py Проект: kowey/educe

def main(args):
    """
    Subcommand main.

    Reads the corpus and its POS tags, groups the corpus keys into
    (doc, subdoc) families, and for each family that has turns with
    final emoticons, merges those emoticons in a copy of every document
    of the family and saves the result to the output directory.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    corpus = read_corpus_with_unannotated(args)
    postags = educe.stac.postag.read_tags(corpus, args.corpus)
    tcache = TimestampCache()
    output_dir = get_output_dir(args, default_overwrite=True)

    # Group keys by (doc, subdoc) and remember each family's discourse key.
    families = collections.defaultdict(list)
    discourse_subcorpus = {}
    for key in corpus:
        family = (key.doc, key.subdoc)
        families[family].append(key)
        if key.stage == 'discourse':
            discourse_subcorpus[family] = key

    for family in sorted(families):
        members = families[family]
        doc_name, subdoc_name = family
        print(family_banner(doc_name, subdoc_name, members))
        disc_k = discourse_subcorpus[family]

        # The discourse-stage document decides which turns get processed.
        disc_doc = corpus[disc_k]
        turns, warn_turns = turns_with_final_emoticons(disc_doc,
                                                       postags[disc_k])

        messages = []
        if warn_turns:
            messages.append("Note: These turns have emoticon-only EDUs that "
                            "I dare not touch because they either "
                            "participate in relations or CDUs: ")
            messages.extend([" " + disc_doc.text(turn.text_span())
                             for turn in warn_turns])
            messages.append("If the "
                            "relations can be removed, or the CDUs reduced, "
                            "please do this by hand and re-run the script:")
        if not turns:
            messages.append("Skipping %s (and related); no offending emoticons"
                            % disc_k)
        print("\n".join(messages))

        # Nothing to merge for this family: move on without resetting tcache.
        if not turns:
            continue

        turn_spans = [turn.text_span() for turn in turns]
        for member in members:
            original = corpus[member]
            updated = copy.deepcopy(original)
            merge_final_emoticons(tcache, turn_spans, updated,
                                  postags[member])
            # Only show the diff once per family, on the discourse document.
            if member == disc_k:
                for turn_span in turn_spans:
                    print(show_diff(original, updated, span=turn_span))
                    print()
            save_document(output_dir, member, updated)
        tcache.reset()
    announce_output_dir(output_dir)

Пример #2

Показать файл

def main(args):
    """
    Subcommand main.

    For every (doc, subdoc) family in the corpus, find the turns ending
    in emoticons using the discourse-stage document, merge those
    emoticons in a deep copy of each document of the family, print a
    diff for the discourse document, and save the copies.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    corpus = read_corpus_with_unannotated(args)
    postags = educe.stac.postag.read_tags(corpus, args.corpus)
    tcache = TimestampCache()
    output_dir = get_output_dir(args, default_overwrite=True)

    # family -> all keys of that family; family -> its discourse key
    families = collections.defaultdict(list)
    discourse_subcorpus = {}
    for file_id in corpus:
        fam_id = (file_id.doc, file_id.subdoc)
        families[fam_id].append(file_id)
        if file_id.stage == 'discourse':
            discourse_subcorpus[fam_id] = file_id

    for fam_id in sorted(families):
        fam_keys = families[fam_id]
        print(family_banner(fam_id[0], fam_id[1], fam_keys))
        disc_k = discourse_subcorpus[fam_id]

        ref_doc = corpus[disc_k]
        ref_tags = postags[disc_k]
        turns, warn_turns = turns_with_final_emoticons(ref_doc, ref_tags)

        report = []
        if warn_turns:
            report += [
                "Note: These turns have emoticon-only EDUs that "
                "I dare not touch because they either "
                "participate in relations or CDUs: "
            ]
            report += [" " + ref_doc.text(t.text_span()) for t in warn_turns]
            report += [
                "If the "
                "relations can be removed, or the CDUs reduced, "
                "please do this by hand and re-run the script:"
            ]
        if not turns:
            report += [
                "Skipping %s (and related); no offending emoticons" % disc_k
            ]
        print("\n".join(report))

        if turns:
            turn_spans = [t.text_span() for t in turns]
            for file_id in fam_keys:
                before = corpus[file_id]
                after = copy.deepcopy(before)
                merge_final_emoticons(tcache, turn_spans, after,
                                      postags[file_id])
                # Diffs are shown for the discourse document only.
                if file_id == disc_k:
                    for turn_span in turn_spans:
                        print(show_diff(before, after, span=turn_span))
                        print()
                save_document(output_dir, file_id, after)
            # The cache is reset only after a family was actually processed.
            tcache.reset()
    announce_output_dir(output_dir)

Пример #3

Показать файл

Файл: split_dialogue.py Проект: moreymat/educe

def main(args):
    """
    Subcommand main.

    Splits a dialogue in every document of the corpus at the turn given
    by `args.turn`, prints a small diff to stderr, saves the modified
    documents, and (unless `args.no_commit_msg` is set) prints a commit
    message built from the last document processed.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    corpus = read_corpus(args, verbose=True)
    tcache = TimestampCache()
    output_dir = get_output_dir(args, default_overwrite=True)

    # Must be bound before the loop: with an empty corpus the loop body
    # never runs and the check below would otherwise raise NameError.
    commit_info = None
    for key in corpus:
        print(key)
        # The corpus document is passed to the splitter directly; a deep
        # copy taken beforehand serves as the "before" snapshot.
        new_doc = corpus[key]
        old_doc = copy.deepcopy(new_doc)
        span = _split_dialogue(tcache, new_doc, args.turn)
        diffs = _mini_diff(key, args, old_doc, new_doc, span)
        # NOTE(review): on Python 3 this prints the bytes repr (b'...');
        # confirm which interpreter this script targets.
        print("\n".join(diffs).encode('utf-8'), file=sys.stderr)
        save_document(output_dir, key, new_doc)
        # for commit message generation (only the last one is kept)
        commit_info = CommitInfo(key=key,
                                 before=old_doc,
                                 after=new_doc,
                                 span=span,
                                 tid=args.turn)
    announce_output_dir(output_dir)
    if commit_info and not args.no_commit_msg:
        print("-----8<------")
        print(commit_msg(commit_info))

Пример #4

Показать файл

Файл: merge_edus.py Проект: Sablayrolles/debates

def main(args):
    """
    Subcommand main.

    Merges the EDUs within `args.span` in a deep copy of every corpus
    document, prints a small diff to stderr, saves the copies, and
    (unless `args.no_commit_msg` is set) prints a commit message built
    from the last document processed.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    corpus = read_corpus_with_unannotated(args)
    tcache = TimestampCache()
    output_dir = get_output_dir(args, default_overwrite=True)
    target_span = args.span

    # Stays None when the corpus is empty, so no commit message is printed.
    commit_info = None
    for key in corpus:
        before = corpus[key]
        after = copy.deepcopy(before)
        _merge_edus(tcache, target_span, after)

        diff_text = "\n".join(_mini_diff(key, before, after, target_span))
        print(diff_text.encode('utf-8'), file=sys.stderr)

        save_document(output_dir, key, after)
        # Kept for commit message generation; overwritten on each pass.
        commit_info = CommitInfo(
            key=key,
            annotator=args.annotator,
            before=before,
            after=after,
            span=target_span,
        )

    announce_output_dir(output_dir)
    if commit_info and not args.no_commit_msg:
        print("-----8<------")
        print(commit_msg(commit_info))

Пример #5

Показать файл

Файл: fix_dialogue_boundaries.py Проект: popescuv/irit-stac

def fix_dialogue_boundaries(dir_ling, dir_situ, doc, seg_path=None):
    """Fix dialogue boundaries in a woven game.

    Dialogue boundaries are adjusted in the woven version, so they
    are tighter around the dialogues that existed in the annotated
    version. The fixed documents are saved back into `dir_situ`.

    Parameters
    ----------
    dir_ling: filepath
        Path to the folder of the original version of the game.
    dir_situ: filepath
        Path to the folder of the woven version of the game.
    doc: string
        Name of the game.
    seg_path: optional
        Not used by this function.
    """
    def is_interesting(k):
        """Keep files of this game whose annotator is GOLD or unset."""
        return (k.doc == doc
                and (k.annotator == 'GOLD'
                     or k.annotator is None))

    # locate files
    dir_ling = os.path.abspath(dir_ling)
    reader_ling = Reader(dir_ling)
    files_ling = reader_ling.filter(reader_ling.files(), is_interesting)
    corpus_ling = reader_ling.slurp(cfiles=files_ling, verbose=True)

    dir_situ = os.path.abspath(dir_situ)
    reader_situ = Reader(dir_situ)
    files_situ = reader_situ.filter(reader_situ.files(), is_interesting)
    corpus_situ = reader_situ.slurp(cfiles=files_situ, verbose=True)
    # need a TimestampCache to generate unit_id for new dialogues
    tcache = TimestampCache()

    for key, doc_situ in sorted(corpus_situ.items()):
        # the same key is looked up in the original corpus
        doc_ling = corpus_ling[key]
        print(key)
        doc_situ_fixed = _fix_dialogue_boundaries(tcache, doc_ling, doc_situ)
        # DEBUG
        dlgs = sorted((x for x in doc_situ_fixed.units if is_dialogue(x)),
                      key=lambda x: x.span)
        dlg_beg = [x.span.char_start for x in dlgs]
        dlg_end = [x.span.char_end for x in dlgs]
        # list(): on Python 3 a bare zip prints as "<zip object ...>"
        print(list(zip(dlg_beg, dlg_end)))
        # end DEBUG
        save_document(dir_situ, key, doc_situ_fixed)