Exemplo n.º 1
0
def process_doc(book_list, outfile_name):
    """Collect the trees of the given books into one document and convert it.

    For every book in ``book_list`` (in order), each entry of the
    module-level ``tree_dic[book]`` contributes its tree, which is placed in
    a newly created bundle of a fresh ``Document``. The module-level
    ``blocks`` are then applied to that document in list order.

    Args:
        book_list: iterable of keys into the module-level ``tree_dic``.
        outfile_name: path handed to ``store_conllu``; when falsy, the
            document is not stored.
    """
    document = Document()

    # Entries of tree_dic[...] are pairs; only the second item is used.
    trees = (tree for title in book_list for _, tree in tree_dic[title])
    for tree in trees:
        document.create_bundle().add_tree(tree)

    for step in blocks:
        step.apply_on_document(document)

    if not outfile_name:
        return
    document.store_conllu(outfile_name)
Exemplo n.º 2
0
def load():
    """Benchmark six udapi document operations over 30 repetitions.

    Every repetition times, in this order: loading 'cs-ud-train-l.conllu'
    into a fresh Document, reading form and lemma of every node, filtering
    nodes by a deprel chain, overwriting every deprel, calling
    compute_text() on every root, and storing the document to
    'hello.conllu'. Afterwards one line per phase is printed with the mean
    and standard deviation of its timings, both rounded to two decimals.
    """
    from udapi.core.document import Document

    clock = timeit.default_timer
    phases = ('load', 'read', 'write', 'text', 'relchain', 'save')
    samples = {phase: [] for phase in phases}

    for _ in range(30):
        # Phase: load the corpus into a new document.
        t0 = clock()
        corpus = Document()
        corpus.load_conllu('cs-ud-train-l.conllu')
        elapsed = clock() - t0
        samples['load'].append(elapsed)

        # Phase: read two attributes of every node (result is discarded).
        t0 = clock()
        for bund in corpus:
            for tree in bund:
                for tok in tree.descendants:
                    _joined = tok.form + tok.lemma
        elapsed = clock() - t0
        samples['read'].append(elapsed)

        # Phase: deprel-chain filter. Runs before the write phase below,
        # which overwrites every deprel with 'dep'.
        t0 = clock()
        for bund in corpus:
            for tree in bund:
                _matches = [
                    tok for tok in tree.descendants
                    if tok.deprel == "case" and tok.parent.deprel == "nmod"
                ]
        elapsed = clock() - t0
        samples['relchain'].append(elapsed)

        # Phase: write one attribute of every node.
        t0 = clock()
        for bund in corpus:
            for tree in bund:
                for tok in tree.descendants:
                    tok.deprel = 'dep'
        elapsed = clock() - t0
        samples['write'].append(elapsed)

        # Phase: call compute_text() on every root.
        t0 = clock()
        for bund in corpus:
            for tree in bund:
                tree.compute_text()
        elapsed = clock() - t0
        samples['text'].append(elapsed)

        # Phase: store the document.
        t0 = clock()
        corpus.store_conllu('hello.conllu')
        elapsed = clock() - t0
        samples['save'].append(elapsed)

    # Report in the fixed order given by `phases`.
    for phase in phases:
        timings = samples[phase]
        mean = round(np.mean(timings), 2)
        spread = round(np.std(timings), 2)
        print("{}\t{} +/- {}".format(phase, mean, spread))
Exemplo n.º 3
0
def load():
    """Run one untimed pass of load / read / filter / write / text / store.

    Loads 'cs-ud-train-l.conllu' into a fresh Document, walks it four times
    (attribute read, deprel-chain filter, deprel overwrite, compute_text())
    and finally stores the document to 'hello.conllu'. Nothing is returned.
    """
    from udapi.core.document import Document

    corpus = Document()
    corpus.load_conllu('cs-ud-train-l.conllu')

    # Read two attributes of every node; the result is discarded.
    for bund in corpus:
        for tree in bund:
            for tok in tree.descendants:
                _joined = tok.form + tok.lemma

    # Select nodes by the deprels of their parent and grandparent. This runs
    # before the loop below, which overwrites every deprel with 'dep'.
    for bund in corpus:
        for tree in bund:
            _matches = [
                tok for tok in tree.descendants
                if tok.parent.deprel == "det" and tok.parent.parent.deprel == "obj"
            ]

    # Overwrite the deprel of every node.
    for bund in corpus:
        for tree in bund:
            for tok in tree.descendants:
                tok.deprel = 'dep'

    # Call compute_text() on every root.
    for bund in corpus:
        for tree in bund:
            tree.compute_text()

    corpus.store_conllu('hello.conllu')
from collections import defaultdict
import re

# Script: read the artificial-sentences XML file into a document, run the
# conversion blocks over it, and store the result as CoNLL-U.

# Input file handed to the AGLDT reader below.
tst_file = "./data/artificial_sentences.xml"

doc = Document()
# NOTE(review): presumably the reader fills `doc` with the trees found in
# tst_file; the meaning of fix_cycles is defined by AgldtReader - confirm there.
reader = AgldtReader(tst_file, fix_cycles=True)
reader.apply_on_document(doc)
#trees = [b.get_tree() for b in doc.bundles]

# Conversion pipeline; the blocks are applied to the document in list order
# by the loop below.
blocks = [
    SetSpaceAfter(),
    CreateUpos(),
    CreateFeats(),
    SetMember(),
    ShallowConverter(),
    ShiftArtificials(),
    SubTreeConverter(with_enhanced=True),
    FixObj(),
    # SetArtificials(), MakeEnhanced(), # disabled in this script; per the original note these two blocks are the ones to drop if you DO NOT want empty nodes and enhanced deps
    RehangPunct(),
    FixSomePos(),
    PurgeMisc(),
    UpdateText()
]

# Run every block over the whole document, one after another.
for block in blocks:
    block.apply_on_document(doc)

# Store the processed document at a fixed output path.
doc.store_conllu("./data/non_transformed_artificials.conllu")
Exemplo n.º 5
0
# Script fragment: `book_list`, `tree_dic`, `ordered_doc` and `args` are
# defined before this point (not shown here).

# Add every tree of every listed book to `ordered_doc`, each in a bundle of
# its own. Entries of tree_dic[book] are pairs; only the second item is used.
for book in book_list:
    for _, sent in tree_dic[book]:
        bund = ordered_doc.create_bundle()
        bund.add_tree(sent)

# Output path taken from `args`; a falsy value skips storing (see the end).
outname = args.out

# Conversion pipeline; the blocks are applied to the document in list order
# by the loop below.
blocks = [
    SetSpaceAfter(),
    CreateUpos(),
    CreateFeats(),
    SetMember(),
    ShallowConverter(),
    ShiftArtificials(),
    SubTreeConverter(with_enhanced=True),
    FixObj(),
    SetArtificials(),
    MakeEnhanced(
    ),  # COMMENT OUT if you DO NOT want empty nodes and enhanced deps
    RehangPunct(),
    FixSomePos(),
    PurgeMisc(),
    UpdateText()
]

# Run every block over the whole document, one after another.
for block in blocks:
    block.apply_on_document(ordered_doc)

# Store the result only when an output path was given.
if outname:
    ordered_doc.store_conllu(outname)