示例#1
0
def run(args):
    """Template-align the partial cognate sets of the Chen data.

    Steps, in order:

    1. Load ``workflow/D_Chen_partial.tsv`` (relative to the dataset
       directory) as ``Alignments`` with ``ref='cogids'``.
    2. Add a ``structure`` column computed from the ``tokens`` column.
    3. Keep only the rows whose ``tokens`` and ``structure`` agree in length;
       mismatching rows are printed and dropped.
    4. Run ``template_alignment`` on the kept rows and write the result as
       TSV using the filename ``workflow/D_Chen_aligned``.

    ``args`` is accepted but not used by this function.
    """
    def to_structure(token_seq):
        # Each item returned by ``segments.get_structure`` is space-joined,
        # and the resulting strings are glued together with ' + '.
        parts = (' '.join(part) for part in segments.get_structure(token_seq))
        return basictypes.lists(' + '.join(parts))

    workflow = Dataset().dir.joinpath('workflow')
    wordlist = Alignments(
        workflow.joinpath('D_Chen_partial.tsv').as_posix(), ref='cogids')
    wordlist.add_entries('structure', 'tokens', to_structure)
    print('[i] added segments')

    # Key 0 holds the column names; data rows are stored under their own ids.
    kept = {0: list(wordlist.columns)}
    for idx, tokens, structure in wordlist.iter_rows('tokens', 'structure'):
        # NOTE(review): ``.n`` is presumably the morpheme-level view of the
        # sequence -- confirm against ``basictypes.lists``.
        if len(tokens.n) != len(structure.n):
            print('[!!!]', tokens, structure)
            continue
        if len(tokens) != len(structure):
            print('[!]', tokens, structure)
            continue
        kept[idx] = wordlist[idx]

    # Rebuild the alignments from the filtered rows only.
    aligned = Alignments(kept, ref='cogids')
    template_alignment(
        aligned,
        ref='cogids',
        template='imnct+imnct+imnct+imnct+imnct+imnct',
        structure='structure',
        fuzzy=True,
        segments='tokens',
    )

    aligned.output(
        'tsv',
        filename=workflow.joinpath('D_Chen_aligned').as_posix(),
        prettify=False,
    )
def run(args):
    """Compute cross-semantic cognate ids and re-align the Chen data.

    Loads ``workflow/D_Chen_aligned.tsv`` (relative to the dataset
    directory), runs ``find_bad_internal_alignments`` and
    ``find_colexified_alignments`` on it (the latter with ``ref='crossids'``),
    re-runs ``template_alignment`` against ``crossids`` and writes the result
    as TSV using the filename ``workflow/D_Chen_crossids``.

    ``args`` is accepted but not used by this function.
    """
    workflow = Dataset().dir.joinpath('workflow')
    source_path = workflow.joinpath('D_Chen_aligned.tsv').as_posix()
    target_path = workflow.joinpath('D_Chen_crossids').as_posix()
    syllable_template = 'imnct+imnct+imnct+imnct+imnct+imnct'

    wordlist = Alignments(source_path, ref='cogids')
    find_bad_internal_alignments(wordlist)

    find_colexified_alignments(
        wordlist, cognates='cogids', segments='tokens', ref='crossids')

    # Re-align the data, this time grouped by the 'crossids' column.
    template_alignment(
        wordlist,
        ref='crossids',
        template=syllable_template,
        structure='structure',
        fuzzy=True,
        segments='tokens',
    )

    wordlist.output('tsv', filename=target_path, prettify=False)
from sys import argv

# Choose the filename prefix from the command line: the argument 'all'
# anywhere in argv selects the 'A_' files, otherwise the 'D_' files are used.
fname = (
    '../output/A_Deepadung_' if 'all' in argv else '../output/D_Deepadung_'
)


def _structure_from_tokens(token_seq):
    """Build the ``structure`` entry for one ``tokens`` value.

    Each item returned by ``segments.get_structure`` is space-joined and the
    resulting strings are combined with ' + ' before being wrapped in
    ``basictypes.lists``.
    """
    parts = (' '.join(part) for part in segments.get_structure(token_seq))
    return basictypes.lists(' + '.join(parts))


alms = Alignments(f'{fname}partial.tsv', ref='cogids')
alms.add_entries('structure', 'tokens', _structure_from_tokens)
print('[i] added segments')

# Key 0 holds the column names; only rows whose tokens and structure agree in
# length are copied over, the others are printed and dropped.
D = {0: list(alms.columns)}
for idx, tokens, structure in alms.iter_rows('tokens', 'structure'):
    # NOTE(review): ``.n`` is presumably the morpheme-level view of the
    # sequence -- confirm against ``basictypes.lists``.
    if len(tokens.n) != len(structure.n):
        print('[!!!]', tokens, structure)
        continue
    if len(tokens) != len(structure):
        print('[!]', tokens, structure)
        continue
    D[idx] = alms[idx]

# Rebuild the alignments from the filtered rows only.
alms = Alignments(D, ref='cogids')

template_alignment(
    alms,
    ref='cogids',
    template='imnc+imnc+imnc+imnc+imnc+imnc',
    structure='structure',
    fuzzy=True,
    segments='tokens',
)
alms.output('tsv', filename=f'{fname}aligned_structure', prettify=False)