Пример #1
0
def pos(word, out, in_pickle):
    """Annotate each token with a tag looked up in a pickled word table.

    Args:
        word: Annotation handle passed to ``util.read_annotation``; the result
            is iterated by token id and indexed to obtain each token's word.
        out: Annotation handle that is cleared and then written with the
            computed ``{token id: tag}`` mapping.
        in_pickle: Path to a pickle file containing an object with a
            dict-style ``.get`` mapping words to tag strings.
    """
    WORD = util.read_annotation(word)

    # NOTE: unpickling can execute arbitrary code, so in_pickle must come from
    # a trusted source.  The context manager guarantees the file is closed.
    with open(in_pickle, 'rb') as pickle_file:
        pos_table = pickle.load(pickle_file)

    # Characters removed from the right-hand end of every looked-up tag.
    suffix_chars = '0123456789*abcdefghijklmnopqrstuvwxyz'

    OUT = {}
    for tokid in WORD:
        # Words missing from the table are tagged 'OTHER'.
        OUT[tokid] = pos_table.get(WORD[tokid], 'OTHER').rstrip(suffix_chars)

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
Пример #2
0
def make_longrafsi(rafsi, out):
    """Write the expanded form of every "|"-separated rafsi annotation.

    Args:
        rafsi: Annotation handle passed to ``util.read_annotation``; each value
            is split on "|".
        out: Annotation handle that is cleared and then written with the
            result.
    """
    annotation = util.read_annotation(rafsi)
    expanded = {}

    for tokid in annotation:
        parts = annotation[tokid].split("|")
        # Tokens whose first "|"-separated field is empty get no output entry.
        if not (parts and parts[0]):
            continue
        # Each field goes through expand() and the results are re-joined.
        expanded[tokid] = "|".join(expand(part) for part in parts)

    util.clear_annotation(out)
    util.write_annotation(out, expanded)
Пример #3
0
def make_longrafsi(rafsi, out, in_pickle):
    """Translate each "|"-separated rafsi through a pickled lookup table.

    Args:
        rafsi: Annotation handle passed to ``util.read_annotation``; each value
            is split on "|".
        out: Annotation handle that is cleared and then written with the
            result.
        in_pickle: Path to a pickle file containing an object with a
            dict-style ``.get``, keyed by rafsi.
    """
    RAFSI = util.read_annotation(rafsi)

    # NOTE: unpickling can execute arbitrary code, so in_pickle must come from
    # a trusted source.  The context manager guarantees the file is closed.
    with open(in_pickle, 'rb') as pickle_file:
        rafsi_dict = pickle.load(pickle_file)

    OUT = {}
    for tokid in RAFSI:
        parts = RAFSI[tokid].split("|")
        # Tokens whose first field is empty are left out of the output.
        if parts and parts[0]:
            # Fields missing from the table become 'UNDEF'.
            OUT[tokid] = "|".join(rafsi_dict.get(r, 'UNDEF') for r in parts)

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
Пример #4
0
def pos(word, out, in_pickle):
    """Tag every token by looking its word up in a pickled table.

    Args:
        word: Annotation handle passed to ``util.read_annotation``; the result
            is iterated by token id and indexed to obtain each token's word.
        out: Annotation handle that is cleared and then written with the
            resulting ``{token id: tag}`` mapping.
        in_pickle: Path to a pickle file containing an object with a
            dict-style ``.get`` mapping words to tag strings.
    """
    WORD = util.read_annotation(word)

    # NOTE: pickle.load can run arbitrary code from the file; only point
    # in_pickle at trusted data.  ``with`` closes the handle once loaded.
    with open(in_pickle, 'rb') as handle:
        table = pickle.load(handle)

    # Unknown words fall back to 'OTHER'.  Trailing digits, '*' and lowercase
    # ASCII letters are stripped from the tag that was found.
    OUT = {
        tokid: table.get(WORD[tokid], 'OTHER').rstrip(
            '0123456789*abcdefghijklmnopqrstuvwxyz')
        for tokid in WORD
    }

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
Пример #5
0
def make_longrafsi(rafsi, out, in_pickle):
    """Map each "|"-separated rafsi of a token through a pickled table.

    Args:
        rafsi: Annotation handle passed to ``util.read_annotation``; each value
            is split on "|".
        out: Annotation handle that is cleared and then written with the
            result.
        in_pickle: Path to a pickle file containing an object with a
            dict-style ``.get``, keyed by rafsi.
    """
    RAFSI = util.read_annotation(rafsi)

    # NOTE: pickle.load can run arbitrary code from the file; only point
    # in_pickle at trusted data.  ``with`` closes the handle once loaded.
    with open(in_pickle, 'rb') as handle:
        rafsi_dict = pickle.load(handle)

    OUT = {}
    for tokid in RAFSI:
        fields = RAFSI[tokid].split("|")
        # Skip tokens whose first field is empty: they get no output entry.
        if not (fields and fields[0]):
            continue
        # A field that is not in the table is rendered as 'UNDEF'.
        OUT[tokid] = "|".join(
            rafsi_dict.get(field, 'UNDEF') for field in fields)

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
Пример #6
0
def make_rafsi(word, pos, out):
    """Write "|"-joined affixes for every token whose pos value is "OTHER".

    Args:
        word: Annotation handle passed to ``util.read_annotation``; indexed by
            token id to obtain the word.
        pos: Annotation handle passed to ``util.read_annotation``; indexed by
            the same token ids.
        out: Annotation handle that is cleared and then written with the
            result.
    """
    words = util.read_annotation(word)
    tags = util.read_annotation(pos)
    joined = {}

    for tokid in words:
        # Only tokens tagged "OTHER" are decomposed; the rest get no entry.
        if tags[tokid] != "OTHER":
            continue
        joined[tokid] = "|".join(compound_to_affixes(words[tokid]))

    util.clear_annotation(out)
    util.write_annotation(out, joined)
Пример #7
0
def make_rafsi(word, pos, out):
    """Split "OTHER"-tagged words into affixes and store them "|"-joined.

    Args:
        word: Annotation handle passed to ``util.read_annotation``; indexed by
            token id to obtain the word.
        pos: Annotation handle passed to ``util.read_annotation``; indexed by
            the same token ids.
        out: Annotation handle that is cleared and then written with the
            result.
    """
    word_by_tok = util.read_annotation(word)
    pos_by_tok = util.read_annotation(pos)

    # Tokens whose pos value is anything but "OTHER" are left out entirely.
    affixes_by_tok = {
        tokid: "|".join(compound_to_affixes(word_by_tok[tokid]))
        for tokid in word_by_tok
        if pos_by_tok[tokid] == "OTHER"
    }

    util.clear_annotation(out)
    util.write_annotation(out, affixes_by_tok)
Пример #8
0
def experimental(word, tai, out, in_pickle):
    """Mark cmavo/gismu tokens whose word is absent from a pickled collection.

    For tokens whose ``tai`` value is 'cmavo' or 'gismu' the output is the
    string 'True' when the word, with every '.' removed, is NOT in the
    pickled collection, and 'False' otherwise.  All other tokens get 'UNDEF'.

    Args:
        word: Annotation handle passed to ``util.read_annotation``; indexed by
            token id to obtain the word.
        tai: Annotation handle passed to ``util.read_annotation``; indexed by
            the same token ids.
        out: Annotation handle that is cleared and then written with the
            result.
        in_pickle: Path to a pickle file containing an object that supports
            the ``in`` operator.
    """
    WORD = util.read_annotation(word)
    TAI = util.read_annotation(tai)

    # NOTE: unpickling can execute arbitrary code, so in_pickle must come from
    # a trusted source.  The context manager guarantees the file is closed.
    with open(in_pickle, 'rb') as pickle_file:
        catni = pickle.load(pickle_file)

    OUT = {}
    for tokid in WORD:
        if TAI[tokid] in ('cmavo', 'gismu'):
            # str() of the boolean yields the literal 'True' / 'False'.
            OUT[tokid] = str(WORD[tokid].replace('.', '') not in catni)
        else:
            OUT[tokid] = "UNDEF"

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
Пример #9
0
def experimental(word, tai, out, in_pickle):
    """Flag cmavo/gismu tokens that are missing from a pickled collection.

    Tokens whose ``tai`` value is 'cmavo' or 'gismu' receive 'True' if their
    word (after deleting every '.') is not contained in the pickled
    collection, otherwise 'False'.  Every other token receives 'UNDEF'.

    Args:
        word: Annotation handle passed to ``util.read_annotation``; indexed by
            token id to obtain the word.
        tai: Annotation handle passed to ``util.read_annotation``; indexed by
            the same token ids.
        out: Annotation handle that is cleared and then written with the
            result.
        in_pickle: Path to a pickle file containing an object that supports
            the ``in`` operator.
    """
    WORD = util.read_annotation(word)
    TAI = util.read_annotation(tai)

    # NOTE: pickle.load can run arbitrary code from the file; only point
    # in_pickle at trusted data.  ``with`` closes the handle once loaded.
    with open(in_pickle, 'rb') as handle:
        catni = pickle.load(handle)

    checked_types = ('cmavo', 'gismu')
    OUT = {}
    for tokid in WORD:
        if TAI[tokid] not in checked_types:
            OUT[tokid] = "UNDEF"
            continue
        stripped = WORD[tokid].replace('.', '')
        # The boolean is stored as its string form, 'True' or 'False'.
        OUT[tokid] = str(stripped not in catni)

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
Пример #10
0
def vlatai(word, pos, out):
    """Classify every token's word with the external ``vlatai`` program.

    All words (with '.' removed) are sent to a single ``vlatai`` process, one
    per line, and the i-th output line is paired with the i-th token.

    Args:
        word: Annotation handle passed to ``util.read_annotation``; indexed by
            token id to obtain the word.
        pos: Annotation handle passed to ``util.read_annotation``; the value
            read is not used for the classification.
        out: Annotation handle that is cleared and then written with the
            result.

    Raises:
        IndexError: If a paired output line has no ':' or nothing after it.
    """
    WORD = util.read_annotation(word)
    # The result is not needed; the read is kept so the function still
    # touches the same annotations as before.
    util.read_annotation(pos)

    inp = []
    tokids = []
    for tokid in WORD:
        inp.append(WORD[tokid].replace('.', ''))
        tokids.append(tokid)

    # universal_newlines=True opens the pipes in text mode, so the str built
    # below is accepted by communicate() and stdout comes back as str on
    # Python 3 as well (binary pipes would raise TypeError there).
    p = Popen('vlatai', stdin=PIPE, stdout=PIPE, stderr=PIPE,
              universal_newlines=True)
    # communicate() waits for the process to exit, so no separate wait() is
    # needed.  stderr is drained but not inspected.
    lines, _ = p.communicate(input='\n'.join(inp))

    OUT = {}
    for tokid, line in zip(tokids, lines.split('\n')):
        # First whitespace-separated word of the field after the first ':'.
        v = line.split(':')[1].strip().split()[0]
        if v == 'cmavo(s)':
            v = 'cmavo'
        OUT[tokid] = v

    util.clear_annotation(out)
    util.write_annotation(out, OUT)
Пример #11
0
def vlatai(word, pos, out):
    """Run the external ``vlatai`` program over all words and store its type.

    Each word has its '.' characters removed and is written on its own line
    to one ``vlatai`` process; output lines are matched to tokens by position.

    Args:
        word: Annotation handle passed to ``util.read_annotation``; indexed by
            token id to obtain the word.
        pos: Annotation handle passed to ``util.read_annotation``; the value
            read is not used for the classification.
        out: Annotation handle that is cleared and then written with the
            result.

    Raises:
        IndexError: If a paired output line has no ':' or nothing after it.
    """
    WORD = util.read_annotation(word)
    # Read but unused: kept so the same annotations are accessed as before.
    util.read_annotation(pos)

    tokids = list(WORD)
    inp = [WORD[tokid].replace('.', '') for tokid in tokids]

    # Text-mode pipes (universal_newlines=True) let communicate() take and
    # return str; with binary pipes Python 3 raises TypeError on str input.
    p = Popen('vlatai', stdin=PIPE, stdout=PIPE, stderr=PIPE,
              universal_newlines=True)
    # communicate() already waits for process exit; stderr output is
    # collected and discarded.
    lines, _ = p.communicate(input='\n'.join(inp))

    OUT = {}
    for tokid, line in zip(tokids, lines.split('\n')):
        # Keep the first whitespace-separated word following the first ':'.
        v = line.split(':')[1].strip().split()[0]
        # Normalize the 'cmavo(s)' label to plain 'cmavo'.
        OUT[tokid] = 'cmavo' if v == 'cmavo(s)' else v

    util.clear_annotation(out)
    util.write_annotation(out, OUT)