Exemplo n.º 1
0
def counts(wordlist):
    """Yield a ``(stages, length, word)`` tuple for each entry of *wordlist*.

    Every entry is iterated as a sequence of forms. Each form is passed to
    ``lemmatize`` together with a lexicon: ``wl`` when ``detone(form)``
    differs from the form itself, ``wl_detone`` otherwise. Both lexicons
    are module-level names defined outside this function.

    Yields:
        stages: the largest stage returned by ``lemmatize`` for the entry's
            forms; stays at ``-2`` when no form reports a higher stage.
        length: ``len()`` of ``dict_disambiguate`` applied to all glosses
            collected for the entry.
        word: the entry itself, unchanged.
    """
    for word in wordlist:
        top_stage = -2
        glosses = []
        for form in word:
            # Forms altered by detone() are looked up in wl; forms that
            # detone() leaves unchanged are looked up in wl_detone.
            lexicon = wl if form != detone(form) else wl_detone
            stage, found = lemmatize(form, lexicon)
            # Keep a running maximum of the stage values seen so far.
            top_stage = stage if top_stage < stage else top_stage
            glosses.extend(found)
        yield (top_stage, len(dict_disambiguate(glosses)), word)
Exemplo n.º 2
0
from orthograph import convertw, detone
from morphology import lemmatize, dict_disambiguate, print_gloss
import re

ci = ConcordanceIndex(test.words(), key=lambda s: s.lower())
types = list(set([s.lower() for s in set(test.words())]))
types.sort()

for word in types:
    if not re.search(r'[0-9.,;:!?]', word):
        ci.print_concordance(word, lines=15)
        print
        nw = convertw(word)
        nwl = [w for w in nw if w in wl]
        if nwl:
            formlist = nwl
        else:
            formlist = nw
        result = []
        for form in formlist:
            if form != detone(form):
                stage, gl = lemmatize(form, wl)
            else:
                stage, gl = lemmatize(form, wl_detone)
            result.extend(gl)

        glstr = [print_gloss(g) for g in dict_disambiguate(result)]
        for gs in glstr:
            print "    ", gs.encode('utf-8')
        print
Exemplo n.º 3
0
import re

ci = ConcordanceIndex(test.words(), key=lambda s:s.lower())
types = list(set([s.lower() for s in set(test.words())]))
types.sort()

for word in types:
    if not re.search(r'[0-9.,;:!?]', word):
        ci.print_concordance(word, lines=15)
        print 
        nw = convertw(word)
        nwl = [w for w in nw if w in wl]
        if nwl:
            formlist = nwl
        else:
            formlist = nw
        result = []
        for form in formlist:
            if form != detone(form):
                stage, gl = lemmatize(form, wl)
            else:
                stage, gl = lemmatize(form,wl_detone)
            result.extend(gl)

        glstr = [print_gloss(g) for g in dict_disambiguate(result)]
        for gs in glstr:
            print "    ", gs.encode('utf-8')
        print


Exemplo n.º 4
0
# Python 3 script: print every sentence, then for each of its words the
# candidate forms, the sentence with that word upper-cased, and the glosses.
#
# Split each non-empty sentence on hyphens, spaces and punctuation, then drop
# the empty strings the split produces.
ww = [re.split(r'[- .,;!?"]', s) for s in sent if s]
words = [[w for w in wlist if w] for wlist in ww]

for sentence in words:
    print(' '.join(sentence).strip('\n\t'))
    for i, word in enumerate(sentence):
        nw = convertw(word)
        nwl = [w for w in nw if w in wl]
        # Prefer the converted forms that are present in wl; when none is,
        # fall back to every converted form.
        formlist = nwl or nw

        # Forms and sentence context belong on ONE output line. With the
        # print() function a trailing comma inside the call does not suppress
        # the newline, so end=' ' is passed explicitly.
        print('/'.join(formlist), "\t(", end=' ')
        for j, token in enumerate(sentence):
            # Upper-case the word currently being analysed.
            shown = token.upper() if j == i else token
            print(shown + " ", end=' ')
        print(")")
        for form in formlist:
            # NOTE(review): the complete return value of lemmatize() is
            # handed to dict_disambiguate() here -- confirm this matches the
            # return shape of lemmatize().
            gl = dict_disambiguate(lemmatize(form, wl))
            glstr = [print_gloss(g) for g in gl]
            for gs in glstr:
                print("    ", gs)
        # Blank separator line: a bare `print` is a no-op expression in
        # Python 3, so the function has to be called.
        print()
    print()

Exemplo n.º 5
0
ww = [re.split(r'[- .,;!?"]', s) for s in sent if s]
words = [[w for w in wlist if w] for wlist in ww]

for sentence in words:
    i = 0
    print ' '.join(sentence).strip('\n\t').encode('utf-8')
    for word in sentence:
        nw = convertw(word)
        nwl = [w for w in nw if w in wl]
        if nwl:
            formlist = nwl
        else:
            formlist = nw

        print '/'.join(formlist).encode('utf-8'), "\t(",
        for j in range(len(sentence)):
            if j == i:
                print sentence[j].upper().encode('utf-8') + " ",
            else:
                print sentence[j].encode('utf-8') + " ",
        print ")"
        for form in formlist:
            gl = dict_disambiguate(lemmatize(form, wl))
            glstr = [print_gloss(g) for g in gl]
            for gs in glstr:
                print "    ", gs.encode('utf-8')
        i += 1
        print
    print