Exemplo n.º 1
0
# coding=utf-8
from collections import Counter

from plp import PLP

# Shared module-level PLP instance; basic_form() below uses it for every lookup.
p = PLP()


def basic_form(word):
    """Return the base form of ``word``, or ``word`` itself if unrecognised.

    The word is looked up with ``p.rec``; when at least one identifier comes
    back, the first one is passed to ``p.bform`` and that result is returned.
    When no identifier is returned, the input is handed back unchanged.
    """
    candidates = p.rec(word)
    if len(candidates) == 0:
        # Nothing recognised: fall back to the word as given.
        return word
    return p.bform(candidates[0])


def stats_sorted(stats):
    """Return the items of ``stats`` as a list ordered by descending count.

    ``stats`` is any iterable of pairs whose second element is the sort key.
    The sort is stable, so pairs with equal counts keep their input order.
    """
    def count_of(pair):
        return pair[1]

    ordered = list(stats)
    ordered.sort(key=count_of, reverse=True)
    return ordered


def ranking(words):
    """Count base forms of ``words`` and return two frequency rankings.

    Each word is reduced with ``basic_form`` and the results are tallied in a
    ``Counter``.

    Returns:
        A 2-tuple ``(full, top)``: ``full`` holds every ``(form, count)``
        pair sorted by descending count, ``top`` holds the pairs from
        ``Counter.most_common(100)`` sorted the same way.
    """
    stats = Counter(basic_form(word) for word in words)
    full = stats_sorted(stats.items())
    top = stats_sorted(stats.most_common(100))
    return full, top
Exemplo n.º 2
0
def getForms(bodziec):
    """Return the forms of the first identifier found for ``bodziec``.

    ``bodziec`` is passed to ``PLP.orec``; the first identifier it yields is
    passed to ``PLP.forms`` and that result is returned.

    The previous implementation indexed the result of ``map(...)``, which
    only works on Python 2 (on Python 3 ``map`` returns an iterator and
    subscripting it raises ``TypeError``). It also created a fresh ``PLP``
    object per identifier and computed the forms of every identifier even
    though only the first result was used.

    Raises:
        IndexError: if ``orec`` returns no identifiers (same as before on
            Python 2).
    """
    plp = PLP()
    # list() keeps this working whether orec returns a list or a lazy iterable.
    ids = list(plp.orec(bodziec))
    return plp.forms(ids[0])
Exemplo n.º 3
0
# coding: utf-8
import codecs
from collections import defaultdict

from plp import PLP

__author__ = "Michał Ciołczyk"

# Path of the dictionary file read by _load_flection_map(); each line is a
# ', '-separated list of forms whose first entry is the base form.
_FILENAME = "data/odm.txt"
# Character encoding used to decode _FILENAME.
_ENCODING = "windows-1250"
# Maps a form to the list of base forms it can belong to; filled lazily by
# _load_flection_map().
_basic_forms = defaultdict(list)
# Set to True once _load_flection_map() has populated _basic_forms.
_initialized = False
# Module-level PLP instance (its use is outside the visible part of the file).
_plp = PLP()
# NOTE(review): presumably the reflexive suffix handled by _strip_sie() - confirm.
_SIE = ' się'


def _load_flection_map():
    """Populate ``_basic_forms`` from the dictionary file, at most once.

    Every line of ``_FILENAME`` is split on ``', '``; the first entry is
    taken as the base form and every entry on the line (including the base
    form itself) is mapped to it. Each form ends up with a duplicate-free
    list of base forms in first-seen order.

    Subsequent calls are no-ops once ``_initialized`` has been set. If
    loading fails part-way, ``_initialized`` stays ``False`` and a retry does
    not duplicate entries, because base forms are de-duplicated on insert.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the file is not valid ``_ENCODING`` text.
    """
    global _initialized
    if _initialized:
        return
    with codecs.open(_FILENAME, 'r', encoding=_ENCODING) as f:
        for line in f:
            # codecs.open always reads in binary mode, so '\r\n' is not
            # translated; strip both characters or the last form on every
            # line of a CRLF file would keep a trailing '\r'.
            forms = line.rstrip('\r\n').split(', ')
            bform = forms[0]
            for form in forms:
                known = _basic_forms[form]
                # De-duplicate on insert: deterministic order, no second pass.
                if bform not in known:
                    known.append(bform)
    _initialized = True


def _strip_sie(form):
Exemplo n.º 4
0
 def setUp(self):
     """Create the PLP instance used by the tests and store it on ``self.plp``."""
     # Location of the CLP shared library handed to the PLP constructor.
     library_path = '/usr/local/clp/lib/libclp_2.6.so'
     self.plp = PLP(library_path)