Пример #1
0
class PLPTestCase(unittest.TestCase):
    """Checks the PLP wrapper's methods against fixed Polish-language inputs.

    Each test talks to a real PLP instance created in ``setUp``; the expected
    ids and word forms are literal values compared against what PLP returns.
    """

    def setUp(self):
        # Every test gets its own PLP object bound to this shared library path.
        library_path = '/usr/local/clp/lib/libclp_2.6.so'
        self.plp = PLP(library_path)

    def test_ver(self):
        # ver() must hand back a unicode object.
        version = self.plp.ver()
        self.assertIsInstance(version, unicode)

    def test_rec(self):
        # rec() maps an inflected form to a list of ids.
        recognised = self.plp.rec(u'żółwiem')
        self.assertEqual(recognised, [18660912])

    def test_orec(self):
        # For the spelling without diacritics rec() returns nothing,
        # while orec() still returns the id.
        ascii_spelling = u'zolwiem'
        self.assertEqual(self.plp.rec(ascii_spelling), [])
        self.assertEqual(self.plp.orec(ascii_spelling), [18660912])

    def test_bform(self):
        # bform() turns an id back into its base form.
        base_form = self.plp.bform(18660912)
        self.assertEqual(base_form, u'żółw')

    def test_label(self):
        # The first element of label() is compared with the part-of-speech
        # constants exposed on PLP.CZESCI_MOWY.
        noun_tag = self.plp.label(18660912)[0]
        self.assertEqual(noun_tag, PLP.CZESCI_MOWY.RZECZOWNIK)

        verb_id = self.plp.rec(u'idę')[0]
        verb_tag = self.plp.label(verb_id)[0]
        self.assertEqual(verb_tag, PLP.CZESCI_MOWY.CZASOWNIK)

    def test_ogonkify(self):
        # Order of the returned variants is irrelevant, hence assertItemsEqual.
        variants = self.plp.ogonkify(u'gzo')
        expected_variants = [u'gzó', u'gżo', u'gźo', u'gźó', u'gżó']
        self.assertItemsEqual(variants, expected_variants)

    def test_forms(self):
        # forms() must list exactly these forms, in this order.
        # NOTE(review): 17786048 is presumably the id of u'pogoda' - confirm.
        inflected = self.plp.forms(17786048)
        expected_forms = [
            u'pogoda',
            u'pogody',
            u'pogodzie',
            u'pogodę',
            u'pogodą',
            u'pogodo',
            u'pogód',
            u'pogodom',
            u'pogodami',
            u'pogodach'
        ]
        self.assertEqual(inflected, expected_forms)

    def test_vec(self):
        # Only the first element of vec()'s result is checked.
        positions = self.plp.vec(18660912, u'żółwiem')
        self.assertEqual(positions[0], 5)
Пример #2
0
#!/usr/bin/env python
# encoding: utf-8

from plp import PLP
# Module-wide PLP instance, created with the wrapper's default arguments.
p = PLP()

# Short alias for the verb part-of-speech constant.
VERB = PLP.CZESCI_MOWY.CZASOWNIK

# The word whose forms parse_file looks for among the words it reads.
stimulus = u'fajka'
# Forms of the first id PLP recognises for the stimulus, kept as a set because
# parse_file does membership tests against it. Derived from `stimulus` rather
# than a repeated literal so the two cannot drift apart.
st_forms = set(p.forms(p.rec(stimulus)[0]))
# Parenthesised single-argument form: prints the same under the Python 2
# print statement and also parses as a function call.
print(st_forms)

# Module-level counter; parse_file declares it `global`, presumably to update it.
snippets_count = 0

def parse_file(filename):
    global snippets_count
    with open(filename, 'r') as f:
        all_words = []
        for line in f:
            words = line.strip().split()
            all_words.extend(words)

        stimulus_seen = False
        last_verb = None
        second_to_last_verb = None
        last_verb_index = 0
    
        for i, word in enumerate(all_words):
            word_utf8 = word.decode('utf-8')
            if word_utf8 in st_forms or word_utf8[:-1] in st_forms:
                #print 'stimulus_seen'