class PLPTestCase(unittest.TestCase):
    """Regression checks for the ``PLP`` wrapper against known entries.

    ``setUp`` builds a fresh ``PLP`` object from a fixed shared-library
    path before every test, so the tests need that library to be present
    on the machine that runs them.
    """

    # Identifier that the assertions below tie to u'żółwiem' / u'żółw'.
    _ZOLW_ID = 18660912
    # Identifier whose expected form list starts with u'pogoda'.
    _POGODA_ID = 17786048

    def setUp(self):
        # A new wrapper instance is created for each test method.
        self.plp = PLP('/usr/local/clp/lib/libclp_2.6.so')

    def test_ver(self):
        # ver() has to hand back a unicode object (Python 2 text type).
        version = self.plp.ver()
        self.assertIsInstance(version, unicode)

    def test_rec(self):
        # The inflected form with diacritics resolves to exactly one id.
        recognised = self.plp.rec(u'żółwiem')
        self.assertEqual(recognised, [self._ZOLW_ID])

    def test_orec(self):
        # The same word typed without diacritics: rec() finds nothing,
        # orec() still returns the id.
        plain_spelling = u'zolwiem'
        self.assertEqual(self.plp.rec(plain_spelling), [])
        self.assertEqual(self.plp.orec(plain_spelling), [self._ZOLW_ID])

    def test_bform(self):
        # bform() maps the id to u'żółw'.
        base_form = self.plp.bform(self._ZOLW_ID)
        self.assertEqual(base_form, u'żółw')

    def test_label(self):
        # First element of label() is compared with the CZESCI_MOWY
        # constants (presumably part-of-speech tags: RZECZOWNIK = noun,
        # CZASOWNIK = verb -- confirm against the PLP wrapper).
        first_tag = self.plp.label(self._ZOLW_ID)[0]
        self.assertEqual(first_tag, PLP.CZESCI_MOWY.RZECZOWNIK)

        # Second check goes through rec() to obtain the id for u'idę'.
        ide_id = self.plp.rec(u'idę')[0]
        ide_tag = self.plp.label(ide_id)[0]
        self.assertEqual(ide_tag, PLP.CZESCI_MOWY.CZASOWNIK)

    def test_ogonkify(self):
        # Order of the returned variants is not significant, hence
        # assertItemsEqual rather than assertEqual.
        expected_variants = [
            u'gzó',
            u'gżo',
            u'gźo',
            u'gźó',
            u'gżó',
        ]
        self.assertItemsEqual(self.plp.ogonkify(u'gzo'), expected_variants)

    def test_forms(self):
        # forms() must return exactly this list, in this order.
        expected_forms = [
            u'pogoda',
            u'pogody',
            u'pogodzie',
            u'pogodę',
            u'pogodą',
            u'pogodo',
            u'pogód',
            u'pogodom',
            u'pogodami',
            u'pogodach',
        ]
        self.assertEqual(self.plp.forms(self._POGODA_ID), expected_forms)

    def test_vec(self):
        # Only the first element of vec()'s result is pinned down here.
        first_position = self.plp.vec(self._ZOLW_ID, u'żółwiem')[0]
        self.assertEqual(first_position, 5)
#!/usr/bin/env python # encoding: utf-8 from plp import PLP p = PLP() VERB = PLP.CZESCI_MOWY.CZASOWNIK stimulus = u'fajka' st_forms = set(p.forms(p.rec(u'fajka')[0])) print st_forms snippets_count = 0 def parse_file(filename): global snippets_count with open(filename, 'r') as f: all_words = [] for line in f: words = line.strip().split() all_words.extend(words) stimulus_seen = False last_verb = None second_to_last_verb = None last_verb_index = 0 for i, word in enumerate(all_words): word_utf8 = word.decode('utf-8') if word_utf8 in st_forms or word_utf8[:-1] in st_forms: #print 'stimulus_seen'