Exemplo n.º 1
0
class PLPTestCase(unittest.TestCase):
    """Exercise the ``PLP`` wrapper against fixed expected results.

    ``setUp`` builds a new ``PLP`` object for every test, passing it the
    hard-coded path of the CLP shared library. The ids and word forms
    asserted below are literal values; they are presumably tied to that
    particular library/dictionary version (TODO confirm).
    """

    # Id that rec(u'żółwiem') is expected to yield and whose bform() is
    # expected to be u'żółw'.
    _ZOLW_ID = 18660912
    # Id whose forms() listing is expected to start with u'pogoda'.
    _POGODA_ID = 17786048

    def setUp(self):
        # A separate PLP object is created before each test method.
        library_path = '/usr/local/clp/lib/libclp_2.6.so'
        self.plp = PLP(library_path)

    def test_ver(self):
        # ver() must hand back a unicode object.
        version = self.plp.ver()
        self.assertIsInstance(version, unicode)

    def test_rec(self):
        # rec() on the inflected form must return exactly the known id.
        found = self.plp.rec(u'żółwiem')
        self.assertEqual(found, [self._ZOLW_ID])

    def test_orec(self):
        # With the diacritics stripped, rec() must find nothing ...
        strict_hits = self.plp.rec(u'zolwiem')
        self.assertEqual(strict_hits, [])

        # ... while orec() must still return the known id.
        relaxed_hits = self.plp.orec(u'zolwiem')
        self.assertEqual(relaxed_hits, [self._ZOLW_ID])

    def test_bform(self):
        # bform() maps the id back to its base form.
        base = self.plp.bform(self._ZOLW_ID)
        self.assertEqual(base, u'żółw')

    def test_label(self):
        # First element of the label is compared with the RZECZOWNIK
        # (Polish: noun) constant.
        noun_tag = self.plp.label(self._ZOLW_ID)[0]
        self.assertEqual(noun_tag, PLP.CZESCI_MOWY.RZECZOWNIK)

        # Same check for the first id found for u'idę', against the
        # CZASOWNIK (Polish: verb) constant.
        verb_id = self.plp.rec(u'idę')[0]
        verb_tag = self.plp.label(verb_id)[0]
        self.assertEqual(verb_tag, PLP.CZESCI_MOWY.CZASOWNIK)

    def test_ogonkify(self):
        # assertItemsEqual ignores ordering, so only the set of returned
        # variants (with multiplicity) matters here.
        expected_variants = [u'gzó', u'gżo', u'gźo', u'gźó', u'gżó']
        actual_variants = self.plp.ogonkify(u'gzo')
        self.assertItemsEqual(actual_variants, expected_variants)

    def test_forms(self):
        # forms() must return exactly this list, in this order.
        expected_forms = [
            u'pogoda',
            u'pogody',
            u'pogodzie',
            u'pogodę',
            u'pogodą',
            u'pogodo',
            u'pogód',
            u'pogodom',
            u'pogodami',
            u'pogodach'
        ]
        actual_forms = self.plp.forms(self._POGODA_ID)
        self.assertEqual(actual_forms, expected_forms)

    def test_vec(self):
        # Only the first element of vec()'s result is checked.
        vector = self.plp.vec(self._ZOLW_ID, u'żółwiem')
        self.assertEqual(vector[0], 5)
Exemplo n.º 2
0
        notes = {}

        for note in pap:
            if not re.search('\d{6}', note):
                continue

            note_id = re.findall('\d{6}', note)[0]
            note_content = re.sub(note_id, '', note).strip()

            words = pre_process(note_content).split(' ')
            for i, word in enumerate(words):
                if not p.rec(word):
                    continue

                basic_form = p.bform(p.rec(word)[0])
                if basic_form in stop_list:
                    continue

                # words frequencies and total words number
                word = strip_sie(basic_form)
                if word in words_freq:
                    words_freq[word] += 1
                else:
                    words_freq[word] = 1
                total_no += 1

                # co-occurence frequencies
                if word == STIMULUS:
                    min_idx = min(0, i - 12)
                    max_idx = min(len(words) - 1, i + 12)