示例#1
0
    def create_stop_word_remover(self, isDev=False):
        """Build a StopWordRemover backed by a stop-word dictionary.

        When ``isDev`` is truthy the dictionary is an ArrayDictionary wrapping
        ``self.get_stop_words()``; otherwise it is whatever
        ``self.get_prod_stop_word_dictionary()`` returns.
        """
        if not isDev:
            return StopWordRemover(self.get_prod_stop_word_dictionary())

        return StopWordRemover(ArrayDictionary(self.get_stop_words()))
示例#2
0
    def create_stemmer(self, isDev=False):
        """Return a CachedStemmer wrapping a dictionary-backed Stemmer.

        The word list comes from ``self.get_words(isDev)`` and is wrapped in an
        ArrayDictionary; a fresh ArrayCache is handed to the CachedStemmer.
        """
        # Build the inner stemmer first, then the cache, as two separate steps.
        inner_stemmer = Stemmer(ArrayDictionary(self.get_words(isDev)))

        return CachedStemmer(ArrayCache(), inner_stemmer)
    def create_stemmer(self, isDev=False):
        """Return a Stemmer built on a word dictionary.

        With ``isDev`` truthy the dictionary is an ArrayDictionary created from
        ``self.get_words_from_file()``; otherwise it is the result of
        ``self.get_prod_words_dictionary()``.
        """
        word_dictionary = (
            ArrayDictionary(self.get_words_from_file())
            if isDev
            else self.get_prod_words_dictionary()
        )
        return Stemmer(word_dictionary)
示例#4
0
 def setUp(self):
     """Create a two-word dictionary ('di', 'ke') and a StopWordRemover over it."""
     stop_words = ArrayDictionary(['di', 'ke'])
     self.dictionary = stop_words
     self.stopWordRemover = StopWordRemover(stop_words)
     return super(Test_StopWordRemoverTest, self).setUp()
示例#5
0
 def setUp(self):
     """Create the fixed in-memory dictionary and the Stemmer under test.

     The word list is passed to ArrayDictionary exactly as written, in order;
     it contains a few repeated entries ('ajar', 'warna', 'taat').
     """
     words = [
         'hancur', 'benar', 'apa', 'siapa', 'jubah',
         'baju', 'beli', 'celana', 'hantu', 'jual',
         'buku', 'milik', 'kulit', 'sakit', 'kasih',
         'buang', 'suap', 'nilai', 'beri', 'rambut',
         'adu', 'suara', 'daerah', 'ajar', 'kerja',
         'ternak', 'asing', 'raup', 'gerak', 'puruk',
         'terbang', 'lipat', 'ringkas', 'warna', 'yakin',
         'bangun', 'fitnah', 'vonis', 'baru', 'ajar',
         'tangkap', 'kupas', 'minum', 'pukul', 'cinta',
         'dua', 'jauh', 'ziarah', 'nuklir', 'gila',
         'hajar', 'qasar', 'udara', 'populer', 'warna',
         'yoga', 'adil', 'rumah', 'muka', 'labuh',
         'tarung', 'tebar', 'indah', 'daya', 'untung',
         'sepuluh', 'ekonomi', 'makmur', 'telah', 'serta',
         'percaya', 'pengaruh', 'kritik', 'seko', 'sekolah',
         'tahan', 'capa', 'capai', 'mula', 'mulai',
         'petan', 'tani', 'aba', 'abai', 'balas',
         'balik', 'peran', 'medan', 'syukur', 'syarat',
         'bom', 'promosi', 'proteksi', 'prediksi', 'kaji',
         'sembunyi', 'langgan', 'laku', 'baik', 'terang',
         'iman', 'bisik', 'taat', 'puas', 'makan',
         'nyala', 'nyanyi', 'nyata', 'nyawa', 'rata',
         'lembut', 'ligas', 'budaya', 'karya', 'ideal',
         'final', 'taat', 'tiru', 'sepak', 'kuasa',
         'malaikat',
         'nikmat',  # sastrawi additional rules
         'lewat', 'nganga', 'allah',
     ]
     self.dictionary = ArrayDictionary(words)
     self.stemmer = Stemmer(self.dictionary)
     return super(Test_StemmerTest, self).setUp()
    def create_stop_word_remover(self):
        """Return a StopWordRemover over an ArrayDictionary of ``self.get_stop_words()``."""
        return StopWordRemover(ArrayDictionary(self.get_stop_words()))
 def get_prod_words_dictionary(self):
     """Return an ArrayDictionary built from ``self.get_words_from_file()``."""
     return ArrayDictionary(self.get_words_from_file())
示例#8
0
 def setUp(self):
     """Give each test a fresh ArrayDictionary constructed with no arguments."""
     fresh_dictionary = ArrayDictionary()
     self.dictionary = fresh_dictionary
     return super(Test_ArrayDictionaryTest, self).setUp()
示例#9
0
class Test_ArrayDictionaryTest(unittest.TestCase):
    """Unit tests for ArrayDictionary: add, add_words, contains and count."""

    def setUp(self):
        """Give each test a fresh ArrayDictionary constructed with no arguments."""
        self.dictionary = ArrayDictionary()
        return super(Test_ArrayDictionaryTest, self).setUp()

    def test_add_and_contain(self):
        """A word is reported by contains() only after it has been added."""
        self.assertFalse(self.dictionary.contains('word'))
        self.dictionary.add('word')
        self.assertTrue(self.dictionary.contains('word'))

    def test_add_count_word(self):
        """count() goes from 0 to 1 after adding one word."""
        # assertEqual is the supported spelling; the old plural alias was
        # removed from unittest in Python 3.12.
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('word')
        self.assertEqual(1, self.dictionary.count())

    def test_add_word_ignore_empty_string(self):
        """Adding an empty string must leave count() at 0."""
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('')
        self.assertEqual(0, self.dictionary.count())

    def test_add_words(self):
        """add_words() with a two-item list makes both words present."""
        words = ['word1', 'word2']
        self.dictionary.add_words(words)
        self.assertEqual(2, self.dictionary.count())
        self.assertTrue(self.dictionary.contains('word1'))
        self.assertTrue(self.dictionary.contains('word2'))

    def test_constructor_preserve_words(self):
        """Words passed to the constructor are present in the new dictionary."""
        words = ['word1', 'word2']
        dictionary = ArrayDictionary(words)
        self.assertEqual(2, dictionary.count())
        self.assertTrue(dictionary.contains('word1'))
        self.assertTrue(dictionary.contains('word2'))
示例#10
0
 def test_constructor_preserve_words(self):
     """Words passed to the constructor are present in the new dictionary."""
     words = ['word1', 'word2']
     dictionary = ArrayDictionary(words)
     # assertEqual is the supported spelling; the old plural alias was
     # removed from unittest in Python 3.12.
     self.assertEqual(2, dictionary.count())
     self.assertTrue(dictionary.contains('word1'))
     self.assertTrue(dictionary.contains('word2'))
 def test_non_dict_list(self):
     """A plain string argument is expected to yield an empty dictionary."""
     from_string = ArrayDictionary('$$%&**&(^&')
     word_total = from_string.count()
     self.assertEqual(0, word_total)
 def test_dict_param(self):
     """ArrayDictionary accepts a dict argument and can still be extended."""
     seed = {'word1':'word1', 'word2':'word2'}
     from_mapping = ArrayDictionary(seed)

     # Both seeded words are present; an unseen word is not.
     self.assertTrue(from_mapping.contains('word1'))
     self.assertTrue(from_mapping.contains('word2'))
     self.assertFalse(from_mapping.contains('word3'))
     self.assertEqual(2, from_mapping.count())

     # One real word plus a single-space string: the final count of 3
     # means the space entry is expected not to be counted.
     from_mapping.add('word3')
     from_mapping.add(' ')
     self.assertTrue(from_mapping.contains('word3'))
     self.assertEqual(3, from_mapping.count())
class Test_ArrayDictionaryTest(unittest.TestCase):
    """Unit tests for ArrayDictionary.

    Covers add, add_words, contains, count, and construction from a list,
    a dict, and a plain string.
    """

    def setUp(self):
        """Give each test a fresh ArrayDictionary constructed with no arguments."""
        self.dictionary = ArrayDictionary()
        return super(Test_ArrayDictionaryTest, self).setUp()

    def test_add_and_contain(self):
        """A word is reported by contains() only after it has been added."""
        self.assertFalse(self.dictionary.contains('word'))
        self.dictionary.add('word')
        self.assertTrue(self.dictionary.contains('word'))

    def test_add_count_word(self):
        """count() goes from 0 to 1 after adding one word."""
        # assertEqual is the supported spelling; the old plural alias was
        # removed from unittest in Python 3.12.
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('word')
        self.assertEqual(1, self.dictionary.count())

    def test_add_word_ignore_empty_string(self):
        """Adding an empty string must leave count() at 0."""
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('')
        self.assertEqual(0, self.dictionary.count())

    def test_add_words(self):
        """add_words() with a two-item list makes both words present."""
        words = ['word1', 'word2']
        self.dictionary.add_words(words)
        self.assertEqual(2, self.dictionary.count())
        self.assertTrue(self.dictionary.contains('word1'))
        self.assertTrue(self.dictionary.contains('word2'))

    def test_constructor_preserve_words(self):
        """Words passed to the constructor are present in the new dictionary."""
        words = ['word1', 'word2']
        dictionary = ArrayDictionary(words)
        self.assertEqual(2, dictionary.count())
        self.assertTrue(dictionary.contains('word1'))
        self.assertTrue(dictionary.contains('word2'))

    # Test ArrayDictionary with a dict-typed argument
    # @author Mufid Jamaluddin
    def test_dict_param(self):
        """ArrayDictionary accepts a dict argument and can still be extended."""
        dictionary = ArrayDictionary({'word1':'word1', 'word2':'word2'})
        self.assertTrue(dictionary.contains('word1'))
        self.assertTrue(dictionary.contains('word2'))
        self.assertFalse(dictionary.contains('word3'))
        self.assertEqual(2, dictionary.count())
        dictionary.add('word3')
        # The single-space entry is expected not to be counted: the total
        # asserted below is 3, not 4.
        dictionary.add(' ')
        self.assertTrue(dictionary.contains('word3'))
        self.assertEqual(3, dictionary.count())

    def test_non_dict_list(self):
        """A plain string argument is expected to yield an empty dictionary."""
        dictionary = ArrayDictionary('$$%&**&(^&')
        self.assertEqual(0, dictionary.count())
示例#14
0
 def get_prod_stop_word_dictionary(self):
     """Return an ArrayDictionary built from ``self.get_stop_words()``."""
     return ArrayDictionary(self.get_stop_words())
示例#15
0
 def create_custom_stemmer(self, isDev=False, words):
     dictionary = ArrayDictionary(words)