示例#1
0
    def __init__(self, debug=False):
        """Prepare the stemmers, caches and dictionary used by this object.

        debug: value stored unchanged on the instance as ``self.debug``.
        """
        # Stemmer for enclitics and proclitics, configured with the
        # SVC.COMP_* affix lists.
        clitic_stemmer = tashaphyne.stemming.ArabicLightStemmer()
        clitic_stemmer.set_prefix_list(SVC.COMP_PREFIX_LIST)
        clitic_stemmer.set_suffix_list(SVC.COMP_SUFFIX_LIST)
        self.comp_stemmer = clitic_stemmer

        # Stemmer for conjugated verbs, configured with the
        # SVC.CONJ_* affix lists.
        conjugation_stemmer = tashaphyne.stemming.ArabicLightStemmer()
        conjugation_stemmer.set_prefix_list(SVC.CONJ_PREFIX_LIST)
        conjugation_stemmer.set_suffix_list(SVC.CONJ_SUFFIX_LIST)
        self.conj_stemmer = conjugation_stemmer

        # The last mark (Harakat Al-I3rab) is enabled.
        self.allow_syntax_lastmark = True

        # Disabled: per-category counters for showing statistics about verbs.
        #~statistics = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0, 6:0, 7:0, 8:0, 9:0,
        #~10:0, 11:0, 12:0, 13:0, 14:0, 15:0, 16:0, 17:0, 18:0, 19:0, 20:0,
        #~}

        # Caches start out empty; the first one holds affix compatibility
        # results.
        self.compatibility_cache = {}
        #~ self.verb_dict_cache = {}
        self.cache_verb = {'verb': {}}

        self.debug = debug

        # Dictionary opened on the "verbs" table.
        self.verb_dictionary = arabicdictionary.ArabicDictionary("verbs")

        # Pattern taken as-is from the shared constants module.
        self.verb_stamp_pat = SVC.VERB_STAMP_PAT
示例#2
0
    def __init__(self, debug=False):
        """Prepare the stemmers, cache, dictionary and stamp pattern.

        debug: value stored unchanged on the instance as ``self.debug``.
        """
        # Stemmer for enclitics and proclitics, configured with the
        # svconst.COMP_* affix lists.
        clitic_stemmer = tashaphyne.stemming.ArabicLightStemmer()
        clitic_stemmer.set_prefix_list(svconst.COMP_PREFIX_LIST)
        clitic_stemmer.set_suffix_list(svconst.COMP_SUFFIX_LIST)
        self.comp_stemmer = clitic_stemmer

        # Stemmer for conjugated verbs, configured with the
        # svconst.CONJ_* affix lists.
        conjugation_stemmer = tashaphyne.stemming.ArabicLightStemmer()
        conjugation_stemmer.set_prefix_list(svconst.CONJ_PREFIX_LIST)
        conjugation_stemmer.set_suffix_list(svconst.CONJ_SUFFIX_LIST)
        self.conj_stemmer = conjugation_stemmer

        # The last mark (Harakat Al-I3rab) is enabled.
        self.allow_syntax_lastmark = True

        # Disabled: per-category counters for showing statistics about verbs.
        #~statistics = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0, 6:0, 7:0, 8:0, 9:0,
        #~10:0, 11:0, 12:0, 13:0, 14:0, 15:0, 16:0, 17:0, 18:0, 19:0, 20:0,
        #~}

        self.debug = debug
        self.cache_verb = {'verb': {}}

        # Dictionary opened on the "verbs" table.
        self.verb_dictionary = arabicdictionary.ArabicDictionary("verbs")

        # Character class matching any one of the six letters/marks below.
        stamp_letters = (
            araby.ALEF,
            araby.YEH,
            araby.WAW,
            araby.ALEF_MAKSURA,
            araby.HAMZA,
            araby.SHADDA,
        )
        stamp_class = u"[%s%s%s%s%s%s]" % stamp_letters
        self.verb_stamp_pat = re.compile(stamp_class, re.UNICODE)
示例#3
0
    def __init__(self, debug=False):
        """Prepare the stemmers, generator, dictionaries and caches.

        debug: value stored unchanged on the instance as ``self.debug``.
        """
        # Stemmer for enclitics and proclitics, configured with the
        # SNC.COMP_* affix lists.
        clitic_stemmer = tashaphyne.stemming.ArabicLightStemmer()
        clitic_stemmer.set_prefix_list(SNC.COMP_PREFIX_LIST)
        clitic_stemmer.set_suffix_list(SNC.COMP_SUFFIX_LIST)
        self.comp_stemmer = clitic_stemmer

        # Second stemmer, configured with the SNC.CONJ_* affix lists.
        conjugation_stemmer = tashaphyne.stemming.ArabicLightStemmer()
        conjugation_stemmer.set_prefix_list(SNC.CONJ_PREFIX_LIST)
        conjugation_stemmer.set_suffix_list(SNC.CONJ_SUFFIX_LIST)
        self.conj_stemmer = conjugation_stemmer

        # Generator object from alyahmor.
        self.generator = alyahmor.noun_affixer.noun_affixer()

        # The last mark (Harakat Al-I3rab) is enabled.
        self.allow_syntax_lastmark = True

        # Main noun dictionary, then the custom noun dictionary; both are
        # opened on "nouns".
        self.noun_dictionary = arabicdictionary.ArabicDictionary("nouns")
        custom_nouns = custom_dictionary.custom_dictionary("nouns")
        self.custom_noun_dictionary = custom_nouns

        # Caches start out empty.
        self.cache_dict_search = {}
        self.cache_affixes_verification = {}

        # NOTE(review): the original comment suggests this flag allows
        # printing internal results -- confirm against its usages.
        self.debug = debug
        self.error_code = ""
示例#4
0
 def test_lookup(self):
     """Check the number of entries the verbs dictionary returns per word."""
     verbs = arz.ArabicDictionary('verbs')
     # (word, expected number of entries returned by lookup), checked in order.
     expected_counts = (
         (u"استقلّ", 0),
         (u'استقل', 1),
         (u"كذب", 2),
     )
     for word, count in expected_counts:
         self.assertEqual(len(verbs.lookup(word)), count)