def vocalize(self, noun, proclitic, prefix, suffix, enclitic):
    """
    Join the noun and its affixes, and get the vocalized form.

    @param noun: noun found in dictionary.
    @type noun: unicode.
    @param proclitic: first level prefix.
    @type proclitic: unicode.
    @param prefix: second level prefix.
    @type prefix: unicode.
    @param suffix: second level suffix.
    @type suffix: unicode.
    @param enclitic: first level suffix.
    @type enclitic: unicode.
    @return: vocalized word.
    @rtype: unicode.
    """
    # Take the first listed vocalized form of the enclitic and proclitic.
    enclitic_voc = stem_noun_const.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
    proclitic_voc = stem_noun_const.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]
    # The suffix is used as given; it is not looked up in a tag table.
    suffix_voc = suffix

    # Strip the last mark of the noun if it is a tanwin or a haraka.
    if noun[-1:] in araby.HARAKAT:
        noun = noun[:-1]

    # Add a shadda on the first letter if it is a sun letter and the
    # proclitic ends with the AL definition article.  The emptiness check
    # keeps noun[0] from raising IndexError when nothing is left of the
    # noun after stripping its last mark.
    if (proclitic.endswith(araby.ALEF + araby.LAM)
            and noun
            and araby.isSun(noun[0])):
        noun = u''.join([noun[0], araby.SHADDA, noun[1:]])
        # Strip the sukun from the lam of the article.
        # NOTE(review): this step is kept inside the sun-letter branch;
        # confirm against the intended layout.
        if proclitic_voc.endswith(araby.SUKUN):
            proclitic_voc = proclitic_voc[:-1]

    # Adapt the noun first to the suffix, then to the enclitic.
    noun = self.getWordVariant(noun, suffix)
    noun = self.getWordVariant(noun, enclitic)
    # Adapt the suffix to the enclitic that follows it.
    suffix_voc = self.getSuffixVariant(noun, suffix_voc, enclitic)

    return ''.join([proclitic_voc, prefix, noun, suffix_voc, enclitic_voc])
def vocalize(self, noun, proclitic, suffix, enclitic):
    """
    Join the noun and its affixes, and get the vocalized forms.

    @param noun: noun found in dictionary.
    @type noun: unicode.
    @param proclitic: first level prefix.
    @type proclitic: unicode.
    @param suffix: second level suffix.
    @type suffix: unicode.
    @param enclitic: first level suffix.
    @type enclitic: unicode.
    @return: the vocalized word, and the vocalized word without the
        I3rab mark.
    @rtype: tuple of (unicode, unicode).
    """
    # Enclitics and proclitics have only one vocalization in Arabic,
    # so the first listed vocalized form is taken.
    enclitic_voc = stem_noun_const.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
    proclitic_voc = stem_noun_const.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]
    # The suffix is used as given; it is not looked up in a tag table.
    suffix_voc = suffix

    # Strip the last mark of the noun if it is a tanwin or a haraka.
    if araby.isHaraka(noun[-1:]):
        noun = noun[:-1]
    # Convert fathatan into a single fatha, for the cases where the tanwin
    # is not at the end of the word, e.g. محتوًى
    noun = noun.replace(araby.FATHATAN, araby.FATHA)

    # Add a shadda on the first letter if it is a sun letter and the
    # proclitic carries the AL definition tag.  The emptiness check keeps
    # noun[0] from raising IndexError when nothing is left of the noun
    # after stripping its last mark.
    proclitic_tags = stem_noun_const.COMP_PREFIX_LIST_TAGS[proclitic]["tags"]
    if u'تعريف' in proclitic_tags and noun and araby.isSun(noun[0]):
        noun = u''.join([noun[0], araby.SHADDA, noun[1:]])
        # Strip the sukun from the lam of the article.
        # NOTE(review): this step is kept inside the sun-letter branch;
        # confirm against the intended layout.
        if proclitic_voc.endswith(araby.SUKUN):
            proclitic_voc = proclitic_voc[:-1]

    # Generate the word variant for words which end with special letters
    # like teh marbuta, alef maksura or hamza; the variant is influenced
    # by the suffix harakat, for example مدرسة+ي = مدرست+ي
    noun = self.getWordVariant(noun, suffix + enclitic)

    # Generate the suffix variant: if the suffix is teh marbuta, alef
    # maksura or hamza, the variant is influenced by the enclitic harakat,
    # for example مدرس+ة+ي = مدرس+ت+ي
    suffix_voc, suffix_non_irab_mark = self.getSuffixVariant(
        noun, suffix_voc, enclitic)

    # Get the enclitic variant to be joined to the word, for example:
    # word = مدرس, suffix = ِة, enclitic = هُ; the enclitic is converted
    # to heh + kasra.
    enclitic_voc = self.getEncliticVariant(noun, suffix_voc, enclitic_voc)

    # Fully vocalized word.
    word_vocalized = ''.join(
        [proclitic_voc, noun, suffix_voc, enclitic_voc])
    # Vocalized word without the I3rab mark (the case where the suffix is
    # a short haraka).
    word_non_irab_mark = ''.join(
        [proclitic_voc, noun, suffix_non_irab_mark, enclitic_voc])

    return word_vocalized, word_non_irab_mark
def vocalize(self, noun, proclitic, prefix, suffix, enclitic):
    """
    Join the noun and its affixes, and get the vocalized form.

    @param noun: noun found in dictionary.
    @type noun: unicode.
    @param proclitic: first level prefix.
    @type proclitic: unicode.
    @param prefix: second level prefix.
    @type prefix: unicode.
    @param suffix: second level suffix.
    @type suffix: unicode.
    @param enclitic: first level suffix.
    @type enclitic: unicode.
    @return: vocalized word.
    @rtype: unicode.
    """
    # Take the first listed vocalized form of the enclitic and proclitic.
    enclitic_voc = stem_noun_const.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
    proclitic_voc = stem_noun_const.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]
    # The suffix is used as given; it is not looked up in a tag table.
    suffix_voc = suffix

    # Strip the last mark of the noun if it is a tanwin or a haraka.
    if noun[-1:] in araby.HARAKAT:
        noun = noun[:-1]
    # Convert fathatan into a single fatha, e.g. محتوًى
    noun = noun.replace(araby.FATHATAN, araby.FATHA)

    # Add a shadda on the first letter if it is a sun letter and the
    # proclitic ends with the AL definition article (alef+lam or lam+lam).
    # The emptiness check keeps noun[0] from raising IndexError when
    # nothing is left of the noun after stripping its last mark.
    definite_endings = (araby.ALEF + araby.LAM, araby.LAM + araby.LAM)
    if (proclitic.endswith(definite_endings)
            and noun
            and araby.isSun(noun[0])):
        noun = u''.join([noun[0], araby.SHADDA, noun[1:]])
        # Strip the sukun from the lam of the article.
        # NOTE(review): this step is kept inside the sun-letter branch;
        # confirm against the intended layout.
        if proclitic_voc.endswith(araby.SUKUN):
            proclitic_voc = proclitic_voc[:-1]

    # Adapt the noun to the combined suffix and enclitic.
    noun = self.getWordVariant(noun, suffix + enclitic)
    # Adapt the suffix to the enclitic that follows it.
    suffix_voc = self.getSuffixVariant(noun, suffix_voc, enclitic)
    # Adapt the enclitic to the noun and the suffix that precede it.
    enclitic_voc = self.getEncliticVariant(noun, suffix_voc, enclitic_voc)

    return ''.join([proclitic_voc, prefix, noun, suffix_voc, enclitic_voc])