def vocalize(noun, proclitic, prefix, suffix, enclitic):
    """
    Join the noun and its affixes, and get the vocalized form.

    @param noun: noun found in dictionary.
    @type noun: unicode.
    @param proclitic: first level prefix.
    @type proclitic: unicode.
    @param prefix: second level prefix.
    @type prefix: unicode.
    @param suffix: second level suffix.
    @type suffix: unicode.
    @param enclitic: first level suffix.
    @type enclitic: unicode.
    @return: vocalized word.
    @rtype: unicode.
    """
    # The first vocalization listed in the affix tables is the one used.
    enclitic_voc = snconst.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
    proclitic_voc = snconst.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]
    suffix_voc = suffix

    # Strip the last mark of the noun if it is a haraka or a tanwin.
    if noun[-1:] in araby.HARAKAT:
        noun = noun[:-1]

    # Complete the dictionary vocalization: in the dictionary form every
    # ALEF / ALEF MAKSURA is preceded by a FATHA, so insert the ones that
    # may be missing.
    noun = noun.replace(araby.ALEF, araby.FATHA + araby.ALEF)
    noun = noun.replace(araby.ALEF_MAKSURA, araby.FATHA + araby.ALEF_MAKSURA)
    # The insertion above may have doubled an existing FATHA: collapse runs.
    # (u"..." instead of ur"...": the ur prefix is a syntax error on
    # Python 3 and the patterns contain no backslash, so nothing is lost.)
    noun = re.sub(u"(%s)+" % araby.FATHA, araby.FATHA, noun)
    # Remove the initial FATHA produced when ALEF is the first letter.
    noun = re.sub(u"^(%s)+" % araby.FATHA, "", noun)

    # Add a SHADDA on the first letter if it is a sun letter and the
    # proclitic ends with the AL definite article.
    if proclitic.endswith(araby.ALEF + araby.LAM) and araby.is_sun(noun[0]):
        noun = u''.join([noun[0], araby.SHADDA, noun[1:]])
        # Strip the SUKUN from the LAM.
        # NOTE(review): kept inside the sun-letter branch -- confirm
        # against the original layout.
        if proclitic_voc.endswith(araby.SUKUN):
            proclitic_voc = proclitic_voc[:-1]

    # Adapt the noun ending to the suffix, then to the enclitic, and adapt
    # the suffix to the enclitic.
    noun = get_word_variant(noun, suffix)
    noun = get_word_variant(noun, enclitic)
    suffix_voc = get_suffix_variant(noun, suffix_voc, enclitic)
    return ''.join([proclitic_voc, prefix, noun, suffix_voc, enclitic_voc])
def vocalize(noun, proclitic, prefix, suffix, enclitic):
    """
    Assemble the vocalized word from a noun and its four affixes.

    @param noun: noun found in dictionary.
    @type noun: unicode.
    @param proclitic: first level prefix.
    @type proclitic: unicode.
    @param prefix: second level prefix.
    @type prefix: unicode.
    @param suffix: second level suffix.
    @type suffix: unicode.
    @param enclitic: first level suffix.
    @type enclitic: unicode.
    @return: vocalized word.
    @rtype: unicode.
    """
    tail = snconst.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
    head = snconst.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]
    fatha = araby.FATHA

    # Drop a final haraka / tanwin carried by the dictionary form.
    stem = noun
    if stem[-1:] in araby.HARAKAT:
        stem = stem[:-1]

    # Every ALEF and ALEF MAKSURA gets a FATHA in front of it; FATHAs
    # doubled by that insertion are merged, and a FATHA left at the very
    # start of the stem (ALEF as first letter) is removed.
    for long_vowel in (araby.ALEF, araby.ALEF_MAKSURA):
        stem = stem.replace(long_vowel, fatha + long_vowel)
    stem = re.sub(u"(%s)+" % fatha, fatha, stem)
    stem = re.sub(u"^(%s)+" % fatha, "", stem)

    # AL article followed by a sun letter: the letter takes a SHADDA and
    # the LAM loses its SUKUN.
    # NOTE(review): SUKUN removal kept inside this branch -- confirm
    # against the original layout.
    has_article = proclitic.endswith(araby.ALEF + araby.LAM)
    if has_article and araby.is_sun(stem[0]):
        stem = stem[0] + araby.SHADDA + stem[1:]
        if head.endswith(araby.SUKUN):
            head = head[:-1]

    # Stem variant for the suffix first, then for the enclitic; the suffix
    # itself is then adapted to the enclitic.
    stem = get_word_variant(get_word_variant(stem, suffix), enclitic)
    ending = get_suffix_variant(stem, suffix, enclitic)
    return ''.join((head, prefix, stem, ending, tail))
def test_is_letter(self):
    """Check that each character-class predicate accepts its own members.

    Every predicate of the Araby module is called on each character of
    the constant collection it is paired with below.
    """
    # Predicates that recognise one single mark.
    self.assertTrue(Araby.is_sukun(Araby.SUKUN))
    self.assertTrue(Araby.is_shadda(Araby.SHADDA))
    self.assertTrue(Araby.is_tatweel(Araby.TATWEEL))

    # (predicate, characters it must accept)
    checks = [
        (Araby.is_tanwin, Araby.TANWIN),
        (Araby.is_tashkeel, Araby.TASHKEEL),
        (Araby.is_haraka, Araby.HARAKAT),
        (Araby.is_shortharaka, Araby.SHORTHARAKAT),
        (Araby.is_ligature, Araby.LIGUATURES),
        (Araby.is_hamza, Araby.HAMZAT),
        (Araby.is_alef, Araby.ALEFAT),
        (Araby.is_yehlike, Araby.YEHLIKE),
        (Araby.is_wawlike, Araby.WAWLIKE),
        # The predicate has to be *called*: asserting on the bare
        # function object is always true and verifies nothing.
        (Araby.is_teh, Araby.TEHLIKE),
        (Araby.is_small, Araby.SMALL),
        (Araby.is_weak, Araby.WEAK),
        (Araby.is_moon, Araby.MOON),
        (Araby.is_sun, Araby.SUN),
    ]
    for predicate, characters in checks:
        for archar in characters:
            self.assertTrue(
                predicate(archar),
                "%s rejected %r" % (predicate.__name__, archar))
def vocalize(noun, proclitic, prefix, suffix, enclitic):
    """
    Assemble the vocalized word from a noun and its four affixes.

    @param noun: noun found in dictionary.
    @type noun: unicode.
    @param proclitic: first level prefix.
    @type proclitic: unicode.
    @param prefix: second level prefix.
    @type prefix: unicode.
    @param suffix: second level suffix.
    @type suffix: unicode.
    @param enclitic: first level suffix.
    @type enclitic: unicode.
    @return: vocalized word.
    @rtype: unicode.
    """
    tail = snconst.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
    head = snconst.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]

    # Drop a final haraka / tanwin carried by the dictionary form.
    stem = noun
    if stem[-1:] in araby.HARAKAT:
        stem = stem[:-1]

    # AL article followed by a sun letter: the letter takes a SHADDA and
    # the LAM loses its SUKUN.
    # NOTE(review): SUKUN removal kept inside this branch -- confirm
    # against the original layout.
    has_article = proclitic.endswith(araby.ALEF + araby.LAM)
    if has_article and araby.is_sun(stem[0]):
        stem = stem[0] + araby.SHADDA + stem[1:]
        if head.endswith(araby.SUKUN):
            head = head[:-1]

    # Stem variant for the suffix first, then for the enclitic; the suffix
    # (used as given, no table lookup) is then adapted to the enclitic.
    stem = get_word_variant(get_word_variant(stem, suffix), enclitic)
    ending = get_suffix_variant(stem, suffix, enclitic)
    return ''.join((head, prefix, stem, ending, tail))
if araby.is_shadda(c): print("shadda", end=" ") if araby.is_tatweel(c): print("tatweel", end=" ") if araby.is_tashkeel(c): print("tashkeel", end=" ") if araby.is_tanwin(c): print("tanwin", end=" ") if araby.is_shortharaka(c): print("short haraka", end=" ") if araby.is_ligature(c): print(" ligature", end=" ") if araby.is_ligature(c): print('ligature', end=" ") if araby.is_hamza(c): print('hamza', end=" ") if araby.is_alef(c): print('alef', end=" ") if araby.is_yehlike(c): print('yeh', end=" ") if araby.is_wawlike(c): print('waw', end=" ") if araby.is_teh(c): print('teh', end=" ") if araby.is_small(c): print('small', end=" ") if araby.is_weak(c): print('weak', end=" ") if araby.is_moon(c): print('moon', end=" ") if araby.is_sun(c): print('sun', end=" ") print(araby.order(c), end=" ") print() word = u"الْعَرَيِيّةُ" word_list = [ u"الْعَرَيِيّةُ", u"العربية", u"الْعَرَيِيّةُ الفصحى", u"غير مشكول", "Taha", ] word1 = u"" for word in word_list: print(word, '\t', end=" ") if araby.is_vocalized(word): print(' is vocalized', end=" ") if araby.is_vocalizedtext(word): print(' is vocalized text', end=" ")
if araby.is_shadda(c): print ("shadda") if araby.is_tatweel(c): print ("tatweel") if araby.is_tashkeel(c): print ("tashkeel") if araby.is_tanwin(c): print ("tanwin") if araby.is_shortharaka(c): print ("short haraka"), if araby.is_ligature(c):print (" ligature"), if araby.is_ligature(c):print ('ligature'), if araby.is_hamza(c): print ('hamza'), if araby.is_alef(c): print ('alef'), if araby.is_yehlike(c): print ('yeh'), if araby.is_wawlike(c): print ('waw'), if araby.is_teh(c): print ('teh'), if araby.is_small(c): print ('small'), if araby.is_weak(c): print ('weak'), if araby.is_moon(c): print ('moon'), if araby.is_sun(c):print ('sun'), print (araby.order(c)), print (); word=u"الْعَرَيِيّةُ" word_list=[ u"الْعَرَيِيّةُ", u"العربية", u"الْعَرَيِيّةُ الفصحى", u"غير مشكول", "Taha", u"سئل لأنه يؤم الإمام" ] word1=u"" for word in word_list: print (word) if araby.is_vocalized(word): print (' is vocalized')
def vocalize(noun, proclitic, suffix, enclitic):
    """
    Join the noun and its affixes, and get the vocalized forms.

    @param noun: noun found in dictionary.
    @type noun: unicode.
    @param proclitic: first level prefix.
    @type proclitic: unicode.
    @param suffix: second level suffix.
    @type suffix: unicode.
    @param enclitic: first level suffix.
    @type enclitic: unicode.
    @return: (vocalized word, vocalized word without the I3rab mark,
        '-'-separated segmentation with the tashkeel stripped).
    @rtype: tuple of unicode.
    """
    # The proclitic has only one vocalization: take the first entry.
    proclitic_voc = snconst.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]
    # The enclitic varies with the suffix; get_enclitic_variant returns
    # the inflected and the non-inflected forms.
    enclitic_voc = snconst.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
    enclitic_voc, encl_voc_non_inflect = get_enclitic_variant(
        enclitic_voc, suffix)
    suffix_voc = suffix

    # Strip the last mark of the noun if it is a haraka or a tanwin.
    if ar.is_haraka(noun[-1:]):
        noun = noun[:-1]
    # Convert FATHATAN into a single FATHA, for the cases where the
    # tanwin is not the last character, e.g. محتوًى
    noun = noun.replace(ar.FATHATAN, ar.FATHA)

    # Add a SHADDA on the first letter if it is a sun letter and the
    # proclitic carries the AL definition tag.
    if u'تعريف' in snconst.COMP_PREFIX_LIST_TAGS[proclitic]["tags"] \
            and ar.is_sun(noun[0]):
        noun = u''.join([noun[0], ar.SHADDA, noun[1:]])
        # Strip the SUKUN from the LAM.
        # NOTE(review): kept inside the sun-letter branch -- confirm
        # against the original layout.
        if proclitic_voc.endswith(ar.SUKUN):
            proclitic_voc = proclitic_voc[:-1]

    # Complete the dictionary vocalization: in the dictionary form every
    # ALEF / ALEF MAKSURA is preceded by a FATHA, so insert the ones that
    # may be missing, collapse the doubled FATHAs this creates and drop
    # the FATHA left in front of an initial ALEF.
    # (u"..." instead of ur"...": the ur prefix is a syntax error on
    # Python 3 and the patterns contain no backslash.)
    noun = noun.replace(ar.ALEF, ar.FATHA + ar.ALEF)
    noun = noun.replace(ar.ALEF_MAKSURA, ar.FATHA + ar.ALEF_MAKSURA)
    noun = re.sub(u"(%s)+" % ar.FATHA, ar.FATHA, noun)
    noun = re.sub(u"^(%s)+" % ar.FATHA, "", noun)

    # Remember whether the noun ends with KASRA + YEH *before* its ending
    # is rewritten; get_suffix_variants needs that information.
    mankous = noun.endswith(ar.KASRA + ar.YEH)

    # Generate the word variant for words ending with special letters
    # (TEH MARBUTA, ALEF MAKSURA, hamza); the variant depends on the
    # suffix harakat, for example مدرسة+ي = مدرست+ي
    noun = get_word_variant(noun, suffix, enclitic)

    # Generate the suffix variant; it depends on the enclitic harakat,
    # for example مدرس+ة+ي = مدرس+ت+ي
    suffix_voc, suffix_non_irab_mark = get_suffix_variants(
        noun, suffix_voc, enclitic, mankous)

    # Semi-vocalized form: the vocalized word without the I3rab mark.
    word_non_irab_mark = ''.join([
        proclitic_voc, noun, suffix_non_irab_mark, encl_voc_non_inflect])
    # Adjust the semi-vocalized form.
    word_non_irab_mark = re.sub(
        u"(%s)+" % ar.FATHA, ar.FATHA, word_non_irab_mark)
    word_non_irab_mark = re.sub(
        u"(%s%s%s)+" % (ar.FATHA, ar.ALEF_MAKSURA, ar.KASRATAN),
        ar.FATHATAN + ar.ALEF_MAKSURA, word_non_irab_mark)
    word_non_irab_mark = re.sub(
        u"%s%s%s" % (ar.FATHA, ar.ALEF_MAKSURA, ar.KASRA),
        ar.FATHA + ar.ALEF_MAKSURA, word_non_irab_mark)
    # Drop a short haraka placed after ALEF MAKSURA. The character class
    # lists the three marks only: a '|' inside [...] is a literal pipe,
    # not an alternation.
    word_non_irab_mark = re.sub(
        u"%s[%s%s%s]" % (ar.ALEF_MAKSURA, ar.DAMMA, ar.FATHA, ar.KASRA),
        ar.ALEF_MAKSURA, word_non_irab_mark)

    # Fully vocalized form, and the segmentation used for spelling
    # purposes (taken before the adjustments below).
    parts = [proclitic_voc, noun, suffix_voc, enclitic_voc]
    word_vocalized = ''.join(parts)
    segmented = ar.strip_tashkeel('-'.join(parts))

    # Adjust the vocalized form.
    word_vocalized = re.sub(u"(%s)+" % ar.FATHA, ar.FATHA, word_vocalized)
    # FATHA + ALEF MAKSURA + any tanwin becomes FATHATAN + ALEF MAKSURA.
    # The three marks are applied one after the other, in this order.
    for tanwin in (ar.KASRATAN, ar.DAMMATAN, ar.FATHATAN):
        word_vocalized = re.sub(
            u"%s%s%s" % (ar.FATHA, ar.ALEF_MAKSURA, tanwin),
            ar.FATHATAN + ar.ALEF_MAKSURA, word_vocalized)
    word_vocalized = re.sub(
        u"%s%s%s" % (ar.FATHA, ar.ALEF_MAKSURA, ar.KASRA),
        ar.FATHA + ar.ALEF_MAKSURA, word_vocalized)
    word_vocalized = re.sub(
        u"%s[%s%s%s]" % (ar.ALEF_MAKSURA, ar.DAMMA, ar.FATHA, ar.KASRA),
        ar.ALEF_MAKSURA, word_vocalized)
    return word_vocalized, word_non_irab_mark, segmented
if araby.is_shadda(c): print "shadda", if araby.is_tatweel(c): print "tatweel", if araby.is_tashkeel(c): print "tashkeel", if araby.is_tanwin(c): print "tanwin", if araby.is_shortharaka(c): print "short haraka", if araby.is_ligature(c):print " ligature", if araby.is_ligature(c):print 'ligature', if araby.is_hamza(c): print 'hamza', if araby.is_alef(c): print 'alef', if araby.is_yehlike(c): print 'yeh', if araby.is_wawlike(c): print 'waw', if araby.is_teh(c): print 'teh', if araby.is_small(c): print 'small', if araby.is_weak(c): print 'weak', if araby.is_moon(c): print 'moon', if araby.is_sun(c):print 'sun', print araby.order(c), print; word=u"الْعَرَيِيّةُ" word_list=[ u"الْعَرَيِيّةُ", u"العربية", u"الْعَرَيِيّةُ الفصحى", u"غير مشكول", "Taha", ] word1=u"" for word in word_list: print word.encode('utf8'),'\t', if araby.is_vocalized(word): print ' is vocalized', ## if araby.isArabicstring(word): print ' iisArabicstring',
def vocalize(noun, proclitic, suffix, enclitic):
    """
    Join the noun and its affixes, and get the vocalized forms.

    @param noun: noun found in dictionary.
    @type noun: unicode.
    @param proclitic: first level prefix.
    @type proclitic: unicode.
    @param suffix: second level suffix.
    @type suffix: unicode.
    @param enclitic: first level suffix.
    @type enclitic: unicode.
    @return: (vocalized word, vocalized word without the I3rab mark).
    @rtype: tuple of unicode.
    """
    # The proclitic has only one vocalization: take the first entry.
    proclitic_voc = snconst.COMP_PREFIX_LIST_TAGS[proclitic]["vocalized"][0]
    # The enclitic varies with the suffix; get_enclitic_variant returns
    # the inflected and the non-inflected forms.
    enclitic_voc = snconst.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
    enclitic_voc, enclitic_voc_non_inflected = get_enclitic_variant(
        enclitic_voc, suffix)
    suffix_voc = suffix

    # Strip the last mark of the noun if it is a haraka or a tanwin.
    if araby.is_haraka(noun[-1:]):
        noun = noun[:-1]
    # Convert FATHATAN into a single FATHA, for the cases where the
    # tanwin is not the last character, e.g. محتوًى
    noun = noun.replace(araby.FATHATAN, araby.FATHA)

    # Add a SHADDA on the first letter if it is a sun letter and the
    # proclitic carries the AL definition tag.
    if (u'تعريف' in snconst.COMP_PREFIX_LIST_TAGS[proclitic]["tags"]
            and araby.is_sun(noun[0])):
        noun = u''.join([noun[0], araby.SHADDA, noun[1:]])
        # Strip the SUKUN from the LAM.
        # NOTE(review): kept inside the sun-letter branch -- confirm
        # against the original layout.
        if proclitic_voc.endswith(araby.SUKUN):
            proclitic_voc = proclitic_voc[:-1]

    # Generate the word variant for words ending with special letters
    # (TEH MARBUTA, ALEF MAKSURA, hamza); the variant depends on the
    # suffix harakat, for example مدرسة+ي = مدرست+ي
    noun = get_word_variant(noun, suffix + enclitic)

    # Generate the suffix variant; it depends on the enclitic harakat,
    # for example مدرس+ة+ي = مدرس+ت+ي
    suffix_voc, suffix_non_irab_mark = get_suffix_variants(
        noun, suffix_voc, enclitic)

    # Complete the dictionary vocalization: in the dictionary form every
    # ALEF / ALEF MAKSURA is preceded by a FATHA, so insert the ones that
    # may be missing, collapse the doubled FATHAs this creates and drop
    # the FATHA left in front of an initial ALEF.
    # (u"..." instead of ur"...": the ur prefix is a syntax error on
    # Python 3 and the patterns contain no backslash.)
    noun = noun.replace(araby.ALEF, araby.FATHA + araby.ALEF)
    noun = noun.replace(araby.ALEF_MAKSURA, araby.FATHA + araby.ALEF_MAKSURA)
    noun = re.sub(u"(%s)+" % araby.FATHA, araby.FATHA, noun)
    noun = re.sub(u"^(%s)+" % araby.FATHA, "", noun)

    # Semi-vocalized form: the vocalized word without the I3rab mark.
    word_non_irab_mark = ''.join([
        proclitic_voc, noun, suffix_non_irab_mark,
        enclitic_voc_non_inflected])
    # Adjust the semi-vocalized form.
    word_non_irab_mark = re.sub(
        u"(%s)+" % araby.FATHA, araby.FATHA, word_non_irab_mark)
    word_non_irab_mark = re.sub(
        u"(%s%s%s)+" % (araby.FATHA, araby.ALEF_MAKSURA, araby.KASRATAN),
        araby.FATHATAN + araby.ALEF_MAKSURA, word_non_irab_mark)
    word_non_irab_mark = re.sub(
        u"%s%s%s" % (araby.FATHA, araby.ALEF_MAKSURA, araby.KASRA),
        araby.FATHA + araby.ALEF_MAKSURA, word_non_irab_mark)
    # Drop a short haraka placed after ALEF MAKSURA. The character class
    # lists the three marks only: a '|' inside [...] is a literal pipe,
    # not an alternation.
    word_non_irab_mark = re.sub(
        u"%s[%s%s%s]" % (araby.ALEF_MAKSURA, araby.DAMMA, araby.FATHA,
                         araby.KASRA),
        araby.ALEF_MAKSURA, word_non_irab_mark)

    # Fully vocalized form.
    word_vocalized = ''.join([proclitic_voc, noun, suffix_voc, enclitic_voc])
    word_vocalized = re.sub(
        u"(%s)+" % araby.FATHA, araby.FATHA, word_vocalized)
    # FATHA + ALEF MAKSURA + any tanwin becomes FATHATAN + ALEF MAKSURA.
    # The three marks are applied one after the other, in this order.
    for tanwin in (araby.KASRATAN, araby.DAMMATAN, araby.FATHATAN):
        word_vocalized = re.sub(
            u"%s%s%s" % (araby.FATHA, araby.ALEF_MAKSURA, tanwin),
            araby.FATHATAN + araby.ALEF_MAKSURA, word_vocalized)
    word_vocalized = re.sub(
        u"%s%s%s" % (araby.FATHA, araby.ALEF_MAKSURA, araby.KASRA),
        araby.FATHA + araby.ALEF_MAKSURA, word_vocalized)
    word_vocalized = re.sub(
        u"%s[%s%s%s]" % (araby.ALEF_MAKSURA, araby.DAMMA, araby.FATHA,
                         araby.KASRA),
        araby.ALEF_MAKSURA, word_vocalized)
    return word_vocalized, word_non_irab_mark
if araby.is_shadda(c): print "shadda", if araby.is_tatweel(c): print "tatweel", if araby.is_tashkeel(c): print "tashkeel", if araby.is_tanwin(c): print "tanwin", if araby.is_shortharaka(c): print "short haraka", if araby.is_ligature(c): print " ligature", if araby.is_ligature(c): print 'ligature', if araby.is_hamza(c): print 'hamza', if araby.is_alef(c): print 'alef', if araby.is_yehlike(c): print 'yeh', if araby.is_wawlike(c): print 'waw', if araby.is_teh(c): print 'teh', if araby.is_small(c): print 'small', if araby.is_weak(c): print 'weak', if araby.is_moon(c): print 'moon', if araby.is_sun(c): print 'sun', print araby.order(c), print word = u"الْعَرَيِيّةُ" word_list = [ u"الْعَرَيِيّةُ", u"العربية", u"الْعَرَيِيّةُ الفصحى", u"غير مشكول", "Taha", ] word1 = u"" for word in word_list: print word.encode('utf8'), '\t', if araby.is_vocalized(word): print ' is vocalized', ## if araby.isArabicstring(word): print ' iisArabicstring',
def vocalize(stop, proclitic, suffix, enclitic):
    """
    Assemble the vocalized forms of a stop word and its affixes.

    @param stop: stop found in dictionary.
    @type stop: unicode.
    @param proclitic: first level prefix.
    @type proclitic: unicode.
    @param suffix: second level suffix.
    @type suffix: unicode.
    @param enclitic: first level suffix.
    @type enclitic: unicode.
    @return: (vocalized word, vocalized word without the I3rab mark).
    @rtype: tuple of unicode.
    """
    # Enclitic and proclitic each use the first vocalization listed in
    # the affix tables.
    tail = ssconst.COMP_SUFFIX_LIST_TAGS[enclitic]["vocalized"][0]
    prefix_entry = ssconst.COMP_PREFIX_LIST_TAGS[proclitic]
    head = prefix_entry["vocalized"][0]

    word = stop
    # The final haraka / tanwin is dropped only when a suffix follows.
    if suffix and araby.is_haraka(word[-1:]):
        word = word[:-1]
    # A FATHATAN that is not the last character (e.g. محتوًى) is reduced
    # to a plain FATHA.
    word = word.replace(araby.FATHATAN, araby.FATHA)

    # Proclitic tagged as definition (AL) followed by a sun letter: the
    # letter takes a SHADDA and the LAM loses its SUKUN.
    # NOTE(review): SUKUN removal kept inside this branch -- confirm
    # against the original layout.
    if u'تعريف' in prefix_entry["tags"] and araby.is_sun(word[0]):
        word = word[0] + araby.SHADDA + word[1:]
        if head.endswith(araby.SUKUN):
            head = head[:-1]

    # Word variant for endings such as TEH MARBUTA, ALEF MAKSURA or hamza,
    # driven by the suffix and enclitic, for example مدرسة+ي = مدرست+ي
    word = get_word_variant(word, suffix + enclitic)

    # Suffix variants driven by the enclitic, for example
    # مدرس+ة+ي = مدرس+ت+ي ; the second one lacks the I3rab mark.
    ending, ending_non_irab = get_suffix_variants(word, suffix, enclitic)

    vocalized = ''.join((head, word, ending, tail))
    non_irab = ''.join((head, word, ending_non_irab, tail))
    return vocalized, non_irab