def main(args):
    """Run the tagMaker usage examples and print each result.

    The examples run in this order: encode a tag list, decode a tag code,
    inflect a tag code, add tags one by one, and test a tag code for a
    given tag.  Every example builds its own ``tagMaker`` instance.

    Args:
        args: Not used by this function.
    """
    import mysam.tagmaker as tagmaker

    # Example: encode a list of tags, then print the maker's string form.
    tag_maker = tagmaker.tagMaker()
    # NOTE(review): u'مضير متصل' may be a misspelling of u'ضمير متصل' --
    # confirm against the tag dictionary before changing it.
    taglist = [u'اسم', u'مضير متصل', u'مجرور']
    tag_maker.encode(taglist)
    tagstr = str(tag_maker)
    print(tagstr.encode('utf8'))

    # Example: decode a tag code and print its representation.
    print("***exmaple for decode***")
    tag_maker = tagmaker.tagMaker()
    tagcode = 'N--;--I-;----;---'
    print(tag_maker.repr(tag_maker.decode(tagcode)).encode('utf8'))

    # Example: inflect a tag code.
    print("***exmaple for inflect***")
    tag_maker = tagmaker.tagMaker()
    tagcode = 'N--;M3IY;---H;---'
    print(tag_maker.inflect(tagcode).encode('utf8'))

    # Example: add tags one at a time to an empty maker.
    print("***exmaple for add tag***")
    tag_maker = tagmaker.tagMaker()
    for tag_new in (u"تعريف", u"اسم"):
        tag_maker.add(tag_new)
    print(str(tag_maker).encode('utf8'))

    # Example: check whether a tag code carries a given tag.
    print("***exmaple for has tag***")
    tag_maker = tagmaker.tagMaker()
    tagcode = 'N--;--I-;----;---'
    tag_search = u"مجرور"
    print(tag_maker.has_tag(tag_search, tagcode))
def __init__(self, wordtype, version="N/A"):
    """
    Initiate the dict and build the list of accepted affix triples.

    After the base ``CsvDict`` initialisation, a tag maker is created
    from ``config/tag.config`` (resolved next to this module), and
    ``self.affixes_list`` is filled with every
    ``(procletic, encletic, suffix)`` triple whose unvocalized
    ``procletic-encletic`` pair is listed in
    ``snconst.COMP_NOUN_AFFIXES`` and which
    ``nspell.verify_proaffix_affix`` accepts.  Two counters are printed
    at the end: triples with a listed pair, and triples also accepted.

    Args:
        wordtype: Forwarded unchanged to ``CsvDict.__init__``.
        version: Forwarded unchanged to ``CsvDict.__init__``.
    """
    csvdict.CsvDict.__init__(self, wordtype, version)
    self.affixes_list = []
    file_conf = os.path.join(os.path.dirname(__file__), "config/tag.config")
    self.tagmaker = tagmaker.tagMaker(file_conf)

    nb1 = 0  # triples whose clitic pair is listed in COMP_NOUN_AFFIXES
    nb2 = 0  # triples that additionally pass verify_proaffix_affix
    # The *_MODEL collections are iterated here; an earlier variant used
    # snconst.COMP_PREFIX_LIST / snconst.COMP_SUFFIX_LIST instead.
    # NOTE(review): the verify check is taken to be nested under the
    # pair-membership check -- confirm against the original layout.
    for procletic in snconst.COMP_PREFIX_LIST_MODEL:
        # Stripping does not depend on the inner loops, so do it once.
        pro_nm = araby.strip_tashkeel(procletic)
        for encletic in snconst.COMP_SUFFIX_LIST_MODEL:
            enc_nm = araby.strip_tashkeel(encletic)
            # The pair test is independent of the suffix: decide once
            # per pair instead of once per suffix.
            if u"-".join([pro_nm, enc_nm]) not in snconst.COMP_NOUN_AFFIXES:
                continue
            for suffix in snconst.CONJ_SUFFIX_LIST:
                nb1 += 1
                if nspell.verify_proaffix_affix(procletic, encletic, suffix):
                    nb2 += 1
                    self.affixes_list.append((procletic, encletic, suffix))
    # Single formatted argument: prints "nb1 nb2" under both Python 2 and 3.
    print("%d %d" % (nb1, nb2))
def __init__(self, version="N/A"):
    """
    Initialise the base CsvDict, then attach a tag maker and a verb affixer.

    The tag maker is configured from ``config/tag.config``, looked up
    relative to the directory that contains this module.

    Args:
        version: Forwarded unchanged to ``CsvDict.__init__``.
    """
    csvdict.CsvDict.__init__(self, version)
    module_dir = os.path.dirname(__file__)
    self.tagmaker = tagmaker.tagMaker(
        os.path.join(module_dir, "config/tag.config")
    )
    self.affixer = verb_affixer.verb_affixer()
def main(args):
    """Encode several sample tag lists and print each round trip.

    For every sample a new ``tagMaker`` encodes the list; the function
    then prints the tags joined with ``+``, the maker's string form, and
    the representation of the decoded tags.

    Args:
        args: Not used by this function.
    """
    samples = [
        [u'اسم', u'هاء', u'مجرور',],
        u'تعريف::مرفوع:متحرك:ينون:::'.split(":"),
        u'المضارع المعلوم:هو:::n:'.split(":"),
        u':مضاف:مجرور:متحرك:ينون:::'.split(':'),
    ]
    for tags in samples:
        maker = tagmaker.tagMaker()
        maker.encode(tags)

        joined = u"+".join(tags)
        print(joined.encode('utf8'))
        print(str(maker).encode('utf8'))

        # Decode the unified tag string held by the maker and show it.
        decoded = maker.decode()
        print(maker.repr(decoded).encode('utf8'))
def main(args):
    """Add every known tag to one tag maker and report the effect.

    Each tag in ``tag_const.TAGSDICT`` is added to the same ``tagMaker``;
    the maker's string form is compared before and after, and an
    unchanged string is printed as an error line.  Afterwards the decoded
    tags are shown as a pandas DataFrame, and the inflection is printed
    after adding a noun tag and then a verb tag.

    Args:
        args: Not used by this function.
    """
    import pandas as pd

    # Test all existing tags against a single maker.
    maker = tagmaker.tagMaker()
    before = str(maker)
    print("----")
    for tag in tag_const.TAGSDICT:
        before = str(maker)
        maker.add(tag)
        after = str(maker)
        if before != after:
            report = u" ".join([tag, after])
        else:
            # Adding the tag left the string form untouched.
            report = u" ".join(["error:old\t", before, tag, "\n new:\t", after])
        print(report.encode('utf8'))

    frame = pd.DataFrame(maker.decode())
    print(frame)

    maker.add(u"اسم")
    print("******Inflect", maker.inflect().encode('utf8'))
    maker.add(u"فعل")
    print("***Verb***Inflect", maker.inflect().encode('utf8'))