Пример #1
0
def generate_script_matrix():
    chars5 = '[[["a","A","i","I","u","U","R","RR"],["lR","lRR","e","ai","o","au"]],[["E","O","aE","AE","aO"]],[["aM","aH","a~"]],[["ka","kha","ga","gha","Ga"],["ca","cha","ja","jha","Ja"],["Ta","Tha","Da","Dha","Na"],["ta","tha","da","dha","na"],["pa","pha","ba","bha","ma"]],[["ya","ra","la","va","za","Sa","sa","ha"]],[["La","Za","r2a","n2a"]],[["qa","qha","g2a","z2a","r3a","r3ha","fa","Ya"]],[["n*ga","n*ja","n*Da","n*da","m*ba"]],[["ka","kA","ki","kI","ku","kU","kR","kRR"],["klR","klRR","ke","kai","ko","kau"]],[["kE","kO","kaE","kAE","kaO"]],[["kaM","kaH","ka~","k"]],[["\'","oM",".",".."]]]'
    chars3 = '[[["a","A","i"],["I","u","U"],["R","RR","lR"],["lRR","e","ai"],["o","au"]],[["E","O","aE"],["AE","aO"]],[["aM","aH","a~"]],[["ka","kha","ga"],["gha","Ga","ca"],["cha","ja","jha"],["Ja","Ta","Tha"],["Da","Dha","Na"],["ta","tha","da"],["dha","na","pa"],["pha","ba","bha"],["ma"]],[["ya","ra","la"],["va","za","Sa"],["sa","ha"]],[["La","Za","r2a"],["n2a"]],[["qa","qha","g2a"],["z2a","r3a","r3ha"],["fa","Ya"]],[["n*ga","n*ja","n*Da"],["n*da","m*ba"]],[["ka","kA","ki"],["kI","ku","kU"],["kR","kRR","klR"],["klRR","ke","kai"],["ko","kau"]],[["kE","kO","kaE"],["kAE","kaO"]],[["kaM","kaH","ka~"],["k"]],[["\'","oM","."],[".."]]]'

    chars5 = json.dumps(json.loads(chars5),
                        ensure_ascii=False).replace(" ", "")
    chars3 = json.dumps(json.loads(chars3),
                        ensure_ascii=False).replace(" ", "")
    charsAll = [chars5, chars3]

    for i, chars in enumerate(charsAll):
        for guide in roman:
            results_final = {}
            results = {}
            results_hk = {}

            for script in scripts:
                results[script] = json.loads(
                    convert('HK', script, chars, False, [],
                            []).replace('،', ','))
                results_hk[script] = json.loads(
                    convert(script, 'HK',
                            json.dumps(results[script], ensure_ascii=False),
                            False, [], []))

            if guide != 'Velthuis':
                guide_chars = json.loads(
                    convert('HK', guide, chars, False, [], []))
            else:
                guide_chars = json.loads(
                    convert('HK', guide, chars, False, [],
                            []).replace('""', '"\\"'))

            results_final['results'] = results
            results_final['resultsHK'] = results_hk
            results_final['guideChars'] = guide_chars

            num = ["5", "3"]

            f = io.open("resources/script_matrix/script_matrix_" + guide +
                        num[i] + ".json",
                        mode="w",
                        encoding="utf-8")
            f.write(
                json.dumps(results_final,
                           ensure_ascii=False,
                           sort_keys=True,
                           indent=4))
            f.close()
Пример #2
0
def convert_loop_src_post():
    # print('There requests are')
    # print(request.json)

    results = {}

    # print('hereee')
    texts = json.loads(request.json['text'])

    for i, source in enumerate(request.json['sources']):
        # print(texts[source])
        text = convert(source, request.json['target'], json.dumps(texts[source],ensure_ascii=False), request.json['nativize'],
            request.json['preOptions'], request.json['postOptions'])
        text = text.replace('\n', '<br/>')
        try:
            results[source] = json.loads(text)
        except:
            print('finding')
            #print(text)
            #print(texts[source])
            #print('Exception')

    # print(results)

    return jsonify(results)
Пример #3
0
def latinmatrix_list():
    results_final = {}
    results = {}
    results_hk = {}
    guide = request.json['guide']
    scripts = request.json['scripts']
    chars = request.json['chars']

    for script in scripts:
        results[script] = convert('HK', script, json.dumps(chars).replace(' ',''), False,[],[])
        results_hk[script] = convert(script, 'HK', results[script], False,[],[])

    guide_chars = convert('HK', guide, json.dumps(chars).replace(' ',''), False,[],[])

    results_final['results'] = results
    results_final['resultsHK'] = results_hk
    results_final['guideChars'] = guide_chars

    return jsonify(results_final)
Пример #4
0
def fetch_site():
    url = request.args['url']
    r = requests.get(url)
    if "UTF-8" not in r.encoding:
        r.encoding = r.apparent_encoding

    htmlcontent = r.text

    #htmlcontent = htmlcontent.replace('href="/', 'href="' + url + '/')

    baseurl = re.sub('(https*://)([^/]+)/*.*', r'\1'+ r'\2', url,flags=re.IGNORECASE)
    baseurl = baseurl.replace('‍','')

    #print('Base URL')
    #print(baseurl)

    htmlcontent = convert(request.args['source'], request.args['target'], htmlcontent, json.loads(request.args['nativize']),
        json.loads(request.args['preOptions']), json.loads(request.args['postOptions']))

    # Replace relative paths with absolute paths
    htmlcontent=re.sub("(\")/",r"\1"+baseurl+"/",htmlcontent)
    htmlcontent=re.sub("(\.\")/",r"\1"+baseurl+"/",htmlcontent)
    htmlcontent=re.sub("(url\()\/",r"\1"+baseurl+"/",htmlcontent)

    ## Parameters

    params = 'source=' + request.args['source'] + '&target=' + request.args['target'] + '&preOptions=' + request.args['preOptions'] + '&postOptions=' + request.args['postOptions'] + '&nativize=' + request.args['nativize']

    transurl = html.escape("http://aksharamukha.appspot.com/api/website?"+params+'&url=')

    # fix double dot
    urlparts = url.split("/")
    doubledot =""
    for  i in range(0, len(urlparts)-2):
        doubledot = doubledot + urlparts[i]+ "/"

    htmlcontent=htmlcontent.replace("../",doubledot)

    ## Replace links

    htmlcontent=re.sub("(<a href\=\"?)",r"\1"+transurl,htmlcontent)
    htmlcontent=re.sub("(<a class=.*? href\=\"?)",r"\1"+transurl,htmlcontent)
    htmlcontent=re.sub("(<a target\=\"\_blank\" href\=\")",r"\1"+transurl,htmlcontent)
    htmlcontent=re.sub("(<a target\=\"\_self\" href\=\")",r"\1"+transurl,htmlcontent)

    ## Replace with native numerals

    htmlcontent = PostProcess.RetainIndicNumerals(htmlcontent, request.args['target'], True)

    ## Retain Dandas

    htmlcontent = PostProcess.RetainDandasIndic(htmlcontent, request.args['target'], True)

    return htmlcontent
Пример #5
0
def convert_loop_tgt_post():
    # print('There requests are')
    # print(request.json)

    results = {}

    for target in request.json['targets']:
        text = convert(request.json['source'], target, request.json['text'], request.json['nativize'],
            request.json['preOptions'], request.json['postOptions'])
        text = text.replace('\n', '<br/>')
        results[target] = text

    # print(results)

    return jsonify(results)
Пример #6
0
def convert_post():
    #print('There requests are')
    #print(request.json)

    if 'text' in request.json:
        text = convert(request.json['source'], request.json['target'], request.json['text'], request.json['nativize'],
            request.json['preOptions'], request.json['postOptions'])
    else:
        text = ''

    text = text.replace('\n', '<br/>')

    #print(text)

    return text
Пример #7
0
def generate_common_letters():
    letters = [
        "ka", "kA", "ki", "kI", "ku", "kU", "kR", "kE", "ke", "kai", "kO",
        "ko", "kau", "kha", "khA", "khi", "khI", "khu", "khU", "khR", "khE",
        "khe", "khai", "khO", "kho", "khau", "ga", "gA", "gi", "gI", "gu",
        "gU", "gR", "gE", "ge", "gai", "gO", "go", "gau", "gha", "ghA", "ghi",
        "ghI", "ghu", "ghU", "ghR", "ghE", "ghe", "ghai", "ghO", "gho", "ghau",
        "Ga", "GA", "Gi", "GI", "Gu", "GU", "GR", "GE", "Ge", "Gai", "GO",
        "Go", "Gau", "ca", "cA", "ci", "cI", "cu", "cU", "cR", "cE", "ce",
        "cai", "cO", "co", "cau", "cha", "chA", "chi", "chI", "chu", "chU",
        "chR", "chE", "che", "chai", "chO", "cho", "chau", "ja", "jA", "ji",
        "jI", "ju", "jU", "jR", "jE", "je", "jai", "jO", "jo", "jau", "jha",
        "jhA", "jhi", "jhI", "jhu", "jhU", "jhR", "jhE", "jhe", "jhai", "jhO",
        "jho", "jhau", "Ja", "JA", "Ji", "JI", "Ju", "JU", "JR", "JE", "Je",
        "Jai", "JO", "Jo", "Jau", "Ta", "TA", "Ti", "TI", "Tu", "TU", "TR",
        "TE", "Te", "Tai", "TO", "To", "Tau", "Tha", "ThA", "Thi", "ThI",
        "Thu", "ThU", "ThR", "ThE", "The", "Thai", "ThO", "Tho", "Thau", "Da",
        "DA", "Di", "DI", "Du", "DU", "DR", "DE", "De", "Dai", "DO", "Do",
        "Dau", "Dha", "DhA", "Dhi", "DhI", "Dhu", "DhU", "DhR", "DhE", "Dhe",
        "Dhai", "DhO", "Dho", "Dhau", "Na", "NA", "Ni", "NI", "Nu", "NU", "NR",
        "NE", "Ne", "Nai", "NO", "No", "Nau", "ta", "tA", "ti", "tI", "tu",
        "tU", "tR", "tE", "te", "tai", "tO", "to", "tau", "tha", "thA", "thi",
        "thI", "thu", "thU", "thR", "thE", "the", "thai", "thO", "tho", "thau",
        "da", "dA", "di", "dI", "du", "dU", "dR", "dE", "de", "dai", "dO",
        "do", "dau", "dha", "dhA", "dhi", "dhI", "dhu", "dhU", "dhR", "dhE",
        "dhe", "dhai", "dhO", "dho", "dhau", "na", "nA", "ni", "nI", "nu",
        "nU", "nR", "nE", "ne", "nai", "nO", "no", "nau", "pa", "pA", "pi",
        "pI", "pu", "pU", "pR", "pE", "pe", "pai", "pO", "po", "pau", "pha",
        "phA", "phi", "phI", "phu", "phU", "phR", "phE", "phe", "phai", "phO",
        "pho", "phau", "ba", "bA", "bi", "bI", "bu", "bU", "bR", "bE", "be",
        "bai", "bO", "bo", "bau", "bha", "bhA", "bhi", "bhI", "bhu", "bhU",
        "bhR", "bhE", "bhe", "bhai", "bhO", "bho", "bhau", "ma", "mA", "mi",
        "mI", "mu", "mU", "mR", "mE", "me", "mai", "mO", "mo", "mau", "ya",
        "yA", "yi", "yI", "yu", "yU", "yR", "yE", "ye", "yai", "yO", "yo",
        "yau", "ra", "rA", "ri", "rI", "ru", "rU", "rR", "rE", "re", "rai",
        "rO", "ro", "rau", "la", "lA", "li", "lI", "lu", "lU", "lR", "lE",
        "le", "lai", "lO", "lo", "lau", "va", "vA", "vi", "vI", "vu", "vU",
        "vR", "vE", "ve", "vai", "vO", "vo", "vau", "za", "zA", "zi", "zI",
        "zu", "zU", "zR", "zE", "ze", "zai", "zO", "zo", "zau", "Sa", "SA",
        "Si", "SI", "Su", "SU", "SR", "SE", "Se", "Sai", "SO", "So", "Sau",
        "sa", "sA", "si", "sI", "su", "sU", "sR", "sE", "se", "sai", "sO",
        "so", "sau", "ha", "hA", "hi", "hI", "hu", "hU", "hR", "hE", "he",
        "hai", "hO", "ho", "hau", "La", "LA", "Li", "LI", "Lu", "LU", "LR",
        "LE", "Le", "Lai", "LO", "Lo", "Lau", "Za", "ZA", "Zi", "ZI", "Zu",
        "ZU", "ZR", "ZE", "Ze", "Zai", "ZO", "Zo", "Zau", "r2a", "r2A", "r2i",
        "r2I", "r2u", "r2U", "r2R", "r2E", "r2e", "r2ai", "r2O", "r2o", "r2au",
        "n2a", "n2A", "n2i", "n2I", "n2u", "n2U", "n2R", "n2E", "n2e", "n2ai",
        "n2O", "n2o", "n2au", "a", "A", "i", "I", "u", "U", "R", "E", "e",
        "ai", "O", "o", "au", "k", "kh", "g", "gh", "G", "c", "ch", "j", "jh",
        "J", "T", "Th", "D", "Dh", "N", "t", "th", "d", "dh", "n", "p", "ph",
        "b", "bh", "m", "y", "r", "l", "v", "z", "S", "s", "h", "L", "Z", "r2",
        "n2", "aM", "aH"
    ]

    script_combinations = list(itertools.combinations(scriptsAll, 2))

    total = len(script_combinations)

    i = 0
    for script1, script2 in script_combinations:
        i = i + 1

        print('Processing ' + str(i) + ' out of ' + str(total))

        results = {}

        if script1 == 'Tamil':
            pp = 'RemoveDiacriticsTamil'
        else:
            pp = 'RemoveDiacritics'

        letters_script1 = [
            convert('HK', script1, x, False, [], [pp]) for x in letters
        ]
        letters_script2 = [
            convert('HK', script2, x, False, [], [pp]) for x in letters
        ]

        letters_script1_hk = [
            convert(script1, 'HK', x, False, [], [pp]) for x in letters_script1
        ]
        letters_script2_hk = [
            convert(script2, 'HK', x, False, [], [pp]) for x in letters_script2
        ]

        leters_common = set(letters_script1_hk) & set(letters_script2_hk)

        letters_script1_common = [
            convert('HK', script1, x, False, [], [pp]) for x in leters_common
        ]
        letters_script2_common = [
            convert('HK', script2, x, False, [], [pp]) for x in leters_common
        ]

        results[script1] = letters_script1_common
        results[script2] = letters_script2_common

        script_sort = sorted([script1, script2])
        suffix = script_sort[0] + '_' + script_sort[1]

        if script_sort[0] <= 'Buhid':
            index = '1'
        elif script_sort[0] <= 'Kharoshthi':
            index = '2'
        else:
            index = '3'

        f = io.open("resources/common_letters" + index + "/common_letters_" +
                    suffix + ".json",
                    mode="w",
                    encoding="utf-8")
        f.write(
            json.dumps(results, ensure_ascii=False, sort_keys=True, indent=4))
        f.close()
Пример #8
0
def generate_conjuncts():
    conj = '{"conjuncts1S1":["ak","akh","ag","agh","aṅ","ac","ach","aj","ajh","añ","aṭ","aṭh","aḍ","aḍh","aṇ","at","ath","ad","adh","an","ap","aph","ab","abh","am","ay","ar","al","av","aś","aṣ","as","ah","al̤"],"conjuncts2S1":["kka","kkha","kca","kcha","kṇa","kta","ktha","kna","kpa","kpha","kma","kya","kra","kla","kva","kśa","kṣa","ksa","khkha","khna","khya","khva","gga","ggha","gja","gḍa","gṇa","gda","gdha","gna","gba","gbha","gma","gya","gra","gla","gva","ghna","ghma","ghya","ghra","ghva","ṅka","ṅkha","ṅga","ṅgha","ṅṅa","ṅca","ṅja","ṅta","ṅda","ṅdha","ṅna","ṅpa","ṅbha","ṅma","ṅya","ṅra","ṅva","ṅśa","ṅsa","ṅha","cca","ccha","cña","cma","cya","cra","cva","chya","jja","jjha","jña","jma","jya","jra","jva","jha","jhña","cña","cha","ñja","ñjha","ñña","ñśa","ñha","ṭka","ṭkha","ṭca","ṭcha","ṭṭa","ṭṇa","ṭta","ṭpa","ṭpha","ṭma","ṭya","ṭva","ṭśa","ṭṣa","ṭsa","ṭhya","ḍga","ḍgha","ḍja","ḍḍa","ḍḍha","ḍda","ḍdha","ḍba","ḍbha","ḍma","ḍya","ḍra","ḍla","ḍva","ḍhya","ḍhra","ḍhva","ṇṭa","ṇṭha","ṇḍa","ṇḍha","ṇṇa","ṇna","ṇma","ṇya","ṇva","ṇha","tka","tkha","tta","ttha","tna","tpa","tpha","tma","tya","tra","tva","tṣa","tsa","thna","thya","thra","thva","dga","dgha","dda","ddha","dna","dba","dbha","dma","dya","dra","dva","dhna","dhma","dhya","dhra","dhva","nka","nkha","nga","ngha","nta","ntha","nda","ndha","nna","npa","npha","nba","nbha","nma","nya","nra","nva","nṣa","nsa","nha","pka","pkha","pca","pcha","pṭa","pṇa","pta","pna","ppa","ppha","pma","pya","pra","pla","pva","pśa","psa","bga","bja","bda","bdha","bba","bbha","bya","bra","bla","bva","bhṇa","bhna","bhma","bhya","bhra","bhla","bhva","mṇa","mna","mpa","mpha","mba","mbha","mma","mya","mra","mla","mva","mha","yya","yva","rka","rka","hra","gra","gha","rca","rcha","rja","rja","hra","ṭra","ḍra","ḍha","rṇa","rta","rtha","rda","rdha","rna","rpa","rpha","rba","rbha","rma","rya","rla","rva","rśa","rṣa","rsa","rha","lka","lga","lda","lpa","lpha","lba","lbha","lma","lya","lla","lva","lśa","lha","vṇa","vna","vya","vra","vla","śca","ścha","śna","śpa","śma","śya","śra","śla","śva","śśa","ṣka","ṣkha","ṣṭa","ṣṭha","ṣṇa","ṣpa","ṣpha","ṣma","ṣya","ṣra","ṣva","ṣṣa","ska","skha","sta","stha","sna","spa","spha","sma","sya","sra","sva","ssa","hṇa","hna","hma","hya","hra","hla","hva","l̤ha"],"conjuncts3S1":["kkra","kkla","kkva","kkṣa","ktya","ktra","ktva","kthna","kthya","knya","kpra","kpla","kmya","krya","klya","kśma","kśra","kśla","kśva","kṣṇa","kṣma","kṣya","kṣra","kṣva","ksta","kstha","ksna","kspa","kspha","ksma","ksya","ksra","ksva","ggra","gghya","gghra","gjña","gjya","gjva","gdya","gdra","gdva","gdhya","gdhra","gdhva","gnya","gbra","gbhya","gbhra","gmya","grya","grva","gvya","gvra","ghnya","ghrya","ghvya","ṅkta","ṅktha","ṅkya","ṅkra","ṅkla","ṅkva","ṅkṣa","ṅksa","ṅkhya","ṅgdha","ṅgya","ṅgra","ṅgva","ṅghna","ṅghya","ṅghra","ṅtra","ṅtva","ṅdhya","ṅnya","ṅnra","ṅpra","ṅvya","ṅvra","ṅsva","ccya","cchma","cchya","cchra","cchla","cchva","cñya","jjña","jjya","jjva","jjhya","jñya","jñva","jmya","jrya","jvya","ñcma","ñcya","ñcva","ñchna","ñchya","ñchra","ñchla","ñchva","ñjña","ñjma","ñjya","ñjva","ñśma","ñśya","ñśra","ñśla","ñśva","ṭkra","ṭkṣa","ṭṭya","ṭtra","ṭtva","ṭpra","ṭśra","ṭśla","ṭsta","ṭstha","ṭsna","ṭspa","ṭsva","ḍgya","ḍgra","ḍghra","ḍjña","ḍjya","ḍḍhya","ḍḍhva","ḍdva","ḍbra","ḍbhya","ḍbhra","ḍvya","ṇṭya","ṇṭhya","ṇḍḍha","ṇḍya","ṇḍra","ṇḍva","ṇḍhya","ṇḍhra","ṇvya","tkya","tkra","tkla","tkva","tkṣa","tkhya","ttna","ttma","ttya","ttra","ttva","ttsa","tthya","tnya","tnva","tpra","tpla","tmya","tyva","trya","trva","tvya","tska","tskha","tsta","tstha","tsna","tspa","tspha","tsma","tsya","tsra","tsva","thnya","thvya","dgra","dgla","dghna","dghra","ddya","ddra","ddva","ddhma","ddhya","ddhra","ddhva","dbra","dbhya","dbhra","dbhva","dmya","drya","drva","dvya","dvra","dhnya","dhrya","dhvya","dhvra","nkra","nkla","nkva","nkṣa","nkhya","ngra","ngla","nghna","nghra","ntta","nttha","ntma","ntya","ntra","ntva","ntsa","nthya","nddha","ndma","ndya","ndra","ndva","ndhma","ndhya","ndhra","ndhva","nnya","nnva","npra","npla","npsa","nbra","nbhra","nmya","nmra","nmla","nyva","nvya","nvra","nska","nskha","nsta","nstha","nsna","nspa","nspha","nsma","nsya","nsra","nsva","nhya","nhra","nhva","pkṣa","ptya","ptra","ptva","pnya","ppra","prya","pśya","psna","psya","psva","bgra","bjya","bdya","bdhya","bdhva","bbra","bbhya","bvya","bhrya","bhrva","bhvya","mnya","mpya","mpra","mpla","mpsa","mbya","mbra","mbva","mbhya","mbhra","mmya","mmra","mmla","mrya","rkca","rkta","rktha","rkpa","rkya","rkṣa","rksa","rkhya","rgga","rggha","rgja","rgbha","rgya","rgra","rgla","rgva","rghna","rghya","rghra","rṅkha","rṅga","rccha","rcya","rjña","rjma","rjya","rjva","rñja","rḍya","rḍhya","rṇṇa","rṇya","rṇva","rtta","rtna","rtma","rtya","rtra","rtva","rtsa","rthya","rddha","rdma","rdya","rdra","rdva","rdhna","rdhma","rdhya","rdhra","rdhva","rnya","rnva","rpya","rbra","rbhya","rbhra","rbhva","rmya","rmra","rmla","ryya","rvya","rvra","rvla","rśma","rśya","rśva","rṣṭa","rṣṭha","rṣṇa","rṣma","rṣya","rṣva","rsra","rsva","rhya","rhra","rhla","rhva","lkya","lgva","lpya","lbya","lbhya","llya","lvya","lhya","vnya","ścya","śnya","śmya","śrya","śrva","śvya","ṣkya","ṣkra","ṣkla","ṣkva","ṣkṣa","ṣṭya","ṣṭra","ṣṭva","ṣṭhya","ṣṭhva","ṣṇya","ṣṇva","ṣpya","ṣpra","ṣpla","ṣmya","skra","stma","stya","stra","stva","stsa","sthna","sthya","snya","spra","sphya","smya","srya","svya","ssya","ssva","hnya","hmya","hvya"],"conjuncts4S1":["ktrya","ktvya","kṣṇya","kṣmya","kstra","gdvya","gdhrya","ṅktya","ṅktra","ṅktva","ṅkṣṇa","ṅkṣma","ṅkṣya","ṅkṣva","ṅgdhya","ṅgdhva","ṅghrya","tkṣma","tkṣva","ttrya","tstra","tsthya","tspra","tsphya","ddvya","nttva","ntrya","ntvya","ntsta","ntstha","ntsna","ntspa","ntsya","ntsra","ntsva","nddhya","nddhva","ndrya","ndvya","ndhrya","nstra","nsphya","ptrya","psnya","rkṣṇa","rkṣya","rksva","rṅgya","rjmya","rttra","rtnya","rtrya","rtvya","rtsna","rtsya","rddhya","rdrya","rdvya","rdhnya","rśvya","rṣṭya","rṣṇya","lgvya","ṣṭrya","strya","sthnya"],"conjuncts5S1":["rtsnya"]}'
    vowels = [
        'a', 'ā', 'i', 'ī', 'u', 'ū', 'ṛ', 'ĕ', 'e', 'ai', 'ŏ', 'o', 'au',
        'aṃ', 'aḥ'
    ]

    total = len(scripts) * len(vowels)

    i = 0

    for script1 in scripts:
        results = {}
        postoptions = []

        if script1[0:3] < 'Tir':
            index = '1'
        else:
            index = '2'

        for vowel in vowels:
            i = i + 1

            print('Processing ' + str(i) + ' out of ' + str(total))

            conj2 = json.loads(conj.replace('a', vowel))

            for key, value in conj2.items():
                result_script1 = list(
                    unique_everseen([
                        convert('IAST', script1, x, False, [], [])
                        for x in value
                    ]))
                result_iast = [
                    convert(script1, 'IAST', x, False, ['removeChillus'], [])
                    for x in result_script1
                ]
                actual_result = sorted(set(value) & set(result_iast),
                                       key=value.index)

                results[key] = [
                    convert('IAST', script1, x, False, [], postoptions)
                    for x in actual_result
                ]

            f = io.open("resources/conjuncts" + index + "/conjuncts_" +
                        script1 + "_" + vowel + ".json",
                        mode="w",
                        encoding="utf-8")
            f.write(
                json.dumps(results,
                           ensure_ascii=False,
                           sort_keys=True,
                           indent=4))
            f.close()

            if (script1 == 'Sinhala' or script1 == 'Chakma'):
                postoptions = ['SinhalaConjuncts', 'ChakmaEnableAllConjuncts']

                for key, value in conj2.items():
                    result_script1 = list(
                        unique_everseen([
                            convert('IAST', script1, x, False, [], [])
                            for x in value
                        ]))
                    result_iast = [
                        convert(script1, 'IAST', x, False, ['removeChillus'],
                                []) for x in result_script1
                    ]
                    actual_result = sorted(set(value) & set(result_iast),
                                           key=value.index)

                    results[key] = [
                        convert('IAST', script1, x, False, [], postoptions)
                        for x in actual_result
                    ]

                f = io.open("resources/conjuncts" + index + "/conjuncts_" +
                            script1 + "_" + vowel + "_all.json",
                            mode="w",
                            encoding="utf-8")
                f.write(
                    json.dumps(results,
                               ensure_ascii=False,
                               sort_keys=True,
                               indent=4))
                f.close()
Пример #9
0
def generate_syllables():
    results = {}

    for script1 in scripts_syllabary:
        vowelsAll = [
            'a', 'A', 'i', 'I', 'u', 'U', 'R', 'RR', 'lR', 'lRR', 'E', 'e',
            'ai', 'O', 'o', 'au', 'aE', 'AE', 'aO', 'aM', 'aH', 'a~'
        ]

        if script1 == 'Tamil':
            pp = 'RemoveDiacriticsTamil'
        else:
            pp = 'RemoveDiacritics'

        vowelsScript1 = list(
            unique_everseen([
                convert('HK', script1, x, False, [], [pp]) for x in vowelsAll
            ]))
        vowelsScript1_hk = [
            convert(script1, 'HK', x, False, [], []) for x in vowelsScript1
        ]

        actual_vowels = sorted(set(vowelsAll) & set(vowelsScript1_hk),
                               key=vowelsAll.index)

        if script1 == 'Tamil':
            actual_vowels = [
                x for x in actual_vowels if x not in ['aE', 'AE', 'aO']
            ]

        vowelsScript1 = list(
            unique_everseen([
                convert('HK', script1, x, False, [], [pp])
                for x in actual_vowels
            ]))
        vowelsUnique = [
            convert(script1, 'HK', x, False, [], []) for x in vowelsScript1
        ]

        consonantsAll = [
            'k', 'kh', 'g', 'gh', 'G', 'c', 'ch', 'j', 'jh', 'J', 'T', 'Th',
            'D', 'Dh', 'N', 't', 'th', 'd', 'dh', 'n', 'p', 'ph', 'b', 'bh',
            'm', 'y', 'r', 'l', 'v', 'z', 'S', 's', 'h', 'Z', 'L', 'r2', 'n2',
            'q', 'qh', 'g2', 'z2', 'r3', 'r3h', 'f', 'Y'
        ]

        if script1 == 'Sinhala' or script1 == 'Rejang':
            consonantsAll = consonantsAll + ['n*g', 'n*j', 'n*D', 'n*d', 'm*b']

        consonantsScript1 = list(
            unique_everseen([
                convert('HK', script1, x + 'a', False, [], [pp])
                for x in consonantsAll
            ]))
        consonantsScript1_hk = map(removeA, [
            convert(script1, 'HK', x, False, [], []) for x in consonantsScript1
        ])
        actual_consonants = sorted(set(consonantsAll)
                                   & set(consonantsScript1_hk),
                                   key=consonantsAll.index)

        # print(actual_consonants)

        consonantsScript1 = list(
            unique_everseen([
                convert('HK', script1, x + 'a', False, [], [pp])
                for x in actual_consonants
            ]))

        consonantUnique = [
            convert(script1, 'HK', x, False, [], [pp])
            for x in consonantsScript1
        ]
        consonantUnique = [x.replace('a', '') for x in consonantUnique]

        compound = []

        if script1 == 'BatakSima':
            vowelsUnique.append('e')
            vowelsUnique.append('o')
            vowelsUnique.append('au')

        elif script1 == 'BatakManda':
            vowelsUnique.append('e')
            vowelsUnique.append('o')

        elif script1 == 'BatakPakpak':
            vowelsUnique.append('e')
            vowelsUnique.append('aE')
            vowelsUnique.append('o')

        elif script1 == 'BatakToba':
            vowelsUnique.append('e')
            vowelsUnique.append('o')

        elif script1 == 'BatakKaro':
            vowelsUnique.append('e')
            vowelsUnique.append('aE')
            vowelsUnique.append('o')
            vowelsUnique.append('aO')

        elif script1 == 'Khojki':
            vowelsUnique.append('I')

        vowelsUnique = [
            x for x in sorted(vowelsUnique, key=vowelsAll.index) if x != 'a'
        ]

        for cons in consonantUnique:
            for vow in vowelsUnique:
                compound.append(cons + vow)

            compound.append(cons)
            compound.append('&' + cons)

        if script1 == 'Multani' or script1 == 'Mahajani':
            compound = []

        compoundsScript1 = list(
            unique_everseen([
                convert('HK', script1, x, False, [], [pp]) for x in compound
            ]))
        compoundsScript1 = [
            x for x in compoundsScript1 if x not in consonantsScript1
        ]

        results['vowels'] = vowelsScript1
        results['consonants'] = consonantsScript1
        results['compounds'] = compoundsScript1

        f = io.open("resources/syllabary/syllabary_" + script1 + ".json",
                    mode="w",
                    encoding="utf-8")
        f.write(
            json.dumps(results, ensure_ascii=False, sort_keys=True, indent=4))
        f.close()
Пример #10
0
def describe_list():
    results = {}
    results['script1'] = convert('HK', request.json['script1'], json.dumps(request.json['text']).replace(' ',''), False,[],[])
    results['script2'] = convert('HK', request.json['script2'], json.dumps(request.json['text']).replace(' ',''), False,[],[])
    results['script1hk'] = convert(request.json['script1'], 'HK', results['script1'], False,[],[])
    return jsonify(results)
Пример #11
0
def conjuncts_list():
    script1 = request.json['script1']
    script2 = request.json['script2']
    vowel = request.json['vowel']

    postoptions = request.json['postoptions']

    print('The post options are :: ')
    print(postoptions)

    if script1[0:3] < 'Tir':
        index = '1'
    else:
        index = '2'

    """
    for key, value in conj.items():
        result_script1 = list(unique_everseen([convert('IAST', script1, x, False,[],[]) for x in value]))
        result_iast = [convert(script1, 'IAST', x, False,['removeChillus'],[]) for x in result_script1]
        actual_result = sorted(set(value) & set(result_iast), key=value.index)

        results[key] = [convert('IAST', script1, x, False,[], postoptions) for x in actual_result]
        results[key[:-1] + '2'] = [convert('IAST', script2, x, False,[], []) for x in actual_result]
    """

    if script1 == 'Sinhala':
        if 'SinhalaConjuncts' in postoptions:
            file = 'resources/conjuncts'+ index + '/conjuncts_' + script1 + '_' + vowel + '_all' + '.json'
        else:
            file = 'resources/conjuncts'+ index + '/conjuncts_' + script1 +  '_' + vowel + '.json'
    elif script1 == 'Chakma':
        if 'ChakmaEnableAllConjuncts' in postoptions:
            file = 'resources/conjuncts'+ index + '/conjuncts_' + script1 + '_' + vowel + '_all' + '.json'
        else:
            file = 'resources/conjuncts'+ index + '/conjuncts_' + script1 +  '_' + vowel + '.json'
    else:
        file = 'resources/conjuncts'+ index + '/conjuncts_' + script1 +  '_' + vowel + '.json'

    f = open (file, 'r', encoding='utf-8')
    conjuncts = f.read()
    conjuncts = conjuncts.replace('،', ',').replace(" ", "")
    f.close()

    if script2 != 'Velthuis':
        conjuncts_guide = convert(script1, script2, conjuncts, False,[],[])
        conjuncts_guide = PostProcess.RetainIndicNumerals(conjuncts_guide, script2, True)
        conjuncts_guide = json.loads(conjuncts_guide.replace('،', ','))
    else:
        conjuncts_guide = convert(script1, script2, conjuncts, False,[],[]).replace('""', '"\\"').replace('&"', '&\\"')
        conjuncts_guide = json.loads(PostProcess.RetainIndicNumerals(conjuncts_guide, script2, True))

    conjuncts = json.loads(conjuncts)

    results = {}

    results['conjuncts1S1'] = conjuncts['conjuncts1S1']
    results['conjuncts2S1'] = conjuncts['conjuncts2S1']
    results['conjuncts3S1'] = conjuncts['conjuncts3S1']
    results['conjuncts4S1'] = conjuncts['conjuncts4S1']
    results['conjuncts5S1'] = conjuncts['conjuncts5S1']

    results['conjuncts1S2'] = conjuncts_guide['conjuncts1S1']
    results['conjuncts2S2'] = conjuncts_guide['conjuncts2S1']
    results['conjuncts3S2'] = conjuncts_guide['conjuncts3S1']
    results['conjuncts4S2'] = conjuncts_guide['conjuncts4S1']
    results['conjuncts5S2'] = conjuncts_guide['conjuncts5S1']

    return jsonify(results)
Пример #12
0
def syllabary_list():
    """
    results = {}
    vowelsAll = ['a', 'A', 'i', 'I', 'u', 'U', 'R', 'RR', 'lR', 'lRR', 'E', 'e', 'ai', 'O', 'o', 'au', 'aE', 'AE', 'aO', 'aM', 'aH', 'a~']
    script1 = request.json['script1']
    script2 = request.json['script2']

    if script1 == 'Tamil':
        pp = 'RemoveDiacriticsTamil'
    else:
        pp = 'RemoveDiacritics'

    vowelsScript1 = list(unique_everseen([convert('HK', script1, x, False,[],[pp]) for x in vowelsAll]))
    vowelsScript1_hk = [convert(script1, 'HK', x, False,[],[]) for x in vowelsScript1]

    actual_vowels = sorted(set(vowelsAll) & set(vowelsScript1_hk), key=vowelsAll.index)

    if script1 == 'Tamil':
        actual_vowels = [x for x in actual_vowels if x not in ['aE', 'AE', 'aO']]

    vowelsScript1 = list(unique_everseen([convert('HK', script1, x, False,[],[pp]) for x in actual_vowels]))
    vowelsScript2 = [convert(script1, script2, x, False,[],[]) for x in vowelsScript1]

    vowelsUnique = [convert(script1, 'HK', x, False,[],[]) for x in vowelsScript1]

    consonantsAll = ['k', 'kh', 'g', 'gh', 'G',
        'c', 'ch', 'j', 'jh', 'J',
        'T', 'Th', 'D', 'Dh', 'N',
        't', 'th', 'd', 'dh', 'n',
        'p', 'ph', 'b', 'bh', 'm',
        'y', 'r', 'l', 'v',
        'z', 'S', 's', 'h',
        'Z', 'L', 'r2', 'n2',
        'q', 'qh', 'g2', 'z2', 'r3', 'r3h', 'f', 'Y']

    if script1 == 'Sinhala' or  script1 == 'Rejang':
        consonantsAll = consonantsAll + ['n*g', 'n*j', 'n*D', 'n*d', 'm*b']

    consonantsScript1 = list(unique_everseen([convert('HK', script1, x+'a', False,[],[pp]) for x in consonantsAll]))
    consonantsScript1_hk = map(removeA, [convert(script1, 'HK', x, False,[],[]) for x in consonantsScript1])
    actual_consonants = sorted(set(consonantsAll) & set(consonantsScript1_hk), key=consonantsAll.index)

    # print(actual_consonants)

    consonantsScript1 = list(unique_everseen([convert('HK', script1, x+'a', False,[],[pp]) for x in actual_consonants]))

    consonantsScript2 = [convert(script1, script2, x, False,[],[]) for x in consonantsScript1]

    consonantUnique = [convert(script1, 'HK', x, False,[],[pp]) for x in consonantsScript1]
    consonantUnique = [x.replace('a','') for x in consonantUnique]

    compound = []

    if script1 == 'BatakSima':
        vowelsUnique.append('e')
        vowelsUnique.append('o')
        vowelsUnique.append('au')

    elif script1 == 'BatakManda':
        vowelsUnique.append('e')
        vowelsUnique.append('o')

    elif script1 == 'BatakPakpak':
        vowelsUnique.append('e')
        vowelsUnique.append('aE')
        vowelsUnique.append('o')

    elif script1 == 'BatakToba':
        vowelsUnique.append('e')
        vowelsUnique.append('o')

    elif script1 == 'BatakKaro':
        vowelsUnique.append('e')
        vowelsUnique.append('aE')
        vowelsUnique.append('o')
        vowelsUnique.append('aO')

    elif script1 == 'Khojki':
        vowelsUnique.append('I')

    vowelsUnique = [x for x in sorted(vowelsUnique, key=vowelsAll.index) if x !='a']

    for cons in consonantUnique:
        for vow in vowelsUnique:
            compound.append(cons + vow)

        compound.append(cons)
        compound.append('&' + cons)

    if script1 == 'Multani' or script1 == 'Mahajani':
        compound = []

    compoundsScript1 = list(unique_everseen([convert('HK', script1, x, False,[],[pp]) for x in compound]))
    compoundsScript1 = [x for x in compoundsScript1  if x not in consonantsScript1]
    compoundsScript2 = [convert(script1, script2, x, False,[],[]) for x in compoundsScript1]
    """


    script1 = request.json['script1']
    script2 = request.json['script2']

    f = open ('resources/syllabary/syllabary_' + script1 + '.json', 'r', encoding='utf-8')
    syllabary = f.read()
    syllabary = syllabary.replace('،', ',').replace(" ", "")
    f.close()

    if script2 != 'Velthuis':
        syllabary_guide = convert(script1, script2, syllabary, False,[],[])
        syllabary_guide = json.loads(syllabary_guide.replace('،', ','))
    else:
        syllabary_guide = convert(script1, script2, syllabary, False,[],[]).replace('""', '"\\"').replace('&"', '&\\"')
        syllabary_guide = json.loads(syllabary_guide)

    syllabary = json.loads(syllabary)

    results = {}

    results['vowelsScript1'] = syllabary['vowels']
    results['vowelsScript2'] = syllabary_guide['vowels']
    results['consonantsScript1'] = syllabary['consonants']
    results['consonantsScript2'] = syllabary_guide['consonants']
    results['compoundsScript1'] = syllabary['compounds']
    results['compoundsScript2'] = syllabary_guide['compounds']

    return jsonify(results)