示例#1
0
def translitToArabic():
    """Convert betaCode read from stdin to Arabic script and write it to stdout.

    All of standard input is read as UTF-8. When ``args.delimited`` is not
    ``None``, only the spans enclosed between two unescaped ``%`` signs are
    passed to ``betaCode.betacodeToArabic``; each such span (delimiters
    included) is replaced by the converted text and everything else is left
    untouched. Otherwise the whole input is converted. The result is written
    to standard output as UTF-8 bytes.
    """
    text = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8').read()

    # Identity comparison is the reliable way to test for None.
    if args.delimited is not None:
        # (?<!\\)% matches a percent sign that is not preceded by a backslash;
        # DOTALL lets a delimited span run across line breaks.
        text = re.sub(
            r"(?<!\\)%(.*?)(?<!\\)%",
            lambda match: betaCode.betacodeToArabic(match.group(1)),
            text,
            flags=re.DOTALL,
        )
    else:
        text = betaCode.betacodeToArabic(text)

    sys.stdout.buffer.write(text.encode('utf8'))
示例#2
0
def _secondLine(section):
    """Return the second line of *section* once runs of newlines are collapsed."""
    return re.sub("\n+", "\n", section).split("\n")[1]


def _replaceSecondLine(section, newLine):
    """Return *section* (runs of newlines collapsed) with its second line set to *newLine*."""
    lines = re.sub("\n+", "\n", section).split("\n")
    lines[1] = newLine
    return "\n".join(lines)


def regenerateFile(file):
    """Refresh the generated sections of a hadith ``_Working.txt`` file in place.

    The working file ``targetFolder + file[:-4] + "_Working.txt"`` is split on
    ``div``. The second line of a section is treated as its content, and the
    following sections are regenerated from the others:

    * section 3 <- ``betaCode.arabicToBetaCode`` of section 2 (Arabic original)
    * section 5 <- ``betaCode.betacodeToTranslit`` of section 4 (manual betaCode)
    * section 6 <- ``betaCode.betacodeToArabic`` of section 4
    * section 8 <- ``betaCode.betacodeToTranslit`` of section 7 (translation)

    The sections from index 1 onwards are then written back, each prefixed by
    ``div``, with runs of three or more newlines squeezed to two. Anything
    before the first divider is not written back.

    Args:
        file: Name of the source file; its last four characters (presumably
            the extension) are dropped to build the working file name.

    Raises:
        IndexError: If the file splits into fewer than nine parts or a section
            that is read or updated has fewer than two lines.
    """
    workingPath = targetFolder + file[:-4] + "_Working.txt"

    # Read everything first so the handle is closed before the same path is
    # reopened for writing below.
    with open(workingPath, "r", encoding="utf8") as f:
        # NOTE(review): div is used as a regex here and as literal text when
        # the record is reassembled, so it is assumed to contain no regex
        # metacharacters - confirm against its definition.
        hadith = re.split(div, f.read())

    # place to apply some additional conversions
    # (salla allahu `alayhi wasallama > sl`m)
    arabicOriginal = _secondLine(hadith[2])
    hadith[3] = _replaceSecondLine(
        hadith[3], betaCode.arabicToBetaCode(arabicOriginal))

    # The manually corrected betaCode drives both the transliteration and the
    # Arabic rendering.
    betaCodeManual = _secondLine(hadith[4])
    hadith[5] = _replaceSecondLine(
        hadith[5], betaCode.betacodeToTranslit(betaCodeManual))
    hadith[6] = _replaceSecondLine(
        hadith[6], betaCode.betacodeToArabic(betaCodeManual))

    translationBetaCode = _secondLine(hadith[7])
    hadith[8] = _replaceSecondLine(
        hadith[8], betaCode.betacodeToTranslit(translationBetaCode))

    # collect the record back
    newWorking = "".join(div + section + "\n\n" for section in hadith[1:])
    newWorking = re.sub("\n{3,}", "\n\n", newWorking)

    # save the text
    with open(workingPath, "w", encoding="utf8") as f:
        f.write(newWorking)
示例#3
0
def generateFile(file):
    """Create the ``_Working.txt`` template for one source hadith file.

    Reads ``sourceFolder + file``, takes the rest of the line after
    ``hadithID::`` and after ``arHadithInit::`` (both stripped), and writes
    eight ``div``-prefixed sections to
    ``targetFolder + file[:-4] + "_Working.txt"``: the header, the Arabic
    text, its automatic betaCode (twice: as the automatic and as the manual
    section), the transliteration and Arabic rendering of that betaCode, and
    two placeholder sections for the translation.

    Raises:
        AttributeError: If either marker line is missing, because
            ``re.search`` then returns ``None``.
    """
    with open(sourceFolder + file, "r", encoding="utf8") as source:
        record = source.read()

        idMatch = re.search(r"hadithID::(.*)\n", record)
        recordID = idMatch.group(1).strip()
        textMatch = re.search(r"arHadithInit::(.*)\n", record)
        arabicText = textMatch.group(1).strip()

        # reformat optatives and other recognizeable elements

        # Sections are collected in output order and joined once at the end.
        sections = [
            div + "## hadithID::     %s\n## Completed by:: ADDYOURNAME\n## Finished::     NO/YES\n\n" % recordID,
            div + "## ArabicInitial\n%s\n\n" % arabicText,
        ]

        autoBetaCode = betaCode.arabicToBetaCode(arabicText)
        sections.append(div + "## betaCodeAuto\n%s\n\n" % autoBetaCode)
        # The manual section starts out as a copy of the automatic betaCode.
        sections.append(div + "## betaCodeManual\n%s\n\n" % autoBetaCode)
        sections.append(
            div + "## betaCodeManual>Translit\n%s\n\n" % betaCode.betacodeToTranslit(autoBetaCode))
        sections.append(
            div + "## betaCodeManual>Arabic\n%s\n\n" % betaCode.betacodeToArabic(autoBetaCode))
        sections.append(
            div + "## TranslationBetaCode\nType your translation on this line\n\n")
        sections.append(
            div + "## TranslationTranslit\nTranslation with transliterated names will appear here\n\n")

        # save the text
        workingPath = targetFolder + file[:-4] + "_Working.txt"
        with open(workingPath, "w", encoding="utf8") as target:
            target.write("".join(sections))
def arabicAllFiles(mainFolder):
    """Append transliteration and Arabic renderings to every file in a folder.

    Each regular file in *mainFolder* is read as UTF-8. If it contains the
    trigger line, everything before the first trigger line is kept, and the
    file is rewritten as: kept text, trigger line, the one-to-one
    transliteration block, then the Arabic-script block. Lines starting at
    ``###BETACODE#`` or ``NB:`` are removed from the text handed to the
    converters but stay in the kept text. Runs of two or more newlines in the
    result are normalised to exactly two. Files without the trigger line are
    left untouched and a hint is printed.

    Args:
        mainFolder: Directory to process; a trailing path separator is
            optional.
    """
    print("converting to Arabic: %s" % mainFolder)
    testLine = "###KEEP#THIS#LINE#TEXT#WILL#BE#GENERATED#BELOW#\n"
    for file in os.listdir(mainFolder):
        path = os.path.join(mainFolder, file)
        # Sub-directories and other non-files cannot be opened as text.
        if not os.path.isfile(path):
            continue

        # Read fully and close before the same path is reopened for writing.
        with open(path, "r", encoding="utf8") as f:
            text = f.read()

        if testLine not in text:
            print("""File %s does not have the trigger line.\nIf you want the contents of this file converted, add the following line at the end of the file:\n\n%s\n\n""" % (file, testLine))
            continue

        print("converting to Arabic %s" % file)
        # Only the part above the trigger line is source material; whatever
        # was generated below it on an earlier run is discarded.
        sourceText = text.split(testLine)[0]
        textToConvert = re.sub("###BETACODE#.*\n|NB:.*\n", "", sourceText)

        translitOTO = "###TRANSLIT#ONE#TO#ONE###\n\n%s" % betaCode.betacodeToTranslit(textToConvert)
        translitLOC = "###TRANSLIT#ARABIC#SCRIPT###\n\n%s" % betaCode.betacodeToArabic(textToConvert)

        newText = sourceText + "\n" + testLine + translitOTO + translitLOC
        newText = re.sub("\n{2,}", "\n\n", newText)

        with open(path, "w", encoding="utf8") as f:
            f.write(newText)
示例#5
0
def processArabicQuotes(file):
    """Insert or refresh the Arabic paragraph after each betaCode quote marker.

    A marker is a one-line comment ``<!--@@...-->`` followed by a newline,
    optionally followed by an existing ``<p class="arabic">...</p>``. For each
    marker the text between ``<!--@@`` and ``-->`` is printed, converted with
    ``betaCode.betacodeToArabic``, and the marker plus any existing paragraph
    is replaced by the marker followed by a fresh paragraph. The file is
    rewritten in place.

    Args:
        file: Path of the UTF-8 text file to process.
    """
    quotePattern = re.compile(
        r"(<!--@@.*?-->\n)(<p class=\"arabic\">.*?</p>)?")

    def renderQuote(match):
        marker = match.group(1)
        # Strip the leading "<!--@@" (6 chars) and trailing "-->\n" (4 chars).
        quote = marker[6:-4]
        print(quote)
        return "%s<p class=\"arabic\">%s</p>" % (
            marker, betaCode.betacodeToArabic(quote))

    with open(file, "r", encoding="utf8") as f:
        text = f.read()

    # One substitution pass rewrites each match exactly where it was found.
    # A whole-text str.replace per match would hit every identical marker on
    # every iteration and give repeated quotes duplicate paragraphs.
    text = quotePattern.sub(renderQuote, text)

    with open(file, "w", encoding="utf8") as f:
        f.write(text)
    print("To Arabic: %s has been processed..." % file)
def processArabicQuotes(input_file):
    """Insert or refresh the Arabic paragraph after each betaCode quote marker.

    A marker is a one-line comment ``<!--@@...-->`` followed by a newline,
    optionally followed by an existing ``<p class="arabic">...</p>``. The text
    between ``<!--@@`` and ``-->`` is converted with
    ``betaCode.betacodeToArabic`` and the marker plus any existing paragraph
    is replaced by the marker followed by a fresh paragraph. The result is
    written back to the same path and a summary line is printed.

    Args:
        input_file: Path of the UTF-8 text file to process in place.
    """
    quotePattern = re.compile(
        r"(<!--@@.*?-->\n)(<p class=\"arabic\">.*?</p>)?")

    def renderQuote(match):
        marker = match.group(1)
        # Strip the leading "<!--@@" (6 chars) and trailing "-->\n" (4 chars).
        arabic = betaCode.betacodeToArabic(marker[6:-4])
        return "%s<p class=\"arabic\">%s</p>" % (marker, arabic)

    with open(input_file, "r", encoding="utf8") as f:
        text = f.read()

    # One substitution pass rewrites each match exactly where it was found.
    # A whole-text str.replace per match would hit every identical marker on
    # every iteration and give repeated quotes duplicate paragraphs.
    text = quotePattern.sub(renderQuote, text)

    output_file = input_file
    with open(output_file, "w", encoding="utf8") as f:
        f.write(text)
    # This function produces Arabic script, so the summary says so.
    print("To Arabic: {} has been processed as {}.".format(input_file, output_file))
示例#7
0
def processArabicQuotes(input_file):
    """Insert or refresh the Arabic paragraph after each betaCode quote marker.

    A marker is a one-line comment ``<!--@@...-->`` followed by a newline,
    optionally followed by an existing ``<p class="arabic">...</p>``. The text
    between ``<!--@@`` and ``-->`` is converted with
    ``betaCode.betacodeToArabic`` and the marker plus any existing paragraph
    is replaced by the marker followed by a fresh paragraph. The result is
    written back to the same path and a summary line is printed.

    Args:
        input_file: Path of the UTF-8 text file to process in place.
    """
    quotePattern = re.compile(
        r"(<!--@@.*?-->\n)(<p class=\"arabic\">.*?</p>)?")

    def renderQuote(match):
        marker = match.group(1)
        # Strip the leading "<!--@@" (6 chars) and trailing "-->\n" (4 chars).
        arabic = betaCode.betacodeToArabic(marker[6:-4])
        return "%s<p class=\"arabic\">%s</p>" % (marker, arabic)

    with open(input_file, "r", encoding="utf8") as f:
        text = f.read()

    # One substitution pass rewrites each match exactly where it was found.
    # A whole-text str.replace per match would hit every identical marker on
    # every iteration and give repeated quotes duplicate paragraphs.
    text = quotePattern.sub(renderQuote, text)

    output_file = input_file
    with open(output_file, "w", encoding="utf8") as f:
        f.write(text)
    # This function produces Arabic script, so the summary says so.
    print("To Arabic: {} has been processed as {}.".format(
        input_file, output_file))
示例#8
0
def generateFile(file):
    """Build the initial ``_Working.txt`` file for a single hadith source file.

    The source ``sourceFolder+file`` must contain a ``hadithID::`` line and an
    ``arHadithInit::`` line; the stripped remainder of each line supplies the
    record id and the Arabic text. The output, written to
    ``targetFolder+file[:-4]+"_Working.txt"``, consists of eight sections that
    each start with ``div``: header, Arabic text, automatic betaCode, a manual
    copy of that betaCode, its transliteration, its Arabic rendering, and two
    translation placeholders.

    Raises:
        AttributeError: When one of the two marker lines is not found
            (``re.search`` yields ``None``).
    """
    def section(body):
        # Every section of the working file opens with the divider.
        return div + body

    with open(sourceFolder+file, "r", encoding="utf8") as src:
        contents = src.read()
        hadithID = re.search(r"hadithID::(.*)\n", contents).group(1).strip()
        hadithText = re.search(r"arHadithInit::(.*)\n", contents).group(1).strip()

        # reformat optatives and other recognizeable elements

        # form the text
        pieces = []
        pieces.append(section(
            "## hadithID::     %s\n## Completed by:: ADDYOURNAME\n## Finished::     NO/YES\n\n" % hadithID))
        pieces.append(section("## ArabicInitial\n%s\n\n" % hadithText))

        generated = betaCode.arabicToBetaCode(hadithText)
        pieces.append(section("## betaCodeAuto\n%s\n\n" % generated))
        # The manual section is seeded with the automatic conversion.
        pieces.append(section("## betaCodeManual\n%s\n\n" % generated))
        pieces.append(section(
            "## betaCodeManual>Translit\n%s\n\n" % betaCode.betacodeToTranslit(generated)))
        pieces.append(section(
            "## betaCodeManual>Arabic\n%s\n\n" % betaCode.betacodeToArabic(generated)))
        pieces.append(section(
            "## TranslationBetaCode\nType your translation on this line\n\n"))
        pieces.append(section(
            "## TranslationTranslit\nTranslation with transliterated names will appear here\n\n"))

        # save the text
        outPath = targetFolder+file[:-4]+"_Working.txt"
        with open(outPath, "w", encoding="utf8") as out:
            out.write("".join(pieces))
示例#9
0
def _secondLine(section):
    """Return the second line of *section* once runs of newlines are collapsed."""
    return re.sub("\n+", "\n", section).split("\n")[1]


def _replaceSecondLine(section, newLine):
    """Return *section* (runs of newlines collapsed) with its second line set to *newLine*."""
    lines = re.sub("\n+", "\n", section).split("\n")
    lines[1] = newLine
    return "\n".join(lines)


def regenerateFile(file):
    """Refresh the generated sections of a hadith ``_Working.txt`` file in place.

    The working file ``targetFolder + file[:-4] + "_Working.txt"`` is split on
    ``div``. The second line of a section is treated as its content, and the
    following sections are regenerated from the others:

    * section 3 <- ``betaCode.arabicToBetaCode`` of section 2 (Arabic original)
    * section 5 <- ``betaCode.betacodeToTranslit`` of section 4 (manual betaCode)
    * section 6 <- ``betaCode.betacodeToArabic`` of section 4
    * section 8 <- ``betaCode.betacodeToTranslit`` of section 7 (translation)

    The sections from index 1 onwards are then written back, each prefixed by
    ``div``, with runs of three or more newlines squeezed to two. Anything
    before the first divider is not written back.

    Args:
        file: Name of the source file; its last four characters (presumably
            the extension) are dropped to build the working file name.

    Raises:
        IndexError: If the file splits into fewer than nine parts or a section
            that is read or updated has fewer than two lines.
    """
    workingPath = targetFolder + file[:-4] + "_Working.txt"

    # Read everything first so the handle is closed before the same path is
    # reopened for writing below.
    with open(workingPath, "r", encoding="utf8") as f:
        # NOTE(review): div is used as a regex here and as literal text when
        # the record is reassembled, so it is assumed to contain no regex
        # metacharacters - confirm against its definition.
        hadith = re.split(div, f.read())

    # place to apply some additional conversions
    # (salla allahu `alayhi wasallama > sl`m)
    arabicOriginal = _secondLine(hadith[2])
    hadith[3] = _replaceSecondLine(
        hadith[3], betaCode.arabicToBetaCode(arabicOriginal))

    # The manually corrected betaCode drives both the transliteration and the
    # Arabic rendering.
    betaCodeManual = _secondLine(hadith[4])
    hadith[5] = _replaceSecondLine(
        hadith[5], betaCode.betacodeToTranslit(betaCodeManual))
    hadith[6] = _replaceSecondLine(
        hadith[6], betaCode.betacodeToArabic(betaCodeManual))

    translationBetaCode = _secondLine(hadith[7])
    hadith[8] = _replaceSecondLine(
        hadith[8], betaCode.betacodeToTranslit(translationBetaCode))

    # collect the record back
    newWorking = "".join(div + section + "\n\n" for section in hadith[1:])
    newWorking = re.sub("\n{3,}", "\n\n", newWorking)

    # save the text
    with open(workingPath, "w", encoding="utf8") as f:
        f.write(newWorking)
def generateNewCSV():
    """Reformat ``raw_Cornu_All_Final.txt`` into ``Cornu_All_Final_Reformatted.txt``.

    The input is read as tab-separated rows; the first row is skipped as a
    header. From each row the code uses field 0 (lon), 1 (lat), 2 (region),
    4 (keyword passed to ``categorizeTop``), 5 (written as ``topTypeAlt``) and
    6 (raw name). Rows whose name field is ``'noData'`` and blank rows are
    skipped. For every remaining row a place URI is generated from the raw
    name, the name is cleaned of ``#``, ``?`` and ``-i``, split on ``/`` into
    variants, and Arabic and search forms are derived through ``bc``. Names
    starting with ``"Rout"`` are copied verbatim into all name columns and
    counted.

    The output rows are sorted, passed through ``bc.deNoise`` and written
    below a fixed header line. Progress counts are printed along the way.

    Raises:
        IndexError: If a non-blank row has fewer than seven fields.
    """
    rCount = 0

    print("Reformatting Cornu_All_Final.txt ...")
    newList = []
    with open("raw_Cornu_All_Final.txt", "r", encoding="utf8") as f:
        rows = f.read().split("\n")

    for row in rows[1:]:
        # A trailing newline in the input yields an empty last row that has
        # no fields to index.
        if not row.strip():
            continue

        fields = row.split("\t")
        if fields[6] == 'noData':
            continue

        nameRaw = fields[6]
        lon = fields[0]
        lat = fields[1]
        kwRaw = fields[4]
        # NOTE(review): the slicing below suggests categorizeTop returns a
        # string with a one-character code, a separator, then the type
        # label - confirm against its definition.
        catRaw = categorizeTop(kwRaw)
        region = fields[2]

        # place URI (built from the name as it appears in the raw data)
        placeURI = generateUri(lon, lat, nameRaw, catRaw[:1], region)

        # names: the first variant is the main name, all variants together
        # form the "other names" columns
        nameRaw = re.sub(r"[#?]|-i", "", nameRaw).strip()
        names = nameRaw.split("/")
        mName = names[0]
        mNameAr = bc.betacodeToArabic(mName)
        oNames = ", ".join(names)
        oNamesAr = bc.betacodeToArabic("، ".join(names))

        searchNames = bc.betacodeToSearch(oNames)
        # arBW carries the same search form as searchNames.
        arBW = searchNames

        if nameRaw.startswith("Rout"):
            # Route entries are not betaCode; keep the name as is everywhere.
            mName = nameRaw
            mNameAr = nameRaw
            oNames = nameRaw
            oNamesAr = nameRaw
            searchNames = nameRaw
            arBW = nameRaw

            rCount += 1

            print(placeURI)

        newVal = [region, lon, lat, catRaw[2:], fields[5], mName, mNameAr,
                  placeURI, oNames, oNamesAr, searchNames, arBW]
        newList.append("\t".join(newVal))

    print("Total number: %d" % len(newList))
    print("Total number without duplicates: %d" % len(set(newList)))

    header = "\t".join(["region", "lon", "lat", "topType", "topTypeAlt", "translitTitle", "arTitle",
                        "topURI", "translitTitleOther", "arTitleOther", "searchNames", "arBW"])
    newData = "\n".join(sorted(newList))
    newData = bc.deNoise(newData)

    print(rCount)
    with open("Cornu_All_Final_Reformatted.txt", "w", encoding="utf8") as f:
        f.write(header+"\n"+newData)