def translitToArabic():
    """Convert betacode read from stdin to Arabic script and write it to stdout.

    Standard input is decoded as UTF-8. When ``args.delimited`` is not
    ``None``, only the spans enclosed in unescaped ``%`` characters are
    converted (the delimiters themselves are dropped from the output);
    otherwise the whole input is passed to ``betaCode.betacodeToArabic``.
    The result is written to standard output as UTF-8 bytes.
    """
    text = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8').read()
    if getattr(args, 'delimited') is not None:
        # (?<!\\) ignores a "%" that is preceded by a backslash; DOTALL lets a
        # single delimited span run across line breaks.
        text = re.sub(
            r"(?<!\\)%(.*?)(?<!\\)%",
            lambda match: betaCode.betacodeToArabic(match.group(1)),
            text,
            flags=re.DOTALL,
        )
    else:
        text = betaCode.betacodeToArabic(text)
    # Write through the byte buffer so the output encoding is always UTF-8.
    sys.stdout.buffer.write(text.encode('utf8'))
def regenerateFile(file):
    """Rebuild the derived sections of an existing ``_Working.txt`` record.

    The working file (``targetFolder`` + *file* without its last four
    characters + ``_Working.txt``) is split on ``div``.  Sections are addressed
    by position after the split: 2 = Arabic original, 3 = automatic betacode,
    4 = manual betacode, 5 = manual betacode as transliteration, 6 = manual
    betacode as Arabic, 7 = translation in betacode, 8 = translation as
    transliteration.  Sections 3, 5, 6 and 8 get their second line
    regenerated from sections 2, 4, 4 and 7 respectively, and the file is
    written back in place.

    Raises IndexError if the file has fewer sections than expected or a
    section has no second line.
    """
    workingPath = targetFolder + file[:-4] + "_Working.txt"
    with open(workingPath, "r", encoding="utf8") as source:
        sections = re.split(div, source.read())

    def contentLine(index):
        # Second line of the section once runs of newlines are collapsed.
        return re.sub("\n+", "\n", sections[index]).split("\n")[1]

    def storeContentLine(index, value):
        # Swap in a new second line and keep the section's other lines.
        rows = re.sub("\n+", "\n", sections[index]).split("\n")
        rows[1] = value
        sections[index] = "\n".join(rows)

    # Arabic original -> automatic betacode.
    storeContentLine(3, betaCode.arabicToBetaCode(contentLine(2)))

    # Manually corrected betacode -> transliteration and Arabic script.
    manualBetaCode = contentLine(4)
    storeContentLine(5, betaCode.betacodeToTranslit(manualBetaCode))
    storeContentLine(6, betaCode.betacodeToArabic(manualBetaCode))

    # Translation written in betacode -> transliterated translation.
    storeContentLine(8, betaCode.betacodeToTranslit(contentLine(7)))

    # Put the record back together; whatever precedes the first divider
    # (sections[0]) is not written back.
    rebuilt = "".join(div + section + "\n\n" for section in sections[1:])
    rebuilt = re.sub("\n{3,}", "\n\n", rebuilt)

    with open(workingPath, "w", encoding="utf8") as target:
        target.write(rebuilt)
def generateFile(file):
    """Create the initial ``_Working.txt`` file for one source record.

    Reads ``sourceFolder + file``, takes the values of its ``hadithID::`` and
    ``arHadithInit::`` lines, and writes eight ``div``-prefixed sections to
    ``targetFolder`` + *file* without its last four characters +
    ``_Working.txt``.  The manual-betacode section and the two sections
    derived from it are seeded with the automatic conversion.

    Raises AttributeError if either of the two expected lines is missing
    (``re.search`` returns ``None``).
    """
    with open(sourceFolder + file, "r", encoding="utf8") as source:
        rawRecord = source.read()

    recordId = re.search(r"hadithID::(.*)\n", rawRecord).group(1).strip()
    arabicText = re.search(r"arHadithInit::(.*)\n", rawRecord).group(1).strip()

    # Header and the untouched Arabic text.
    sections = [
        div + "## hadithID:: %s\n## Completed by:: ADDYOURNAME\n## Finished:: NO/YES\n\n" % recordId,
        div + "## ArabicInitial\n%s\n\n" % arabicText,
    ]

    # The automatic conversion seeds both the "auto" and the "manual" section.
    autoBetaCode = betaCode.arabicToBetaCode(arabicText)
    sections.append(div + "## betaCodeAuto\n%s\n\n" % autoBetaCode)
    sections.append(div + "## betaCodeManual\n%s\n\n" % autoBetaCode)
    sections.append(
        div + "## betaCodeManual>Translit\n%s\n\n" % betaCode.betacodeToTranslit(autoBetaCode))
    sections.append(
        div + "## betaCodeManual>Arabic\n%s\n\n" % betaCode.betacodeToArabic(autoBetaCode))

    # Placeholders that the person working on the record will overwrite.
    sections.append(
        div + "## TranslationBetaCode\nType your translation on this line\n\n")
    sections.append(
        div + "## TranslationTranslit\nTranslation with transliterated names will appear here\n\n")

    with open(targetFolder + file[:-4] + "_Working.txt", "w", encoding="utf8") as target:
        target.write("".join(sections))
def arabicAllFiles(mainFolder):
    """Regenerate the transliteration and Arabic blocks of every file in a folder.

    For each entry of ``os.listdir(mainFolder)`` the file is read; if it
    contains the trigger line, everything before the first trigger line is
    kept, and a one-to-one transliteration plus an Arabic-script rendering of
    that text (minus its ``###BETACODE#`` and ``NB:`` lines) are appended
    below the trigger line.  Files without the trigger line are only reported.

    ``mainFolder`` is joined to file names by plain concatenation, so it must
    already end with a path separator.
    """
    print("converting to Arabic: %s" % mainFolder)
    testLine = "###KEEP#THIS#LINE#TEXT#WILL#BE#GENERATED#BELOW#\n"

    for name in os.listdir(mainFolder):
        path = mainFolder + name
        with open(path, "r", encoding="utf8") as source:
            content = source.read()

        if not re.search(testLine, content):
            print("""File %s does not have the trigger line.\nIf you want the contents of this file converted, add the following line at the end of the file:\n\n%s\n\n""" % (name, testLine))
            continue

        print("converting to Arabic %s" % name)
        editable = re.split(testLine, content)[0]

        # The matched header lines are not used afterwards, but the lookups
        # raise AttributeError when either header line is absent.
        re.search("###BETACODE#.*\n", editable).group()
        re.search("NB:.*\n", editable).group()

        # Header lines are excluded from the text that gets converted.
        toConvert = re.sub("###BETACODE#.*\n|NB:.*\n", "", editable)
        oneToOne = "###TRANSLIT#ONE#TO#ONE###\n\n%s" % betaCode.betacodeToTranslit(toConvert)
        arabicScript = "###TRANSLIT#ARABIC#SCRIPT###\n\n%s" % betaCode.betacodeToArabic(toConvert)

        regenerated = "".join([editable, "\n", testLine, oneToOne, arabicScript])
        regenerated = re.sub("\n{2,}", "\n\n", regenerated)

        with open(path, "w", encoding="utf8") as target:
            target.write(regenerated)
def processArabicQuotes(file):
    """Refresh the Arabic paragraph that follows each ``<!--@@...-->`` marker.

    Every single-line ``<!--@@BETACODE-->`` comment in *file* (followed by a
    newline) gets a ``<p class="arabic">`` paragraph holding the Arabic
    rendering of BETACODE; a paragraph already sitting directly after the
    marker is replaced.  The file is rewritten in place and each converted
    betacode string is printed.

    The substitution is done in one pass over the regex matches.  Replacing
    each matched string globally with ``str.replace`` would also hit other
    occurrences of the same marker, which inserts a duplicate paragraph when
    one occurrence already has a paragraph and another does not.
    """
    with open(file, "r", encoding="utf8") as f:
        text = f.read()

    def renderQuote(match):
        marker = match.group(1)
        # Drop the leading "<!--@@" (6 chars) and trailing "-->\n" (4 chars).
        betacode = marker[6:-4]
        print(betacode)
        return "%s<p class=\"arabic\">%s</p>" % (
            marker, betaCode.betacodeToArabic(betacode))

    text = re.sub(
        r"(<!--@@.*?-->\n)(<p class=\"arabic\">.*?</p>)?", renderQuote, text)

    with open(file, "w", encoding="utf8") as f:
        f.write(text)
    print("To Arabic: %s has been processed..." % file)
def processArabicQuotes(input_file):
    """Refresh the Arabic paragraph that follows each ``<!--@@...-->`` marker.

    Every single-line ``<!--@@BETACODE-->`` comment in *input_file* (followed
    by a newline) gets a ``<p class="arabic">`` paragraph holding the Arabic
    rendering of BETACODE; a paragraph already sitting directly after the
    marker is replaced.  The result is written back to the same path.

    The substitution is done in one pass over the regex matches.  Replacing
    each matched string globally with ``str.replace`` would also hit other
    occurrences of the same marker, which inserts a duplicate paragraph when
    one occurrence already has a paragraph and another does not.
    """
    with open(input_file, "r", encoding="utf8") as f:
        text = f.read()

    def renderQuote(match):
        marker = match.group(1)
        # Drop the leading "<!--@@" (6 chars) and trailing "-->\n" (4 chars).
        arabic = betaCode.betacodeToArabic(marker[6:-4])
        return "%s<p class=\"arabic\">%s</p>" % (marker, arabic)

    text = re.sub(
        r"(<!--@@.*?-->\n)(<p class=\"arabic\">.*?</p>)?", renderQuote, text)

    # The file is overwritten in place.
    output_file = input_file
    with open(output_file, "w", encoding="utf8") as f:
        f.write(text)
    # NOTE(review): the message says "To Translit" although the conversion is
    # to Arabic script; the wording is left unchanged.
    print("To Translit: {} has been processed as {}.".format(input_file, output_file))
def processArabicQuotes(input_file):
    """Insert or refresh Arabic paragraphs after ``<!--@@...-->`` markers.

    For each single-line ``<!--@@BETACODE-->`` comment in *input_file* that is
    followed by a newline, the text right after it becomes
    ``<p class="arabic">ARABIC</p>``, where ARABIC is
    ``betaCode.betacodeToArabic(BETACODE)``.  An existing paragraph directly
    after the marker is overwritten.  The file is rewritten in place.

    All markers are handled in a single ``re.sub`` pass.  A per-match global
    ``str.replace`` would also rewrite other occurrences of the same marker
    text, leaving a duplicate paragraph when the same marker occurs both with
    and without an existing paragraph.
    """
    pattern = r"(<!--@@.*?-->\n)(<p class=\"arabic\">.*?</p>)?"

    with open(input_file, "r", encoding="utf8") as f:
        text = f.read()

    def withArabicParagraph(match):
        marker = match.group(1)
        # marker[6:-4] is the betacode between "<!--@@" and "-->\n".
        return "%s<p class=\"arabic\">%s</p>" % (
            marker, betaCode.betacodeToArabic(marker[6:-4]))

    text = re.sub(pattern, withArabicParagraph, text)

    # Output goes back to the file that was read.
    output_file = input_file
    with open(output_file, "w", encoding="utf8") as f:
        f.write(text)
    # NOTE(review): the message says "To Translit" although the conversion is
    # to Arabic script; the wording is left unchanged.
    print("To Translit: {} has been processed as {}.".format(
        input_file, output_file))
def generateFile(file):
    """Write a fresh ``_Working.txt`` template for one source record.

    The source (``sourceFolder + file``) must contain a ``hadithID::`` line
    and an ``arHadithInit::`` line; their values fill the header and the
    Arabic section.  The automatic betacode conversion of the Arabic text is
    used for the auto section, as the starting point of the manual section,
    and as input for the transliteration and Arabic sections.  Two
    placeholder translation sections close the file, which is saved as
    ``targetFolder`` + *file* without its last four characters +
    ``_Working.txt``.

    Raises AttributeError when one of the two required lines is not found.
    """
    with open(sourceFolder+file, "r", encoding="utf8") as infile:
        record = infile.read()

    idValue = re.search(r"hadithID::(.*)\n", record).group(1).strip()
    arabicValue = re.search(r"arHadithInit::(.*)\n", record).group(1).strip()

    # Each piece starts with the divider so the file can be split on it later.
    pieces = []
    pieces.append(
        div + "## hadithID:: %s\n## Completed by:: ADDYOURNAME\n## Finished:: NO/YES\n\n" % idValue)
    pieces.append(div + "## ArabicInitial\n%s\n\n" % arabicValue)

    converted = betaCode.arabicToBetaCode(arabicValue)
    pieces.append(div + "## betaCodeAuto\n%s\n\n" % converted)
    # The manual section starts out as a copy of the automatic conversion.
    pieces.append(div + "## betaCodeManual\n%s\n\n" % converted)
    pieces.append(
        div + "## betaCodeManual>Translit\n%s\n\n" % betaCode.betacodeToTranslit(converted))
    pieces.append(
        div + "## betaCodeManual>Arabic\n%s\n\n" % betaCode.betacodeToArabic(converted))
    pieces.append(
        div + "## TranslationBetaCode\nType your translation on this line\n\n")
    pieces.append(
        div + "## TranslationTranslit\nTranslation with transliterated names will appear here\n\n")

    with open(targetFolder+file[:-4]+"_Working.txt", "w", encoding="utf8") as outfile:
        outfile.write("".join(pieces))
def regenerateFile(file):
    """Regenerate the computed sections of a record's ``_Working.txt`` file.

    The file at ``targetFolder`` + *file* without its last four characters +
    ``_Working.txt`` is split on ``div``.  Using the second line of each
    section (after collapsing repeated newlines):

    * section 3 (automatic betacode) is rebuilt from section 2 (Arabic),
    * sections 5 and 6 (transliteration, Arabic) from section 4 (manual
      betacode),
    * section 8 (transliterated translation) from section 7 (translation in
      betacode).

    The sections after the first divider are then re-joined and saved over
    the same file.  Raises IndexError when a section or its second line is
    missing.
    """
    path = targetFolder + file[:-4] + "_Working.txt"
    with open(path, "r", encoding="utf8") as src:
        parts = re.split(div, src.read())

    def linesOf(position):
        # Section text split into lines, with runs of newlines collapsed.
        return re.sub("\n+", "\n", parts[position]).split("\n")

    def putSecondLine(position, value):
        # Only the second line is replaced; the other lines are kept.
        rows = linesOf(position)
        rows[1] = value
        parts[position] = "\n".join(rows)

    arabicOriginal = linesOf(2)[1]
    putSecondLine(3, betaCode.arabicToBetaCode(arabicOriginal))

    betaCodeManual = linesOf(4)[1]
    putSecondLine(5, betaCode.betacodeToTranslit(betaCodeManual))
    putSecondLine(6, betaCode.betacodeToArabic(betaCodeManual))

    translationBetaCode = linesOf(7)[1]
    putSecondLine(8, betaCode.betacodeToTranslit(translationBetaCode))

    # parts[0] is the text before the first divider and is not written back.
    chunks = [div + part + "\n\n" for part in parts[1:]]
    newWorking = re.sub("\n{3,}", "\n\n", "".join(chunks))

    with open(path, "w", encoding="utf8") as dst:
        dst.write(newWorking)
def generateNewCSV():
    """Reformat ``raw_Cornu_All_Final.txt`` into ``Cornu_All_Final_Reformatted.txt``.

    The input is tab-separated with one header line.  Rows whose seventh
    column is ``noData`` are dropped.  For every other row the function
    builds a place URI, cleans the raw name (removing ``#``, ``?`` and
    ``-i``), splits name variants on ``/`` and derives transliterated,
    Arabic and search forms through ``bc``.  Rows whose cleaned name starts
    with ``Rout`` keep the raw name in all name columns and are counted.
    Output rows are sorted, passed through ``bc.deNoise`` and written below a
    fixed header.

    Empty input lines (for example the one produced by a trailing newline)
    are skipped instead of raising IndexError.
    """
    rCount = 0
    print("Reformatting Cornu_All_Final.txt ...")
    newList = []
    with open("raw_Cornu_All_Final.txt", "r", encoding="utf8") as f:
        rows = f.read().split("\n")

    for row in rows[1:]:  # rows[0] is the header line
        if not row:
            continue
        fields = row.split("\t")
        if fields[6] == 'noData':
            continue

        nameRaw = fields[6]
        lon = fields[0]
        lat = fields[1]
        kwRaw = fields[4]
        catRaw = categorizeTop(kwRaw)
        region = fields[2]

        # The URI is built from the raw name, before it is cleaned below.
        placeURI = generateUri(lon, lat, nameRaw, catRaw[:1], region)

        # names
        nameRaw = re.sub(r"[#\?]|-i", "", nameRaw).strip()
        names = nameRaw.split("/")  # str.split always yields >= 1 element
        mName = names[0]
        mNameAr = bc.betacodeToArabic(names[0])
        oNames = ", ".join(names)
        oNamesAr = bc.betacodeToArabic("، ".join(names))
        searchNames = bc.betacodeToSearch(oNames)
        arBW = bc.betacodeToSearch(oNames)

        if nameRaw.startswith("Rout"):
            # These entries keep the cleaned raw name in every name column.
            mName = nameRaw
            mNameAr = nameRaw
            oNames = nameRaw
            oNamesAr = nameRaw
            searchNames = nameRaw
            arBW = nameRaw
            rCount += 1
            # NOTE(review): this print is assumed to belong to the "Rout"
            # branch; confirm against the intended console output.
            print(placeURI)

        newVal = [region, lon, lat, catRaw[2:], fields[5], mName, mNameAr,
                  placeURI, oNames, oNamesAr, searchNames, arBW]
        newList.append("\t".join(newVal))

    print("Total number: %d" % len(newList))
    print("Total number without duplicates: %d" % len(set(newList)))

    header = "\t".join(["region", "lon", "lat", "topType", "topTypeAlt",
                        "translitTitle", "arTitle", "topURI",
                        "translitTitleOther", "arTitleOther", "searchNames",
                        "arBW"])

    newData = "\n".join(sorted(newList))
    newData = bc.deNoise(newData)
    print(rCount)

    with open("Cornu_All_Final_Reformatted.txt", "w", encoding="utf8") as f:
        f.write(header+"\n"+newData)