def run_compare():
    """Reset the comparison report and compare every book in the index.

    Makes sure the ``results`` directory exists, creates (or truncates)
    ``results/length_comparison.txt``, then calls ``do_book_comparison`` for
    each ``book`` element under the ``tanach`` node of
    ``source/TanachIndex.xml``.
    """
    Helper.mkdir_p("results")

    # "wb+" truncates an existing file, so the report is empty at this point;
    # for now the file is only created here.
    report = open("results/length_comparison.txt", "wb+")
    report.close()

    # The index lists the number of chapters and verses for all books in the WLC.
    index_root = ET.parse("source/TanachIndex.xml").getroot()
    tanach_node = index_root.find("tanach")
    for book_node in tanach_node.findall("book"):
        do_book_comparison(book_node)
def save_parsed_text(text, record="shmuel", part=""):
    """Write a parsed commentary to a JSON file under ``../preprocess_json/``.

    The title and the file name are both built from the commentary selected
    by ``record`` and from the ``masechet`` name of the enclosing scope.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
        record: Commentary being saved: ``"shmuel"``, ``"yomtov"`` or
            ``"chamudot"``.
        part: Optional file-name suffix. When empty the file is named
            ``<prefix>_<masechet>.json``; otherwise
            ``<prefix>_<masechet>_<part>.json``.

    Raises:
        ValueError: If ``record`` is not one of the known commentaries.
    """
    # record -> (file-name prefix, title prefix)
    known_records = {
        "shmuel": (u"Tiferet_Shmuel_on", u"Tiferet Shmuel on "),
        "yomtov": (u"Maadaney_Yom_Tov_on", u"Maadaney Yom Tov on "),
        "chamudot": (u"Divrey_Chamudot_on", u"Divrey Chamudot on "),
    }
    # Validate before doing any work: an unknown record used to fall through
    # the if/elif chain and fail later with an opaque UnboundLocalError.
    if record not in known_records:
        raise ValueError(
            "unknown record %r; expected one of: %s"
            % (record, ", ".join(sorted(known_records)))
        )
    file_prefix, title_prefix = known_records[record]

    text_whole = {
        "title": title_prefix + masechet,
        "versionTitle": "Vilna Edition",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957",
        "language": "he",
        "text": text,
        "digitizedBySefaria": True,
        "license": "Public Domain",
        "licenseVetted": True,
        "status": "locked",
    }

    Helper.mkdir_p("../preprocess_json/")
    if part == "":
        file_name = file_prefix + "_%s.json" % masechet
    else:
        file_name = file_prefix + "_{}_{}.json".format(masechet, part)
    with open("../preprocess_json/" + file_name, 'w') as out:
        json.dump(text_whole, out)
def save_parsed_text(text, commentator):
    """Write one commentator's parsed text to ``../preprocess_json/``.

    The English title is chosen by looking for a known commentator name inside
    ``commentator`` and appending the ``masechet`` name of the enclosing scope.
    The file is named ``<commentator with spaces as underscores>_<masechet>.json``.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
        commentator: Name containing ``"Korban Netanel"`` or
            ``"Pilpula Charifta"``.

    Raises:
        ValueError: If ``commentator`` contains neither known name.
    """
    print(commentator)

    # (substring to look for, title prefix)
    known_commentators = (
        ("Korban Netanel", u"Korban Netanel on "),
        ("Pilpula Charifta", u"Pilpula Charifta on "),
    )
    title_prefix = None
    # Every entry is checked (no early exit) so that, as before, the later
    # name wins if both happen to occur in ``commentator``.
    for needle, prefix in known_commentators:
        if needle in commentator:
            title_prefix = prefix
    if title_prefix is None:
        # Previously this fell through and crashed with UnboundLocalError.
        raise ValueError("unknown commentator %r" % (commentator,))

    text_whole = {
        # The tractate name is appended exactly once; the old code appended it
        # to a string that already ended with it, doubling the name.
        "title": title_prefix + masechet,
        "versionTitle": "Vilna Edition",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957",
        "language": "he",
        "text": text,
        "digitizedBySefaria": True,
        "license": "Public Domain",
        "licenseVetted": True,
        "status": "locked",
    }

    Helper.mkdir_p("../preprocess_json/")
    saved_commentator = commentator.strip().replace(" ", "_")
    file_name = saved_commentator + "_%s.json" % masechet
    with open("../preprocess_json/" + file_name, 'w') as out:
        json.dump(text_whole, out)
def save_parsed_text(text, record="shmuel"):
    """Write a parsed commentary to a JSON file under ``preprocess_json/``.

    The title and the file name (``<prefix>_<masechet>.json``) are built from
    the commentary selected by ``record`` and from the ``masechet`` name of
    the enclosing scope.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
        record: Commentary being saved: ``"shmuel"``, ``"yomtov"`` or
            ``"chamudot"``.

    Raises:
        ValueError: If ``record`` is not one of the known commentaries.
    """
    # record -> (file-name prefix, title prefix)
    known_records = {
        "shmuel": (u"Tiferet_Shmuel_on", u"Tiferet Shmuel on "),
        "yomtov": (u"Maadaney_Yom_Tov_on", u"Maadaney Yom Tov on "),
        "chamudot": (u"Divrey_Chamudot_on", u"Divrey Chamudot on "),
    }
    # Validate before doing any work: an unknown record used to fall through
    # the if/elif chain and fail later with an opaque UnboundLocalError.
    if record not in known_records:
        raise ValueError(
            "unknown record %r; expected one of: %s"
            % (record, ", ".join(sorted(known_records)))
        )
    file_prefix, title_prefix = known_records[record]

    text_whole = {
        "title": title_prefix + masechet,
        "versionTitle": "Vilna Edition",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957",
        "language": "he",
        "text": text,
        "digitizedBySefaria": True,
        "license": "Public Domain",
        "licenseVetted": True,
        "status": "locked",
    }

    Helper.mkdir_p("preprocess_json/")
    file_name = file_prefix + "_%s.json" % masechet
    with open("preprocess_json/" + file_name, 'w') as out:
        json.dump(text_whole, out)
def run_compare():
    """Start a fresh length-comparison report and process every indexed book.

    Ensures the ``results`` directory exists, creates (or truncates)
    ``results/length_comparison.txt``, and then passes each ``book`` element
    found under ``tanach`` in ``source/TanachIndex.xml`` to
    ``do_book_comparison``.
    """
    Helper.mkdir_p('results')

    # Just create the (empty) report file for now.
    empty_report = open("results/length_comparison.txt", 'wb+')
    empty_report.close()

    # This index lists the number of chapters and verses for all books in the WLC.
    wlc_root = ET.parse('source/TanachIndex.xml').getroot()
    book_nodes = wlc_root.find('tanach').findall('book')
    for book_node in book_nodes:
        do_book_comparison(book_node)
def save_parsed_text(text):
    """Dump the parsed Chidushei Agadot text to ``preprocess_json/``.

    Both the title and the file name include the ``masechet`` name taken from
    the enclosing scope.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    title = 'Chidushei Agadot on %s' % masechet
    payload = {
        "title": title,
        "versionTitle": "Vilna Edition",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957",
        "language": "he",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Chidushei_Agadot_%s.json" % masechet
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(text):
    """Dump the parsed "Rosh on Taanit" text to ``preprocess_json/Rosh_on_Taanit.json``.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    payload = {
        "title": 'Rosh on Taanit',
        "versionTitle": "Vilna, 1842",
        "versionSource": "???",
        "language": "he",
        "text": text,
    }

    # Make sure the output directory exists before writing into it.
    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Rosh_on_Taanit.json"
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(text):
    """Dump the parsed "Noda Byehuda" text to ``preprocess_json/Noda_Byehuda.json``.

    The version title and version source are written as single-space
    placeholders.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    payload = {
        "title": 'Noda Byehuda',
        "versionTitle": " ",
        "versionSource": " ",
        "language": "he",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Noda_Byehuda.json"
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(text):
    """Dump the parsed "Teshuvot harambam" text to ``preprocess_json/teshuvot_haRambam.json``.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    payload = {
        "title": 'Teshuvot harambam',
        "versionTitle": "Leipzig : H.L. Shnuis, 1859",
        "versionSource": "http://www.worldcat.org/oclc/233123481",
        "language": "he",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/teshuvot_haRambam.json"
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(text):
    """Dump the parsed "Meshech Hochma" text to ``preprocess_json/Meshech Hochma.json``.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    payload = {
        "title": 'Meshech Hochma',
        "versionTitle": "Srikot",
        "versionSource": "",
        "language": "he",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Meshech Hochma.json"
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(text):
    """Dump the parsed "Rashi on Genesis" text to ``preprocess_json/Rashi_on_Genesis.json``.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    payload = {
        "title": 'Rashi on Genesis',
        "versionTitle": "Pentateuch with Rashi's commentary by M. Rosenbaum and A.M. Silbermann",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001969084",
        "language": "he",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Rashi_on_Genesis.json"
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(text):
    """Dump the parsed "Haamek Davar on Exodus" text to ``preprocess_json/``.

    The output file is ``preprocess_json/Haamek_Davar_on_Exodus.json``.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    payload = {
        "title": "Haamek Davar on Exodus",
        "versionTitle": "Vilna : 1879",
        "versionSource": "http://babel.hathitrust.org/cgi/pt?id=uc1.31158011185906",
        "language": "he",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Haamek_Davar_on_Exodus.json"
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(text):
    """Dump the parsed "Radak on Genesis" text to ``preprocess_json/Radak on Genesis.json``.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    payload = {
        "title": 'Radak on Genesis',
        "versionTitle": "Presburg : A. Schmid, 1842",
        "versionSource": "http://www.worldcat.org/title/perush-radak-al-ha-torah-sefer-bereshit/oclc/867743220",
        "language": "he",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Radak on Genesis.json"
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(book_name, text):
    """Dump one book to ``preprocess_json/Ben Ish Chai/`` and post it.

    Writes ``<book_name>.json`` and then calls ``post_to_api(book_name)``.

    Args:
        book_name: Used as the JSON title and as the file name.
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    print("hello")

    payload = {
        "title": book_name,
        "versionTitle": "On Your Way",
        "versionSource": "http://mobile.tora.ws/",
        "language": "he",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/Ben Ish Chai")
    destination = "preprocess_json/Ben Ish Chai/" + book_name + ".json"
    with open(destination, 'w') as out:
        json.dump(payload, out)

    # Post only once the file has been written and closed.
    post_to_api(book_name)
def save_parsed_text(text):
    """Dump the parsed English Mishnah translation to ``preprocess_json/``.

    Both the title and the file name include the ``masechet`` name taken from
    the enclosing scope.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    text_whole = {
        # The previous template, 'Mishnah', had no replacement field, so
        # .format(masechet) silently dropped the tractate name.
        # NOTE(review): assumes masechet is the bare tractate name - confirm.
        "title": 'Mishnah {}'.format(masechet),
        "versionTitle": "The Mishna with Obadiah Bartenura by Rabbi Shraga Silverstein",
        "versionSource": "http://www.sefaria.org/shraga-silverstein",
        "language": "en",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Shraga_Silverstein_translation_on_{}.json".format(masechet)
    with open(destination, 'w') as out:
        json.dump(text_whole, out)
def save_parsed_text(text):
    """Dump the parsed English "II Chronicles" translation to ``preprocess_json/``.

    The output file is
    ``preprocess_json/Shraga_Silverstein_translation_on_II_Chronicles.json``.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    payload = {
        "title": 'II Chronicles',
        "versionTitle": "The Rashi chumash by Rabbi Shraga Silverstein",
        "versionSource": "http://www.sefaria.org/shraga-silverstein",
        "language": "en",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Shraga_Silverstein_translation_on_II_Chronicles.json"
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(text):
    """Dump the parsed commentary to ``preprocess_json/<commentator>_on_<masechet>.json``.

    ``commentator`` and ``masechet`` are read from the enclosing scope; the
    JSON title is the commentator name alone.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    title = '%s' % commentator
    payload = {
        "title": title,
        "versionTitle": "Vilna Edition",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957",
        "language": "he",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/{}_on_{}.json".format(commentator, masechet)
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_default_text(text):
    """Dump the parsed Rosh text to ``preprocess_json/Rosh_on_<masechet>.json``.

    Both the title and the file name include the ``masechet`` name taken from
    the enclosing scope.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    title = 'Rosh on %s' % masechet
    payload = {
        "title": title,
        "versionTitle": "Vilna Edition",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957",
        "language": "he",
        "text": text,
        "digitizedBySefaria": True,
        "license": "Public Domain",
        "licenseVetted": True,
        "status": "locked",
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Rosh_on_{}.json".format(masechet)
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(text, book):
    """Dump the parsed Daat Zkenim text to ``preprocess_json/Daat_Zkenim_on_<book>.json``.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
        book: Book name, appended to both the title and the file name.
    """
    # JSON object matching the API requirements.
    text_whole = {
        # Note the space after "on": the title used to be built as
        # 'Daat Zkenim on' + book, which glued the book name onto "on".
        "title": 'Daat Zkenim on ' + book,
        "versionTitle": "Presburg : A. Schmid, 1842",
        "versionSource": "http://www.worldcat.org/title/perush-radak-al-ha-torah-sefer-bereshit/oclc/867743220",
        "language": "he",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Daat_Zkenim_on_" + book + ".json"
    with open(destination, 'w') as out:
        json.dump(text_whole, out)
def save_file(intro):
    """Save the Haamek Davar introduction as JSON, then post it.

    Writes ``preprocess_json/Haamek_Davar_intro.json``, reads the file back,
    calls ``createBookRecord(intro_basic_record())`` and finally passes the
    file contents to ``Helper.postText`` under the name
    ``"Haamek Davar Intro"``.

    Args:
        intro: Introduction text, stored under the ``"text"`` key.
    """
    payload = {
        "title": "Haamek Davar Intro",
        "versionTitle": "",
        "versionSource": "",
        "language": "he",
        "text": intro,
    }

    Helper.mkdir_p("preprocess_json/")
    with open("preprocess_json/Haamek_Davar_intro.json", 'w') as out:
        json.dump(payload, out)

    # Re-read the serialized form from disk; that exact text is what is posted.
    with open("preprocess_json/Haamek_Davar_intro.json", 'r') as saved:
        serialized = saved.read()

    createBookRecord(intro_basic_record())
    Helper.postText("Haamek Davar Intro", serialized, False)
def save_parsed_text(parsed):
    """Dump the parsed "Prisha" text to ``preprocess_json/Prisha.json``.

    Args:
        parsed: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    payload = {
        "title": 'Prisha',
        "versionTitle": "Vilna 1924",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001935970",
        "language": "he",
        "text": parsed,
        "digitizedBySefaria": True,
        "license": "Public Domain",
        "licenseVetted": True,
        "status": "locked",
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/Prisha.json"
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(text):
    """Write the English "II Chronicles" translation as JSON.

    The output file is
    ``preprocess_json/Shraga_Silverstein_translation_on_II_Chronicles.json``.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    json_object = {
        "title": 'II Chronicles',
        "versionTitle": "The Rashi chumash by Rabbi Shraga Silverstein",
        "versionSource": "http://www.sefaria.org/shraga-silverstein",
        "language": "en",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    out_path = "preprocess_json/Shraga_Silverstein_translation_on_II_Chronicles.json"
    with open(out_path, 'w') as out:
        json.dump(json_object, out)
def save_parsed_text(text):
    """Dump the parsed commentary to ``preprocess_json/<commentator>_on_<masechet>.json``.

    ``commentator`` and ``masechet`` are read from the enclosing scope. The
    JSON title is the commentator name alone, and the version title is
    ``"Wikisource "`` followed by the commentator name.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    title = '%s' % commentator
    version_title = "Wikisource " + commentator
    payload = {
        "title": title,
        "versionTitle": version_title,
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957",
        "language": "he",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    destination = "preprocess_json/{}_on_{}.json".format(commentator, masechet)
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_parsed_text(text):
    """Write the English Mishnah translation for the current tractate as JSON.

    Both the title and the file name include the ``masechet`` name taken from
    the enclosing scope.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # JSON object matching the API requirements.
    text_whole = {
        # 'Mishnah'.format(masechet) had no replacement field, so the tractate
        # name never reached the title; the template now includes it.
        # NOTE(review): assumes masechet is the bare tractate name - confirm.
        "title": 'Mishnah {}'.format(masechet),
        "versionTitle": "The Mishna with Obadiah Bartenura by Rabbi Shraga Silverstein",
        "versionSource": "http://www.sefaria.org/shraga-silverstein",
        "language": "en",
        "text": text,
    }

    Helper.mkdir_p("preprocess_json/")
    out_path = "preprocess_json/Shraga_Silverstein_translation_on_{}.json".format(masechet)
    with open(out_path, 'w') as out:
        json.dump(text_whole, out)
def save_parsed_text(commentator, book_name, text):
    """Dump one commentary on one book to ``<preprocess_path><commentator>/``.

    The title ref is ``"<commentator title> on <book_name>"``, where the
    commentator title is looked up in ``available_commentators`` and decoded
    from UTF-8. The ref is used both as the JSON title and as the file name.

    Args:
        commentator: Key into ``available_commentators``; also the name of the
            output sub-directory.
        book_name: Book name appended to the commentator title.
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # Assemble the title ref.
    record = available_commentators[commentator]['record']
    commentator_title = unicode(record['title'], 'utf-8')
    ref = commentator_title + ' on ' + book_name

    # JSON object matching the API requirements.
    payload = {
        "title": ref,
        "versionTitle": "On Your Way",
        "versionSource": "http://mobile.tora.ws/",
        "language": "he",
        "text": text,
    }

    target_dir = preprocess_path + commentator + "/"
    Helper.mkdir_p(target_dir)
    with open(target_dir + ref + ".json", 'w') as out:
        json.dump(payload, out)
def save_parsed_text(commentator, book_name, text):
    """Write ``<commentator title> on <book_name>.json`` for one commentary.

    The commentator title comes from ``available_commentators`` (decoded from
    UTF-8); the file is written to ``<preprocess_path><commentator>/``.

    Args:
        commentator: Key into ``available_commentators``; also the name of the
            output sub-directory.
        book_name: Book name appended to the commentator title.
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
    """
    # Assemble the title ref.
    raw_title = available_commentators[commentator]['record']['title']
    ref = unicode(raw_title, 'utf-8') + ' on ' + book_name

    # JSON object matching the API requirements.
    json_object = {
        "title": ref,
        "versionTitle": "On Your Way",
        "versionSource": "http://mobile.tora.ws/",
        "language": "he",
        "text": text,
    }

    out_dir = preprocess_path + commentator + "/"
    Helper.mkdir_p(out_dir)
    out_path = out_dir + ref + ".json"
    with open(out_path, 'w') as out:
        json.dump(json_object, out)
def save_parsed_text(text, chelek):
    """Dump one part of Terumat HaDeshen to ``preprocess_json/``.

    The title uses ``chelek`` as given; the file name uses it stripped, with
    spaces replaced by underscores.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
        chelek: Name of the part being saved.
    """
    # JSON object matching the API requirements.
    payload = {
        "title": 'Terumat HaDeshen, ' + chelek,
        "versionTitle": "Warsaw 1882",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001175907",
        "language": "he",
        "text": text,
        "status": "locked",
        "digitizedBySefaria": True,
        "licenseVetted": True,
        "license": "Public Domain",
    }

    Helper.mkdir_p("preprocess_json/")
    file_suffix = re.sub(" ", "_", chelek.strip())
    destination = "preprocess_json/Terumat_HaDeshen_{}.json".format(file_suffix)
    with open(destination, 'w') as out:
        json.dump(payload, out)
def save_texts(main, additions, main_footnotes, added_footnotes):
    """Dump the four Mekhilta DeRashbi texts to separate JSON files.

    Writes, in order, under ``preprocess_json/``:
    ``Mekhilta DeRashbi.json``, ``Mekhilta DeRashbi Added.json``,
    ``Mekhilta DeRashbi Footnotes 1.json`` and
    ``Mekhilta DeRashbi Footnotes 2.json``. The two footnote objects carry no
    ``"title"`` key.

    Args:
        main: Main text.
        additions: Text of the additions.
        main_footnotes: Footnotes written to the "Footnotes 1" file.
        added_footnotes: Footnotes written to the "Footnotes 2" file.
    """
    version_title = "Mechilta de-Rabbi Simon b. Jochai, Dr. D. Hoffman, Frankfurt 1905"
    version_source = "https://openlibrary.org/books/OL23318277M/Mekhilta_de-Rabi_Shimon_ben_Yoai_al_sefer_Shemot"

    # (output path, JSON object) in the order the files are written.
    outputs = [
        ("preprocess_json/Mekhilta DeRashbi.json", {
            "title": "Mekhilta DeRabbi Shimon Bar Yochai",
            "versionTitle": version_title,
            "versionSource": version_source,
            "language": "he",
            "text": main,
        }),
        ("preprocess_json/Mekhilta DeRashbi Added.json", {
            "title": "Mekhilta DeRabbi Shimon Bar Yochai Additions",
            "versionTitle": version_title,
            "versionSource": version_source,
            "language": "he",
            "text": additions,
        }),
        ("preprocess_json/Mekhilta DeRashbi Footnotes 1.json", {
            "versionTitle": version_title,
            "versionSource": version_source,
            "language": "he",
            "text": main_footnotes,
        }),
        ("preprocess_json/Mekhilta DeRashbi Footnotes 2.json", {
            "versionTitle": version_title,
            "versionSource": version_source,
            "language": "he",
            "text": added_footnotes,
        }),
    ]

    for destination, payload in outputs:
        Helper.mkdir_p("preprocess_json/")
        with open(destination, 'w') as out:
            json.dump(payload, out)
def save_parsed_text(text, chelek):
    """Write one part of Terumat HaDeshen as JSON under ``preprocess_json/``.

    ``chelek`` goes into the title unchanged; for the file name it is stripped
    and its spaces are replaced by underscores.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
        chelek: Name of the part being saved.
    """
    # JSON object matching the API requirements.
    json_object = {
        "title": 'Terumat HaDeshen, ' + chelek,
        "versionTitle": "Warsaw 1882",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001175907",
        "language": "he",
        "text": text,
        "status": "locked",
        "digitizedBySefaria": True,
        "licenseVetted": True,
        "license": "Public Domain",
    }

    Helper.mkdir_p("preprocess_json/")
    underscored = re.sub(" ", "_", chelek.strip())
    out_path = "preprocess_json/Terumat_HaDeshen_{}.json".format(underscored)
    with open(out_path, 'w') as out:
        json.dump(json_object, out)
def save_parsed_text(text, commentator):
    """Write one commentator's parsed text to ``preprocess_json/``.

    The English title is chosen by looking for a known commentator name inside
    ``commentator`` and appending the ``masechet`` name of the enclosing scope.
    The file is named ``<commentator with spaces as underscores>_<masechet>.json``.

    Args:
        text: Parsed text, stored under the ``"text"`` key of the JSON object.
        commentator: Name containing ``"Korban Netanel"`` or
            ``"Pilpula Charifta"``.

    Raises:
        ValueError: If ``commentator`` contains neither known name.
    """
    print(commentator)

    # (substring to look for, title prefix)
    known_commentators = (
        ("Korban Netanel", u"Korban Netanel on "),
        ("Pilpula Charifta", u"Pilpula Charifta on "),
    )
    title_prefix = None
    # Every entry is checked (no early exit) so that, as before, the later
    # name wins if both happen to occur in ``commentator``.
    for needle, prefix in known_commentators:
        if needle in commentator:
            title_prefix = prefix
    if title_prefix is None:
        # Previously this fell through and crashed with UnboundLocalError.
        raise ValueError("unknown commentator %r" % (commentator,))

    text_whole = {
        # The tractate name is appended exactly once; the old code appended it
        # to a string that already ended with it, doubling the name.
        "title": title_prefix + masechet,
        "versionTitle": "Vilna Edition",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957",
        "language": "he",
        "text": text,
        "digitizedBySefaria": True,
        "license": "Public Domain",
        "licenseVetted": True,
        "status": "locked",
    }

    Helper.mkdir_p("preprocess_json/")
    saved_commentator = commentator.strip().replace(" ", "_")
    file_name = saved_commentator + "_%s.json" % masechet
    with open("preprocess_json/" + file_name, 'w') as out:
        json.dump(text_whole, out)
def save_parsed_text(text, sub_directory=None):
    """Dump a ready-made JSON object to ``<directory>/<title>.json``.

    Args:
        text: Object to serialize; its ``'title'`` entry names the file.
        sub_directory: Optional sub-directory of ``preprocess_json``. When
            falsy, the file goes directly into ``preprocess_json``.
    """
    if sub_directory:
        directory = "preprocess_json/%s" % sub_directory
    else:
        directory = "preprocess_json"

    Helper.mkdir_p(directory)
    destination = directory + "/" + text['title'] + ".json"
    with open(destination, 'w') as out:
        json.dump(text, out)
def save_links(links):
    """Dump ``links`` to ``preprocess_json/links/Mekhilta DeRashbi links.json``.

    Args:
        links: Object to serialize as JSON.
    """
    Helper.mkdir_p("preprocess_json/links/")
    destination = "preprocess_json/links/Mekhilta DeRashbi links.json"
    with open(destination, 'w') as out:
        json.dump(links, out)
new_dh = re.sub(ur"\.", " -", DH, count = 1) else: new_dh =DH newLine.append(new_dh) newDaf.append(newLine) newShas.append(newDaf) #except Exception as e: # print "%s did not work" %mas #print newShas[3][0][0] text_whole = { "title": 'Tosafot' , "versionTitle": "Wikisource Tosafot", "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957", "language": "he", "text": newShas, } #save Helper.mkdir_p("preprocess_json/") with open("preprocess_json/Tosafot_on_{}.json".format(mas), 'w') as out: json.dump(text_whole, out) #Helper.createBookRecord(book_record()) with open("preprocess_json/Tosafot_on_%s.json" %mas, 'r') as filep: file_text = filep.read() mas = re.sub("_"," ", mas.strip()) Helper.postText("Tosafot on {}".format(mas) , file_text, False)
def save_parsed_text(text, sub_directory=None):
    """Write ``text`` as JSON to a file named after its title.

    Args:
        text: Object to serialize; its ``"title"`` entry names the file.
        sub_directory: Optional sub-directory of ``preprocess_json``. When
            falsy, the file goes directly into ``preprocess_json``.
    """
    # Default location, overridden only when a sub-directory was requested.
    directory = "preprocess_json"
    if sub_directory:
        directory = "preprocess_json/%s" % sub_directory

    Helper.mkdir_p(directory)
    out_path = directory + "/" + text["title"] + ".json"
    with open(out_path, "w") as out:
        json.dump(text, out)
def save_links(commentator, book_name, links_arr):
    """Dump a book's links to ``preprocess_json/mishnahCommentary/links/<book_name>.json``.

    Args:
        commentator: Not used by this function.
        book_name: Names the output file.
        links_arr: Object to serialize as JSON.
    """
    Helper.mkdir_p("preprocess_json/mishnahCommentary/links/")
    destination = "preprocess_json/mishnahCommentary/links/" + book_name + ".json"
    with open(destination, 'w') as out:
        json.dump(links_arr, out)
if "-" not in DH: new_dh = re.sub(r"\.", " -", DH, count=1) else: new_dh = DH newLine.append(new_dh) newDaf.append(newLine) newShas.append(newDaf) #except Exception as e: # print "%s did not work" %mas #print newShas[3][0][0] text_whole = { "title": 'Tosafot', "versionTitle": "Wikisource Tosafot", "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957", "language": "he", "text": newShas, } #save Helper.mkdir_p("preprocess_json/") with open("preprocess_json/Tosafot_on_{}.json".format(mas), 'w') as out: json.dump(text_whole, out) #Helper.createBookRecord(book_record()) with open("preprocess_json/Tosafot_on_%s.json" % mas, 'r') as filep: file_text = filep.read() mas = re.sub("_", " ", mas.strip()) Helper.postText("Tosafot on {}".format(mas), file_text, False)
def save_parsed_links(links):
    """Dump ``links`` to ``preprocess_json/links/Meshech_Hochma_links.json``.

    Args:
        links: Object to serialize as JSON.
    """
    Helper.mkdir_p("preprocess_json/links/")
    destination = "preprocess_json/links/Meshech_Hochma_links.json"
    with open(destination, 'w') as out:
        json.dump(links, out)