def print_and_add_category(category):
    """Add a single category to the server and advance the shared counter.

    The last element of ``category`` is passed as the category name and the
    whole sequence, copied into a list, as its path. ``server``, ``lock`` and
    ``tracker`` are read from the enclosing module scope.

    :param category: sequence of path components for the category
    :return: None
    """
    add_category(category[-1], list(category), server=server)

    # The counter lives in a one-element list; the lock serialises the
    # read-modify-write below.
    with lock:
        completed = tracker[0] + 1

        # Report progress on every 10th completed call.
        if completed % 10 == 0:
            print(completed)

        # NOTE(review): the pause runs while the lock is held, so other
        # callers waiting on the lock are paused as well -- presumably
        # deliberate throttling; confirm.
        if completed % 25 == 0:
            time.sleep(3)

        tracker[0] = completed
def ensure_categories(storage_object, destination):
    """
    Add every distinct category path found in the store to a server.

    For each title returned by ``get_index_titles`` the matching index is
    looked up and its ``'categories'`` entry collected as a tuple; each
    distinct tuple is then handed to ``add_category`` once, using its last
    element as the category name.

    :param CommentStore storage_object: provides ``get_index_titles`` and ``get_index_for_title``
    :param destination: forwarded to ``add_category`` as ``server``
    :return: None
    """
    unique_paths = set()
    for index_title in storage_object.get_index_titles():
        index_record = storage_object.get_index_for_title(index_title)
        # Tuples are hashable, so duplicates collapse inside the set.
        unique_paths.add(tuple(index_record['categories']))

    for path in unique_paths:
        add_category(path[-1], path, server=destination)
def ensure_categories(storage_object, destination):
    """
    Make sure each category path used by the stored indexes exists remotely.

    The ``'categories'`` value of every index in the store is gathered
    (de-duplicated as tuples) and passed to ``add_category`` together with
    its final element as the category name.

    :param CommentStore storage_object: queried via ``get_index_titles`` and ``get_index_for_title``
    :param destination: given to ``add_category`` as the ``server`` keyword
    :return: None
    """
    category_paths = {
        tuple(storage_object.get_index_for_title(name)['categories'])
        for name in storage_object.get_index_titles()
    }

    for category_path in category_paths:
        leaf = category_path[-1]
        add_category(leaf, category_path, server=destination)
else: book_name = u"{} on {}".format(user_args.title, base_text_title) book_xml = root.get_commentaries().get_commentary_by_title(user_args.title) book_ja = book_xml.render() he_book_name = u"{} על {}".format(book_xml.titles['he'], he_base_title) links = book_xml.collect_links() if user_args.title == u"Sha'arei Teshuvah": links += shaarei_special_links() index = commentary_index(book_name, he_book_name, user_args.title) post_parse[user_args.title](book_ja) if user_args.add_term: functions.add_term(user_args.title, book_xml.titles['he'], server=user_args.server) functions.add_category(user_args.title, index['categories'], server=user_args.server) if user_args.verbose: print index functions.post_index(index, server=user_args.server) # version = { # "versionTitle": "Maginei Eretz; Shulchan Aruch Orach Chaim, Lemberg, 1893", # "versionTitleInHebrew": u"""ספר מגיני ארץ; שלחן ערוך. למברג, תרנ"ג""", # "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH002084080", # "language": "he", # "text": book_ja, # } version = { "versionTitle": "Maginei Eretz: Shulchan Aruch Orach Chaim, Lemberg, 1893",
# Group the cursor rows into commentary objects. A new commentary is started
# for the first row and whenever the current one reports that a row is not
# part of it; every row is then added to whichever commentary is current.
current_commentary, comment_store = None, []
# NOTE(review): total=28479 is presumably the expected row count for the
# progress bar -- confirm it still matches the query.
for item in tqdm(iter_cursor(cursor), total=28479):
    if not current_commentary or not current_commentary.is_part_of_commentary(
            item):
        current_commentary = build_commentary_from_row(item)
        comment_store.append(current_commentary)
        # terms and categories are collections defined earlier in the file.
        terms.add(current_commentary.get_term_data())
        categories.add(current_commentary.get_category())
    current_commentary.add_segments_from_row(item)

# Target server; the alternatives are kept commented out for quick switching.
server = 'http://localhost:8000'
# server = 'https://www.sefaria.org'
# server = 'http://friedberg.sandbox.sefaria.org'

# Each term entry is unpacked into positional arguments for add_term.
for term in terms:
    add_term(*term, server=server)

print(f'There are {len(categories)} categories')
# Threaded variant, currently disabled in favour of the sequential loop below:
# with ThreadPoolExecutor(max_workers=6) as executor:
#     executor.map(print_and_add_category, categories)
for cat in tqdm(categories):
    # The last path element is the category name; the path is sent as a list.
    add_category(cat[-1], list(cat), server=server)

print(f'There are {len(comment_store)} books to upload')
# Reset the shared counter before the (currently disabled) upload step.
tracker[0] = 0
# with ThreadPoolExecutor(max_workers=3) as executor:
#     executor.map(upload_commentary, comment_store)
# for item, comment in enumerate(tqdm(comment_store), 1):
#     dump_index_and_text(comment)

# Print the number of terms followed by the first field of each term, one
# value per line.
print(len(terms), *(term[0] for term in terms), sep='\n')
print links[0]['refs'] index = commentary_index(book_name, he_book_name, user_args.title) if user_args.title == u'Siftei Kohen': shach_clean(book_ja) elif user_args.title == u'Turei Zahav': clean_taz(book_ja) else: remove_question_marks(book_ja) if user_args.add_term: functions.add_term(user_args.title, book_xml.titles['he'], server=user_args.server) functions.add_category(user_args.title, index['categories'], server=user_args.server) if user_args.verbose: print index functions.post_index(index, server=user_args.server) version = { "versionTitle": "Ashlei Ravrevei: Shulchan Aruch Yoreh Deah, Lemberg, 1888", "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH002097765", "language": "he", "text": book_ja } if user_args.title == u'Siftei Kohen':
links.append(match) """ with open('Unlinked Eliyah Rabbah.txt', 'w') as er_unlinked: for index, comment in enumerate(unlinked_comments): er_unlinked.write(str(unlinked_indexes[index])) er_unlinked.write('\n') er_unlinked.write(comment) er_unlinked.write('\n') """ server = 'https://eliyah-rabbah.cauldron.sefaria.org' erh = 'אליה רבה' add_term('Eliyah Rabbah', erh, server = server) add_category('Eliyah Rabbah',['Halakhah', 'Shulchan Arukh', 'Commentary', 'Eliyah Rabbah'], server = server) english_title = 'Eliyah Rabbah on Shulchan Arukh, Orach Chayim' hebrew_title = 'אליה רבה על שלחן ערוך אורח חיים' ja = JaggedArrayNode() ja.add_primary_titles(english_title, hebrew_title) ja.add_structure(['Siman', 'Seif Katan'], address_types=[u'Siman', u'Integer']) ja.validate() index_dict = { 'title': english_title, 'base_text_titles': ['Shulchan Arukh, Orach Chayim'], 'dependence': 'Commentary', 'collective_title': 'Eliyah Rabbah', 'categories': [
def post_commentary_terms_and_categories(self, server):
    """Post this object's terms, then add the category of each commentary index.

    Every entry of ``self.terms`` is sent with ``post_term``. Afterwards the
    ``'categories'`` value of each entry in ``self.commentaryIndices`` is
    passed to ``add_category``, with its last element as the category name.

    :param server: forwarded as the ``server`` keyword to both calls
    :return: None
    """
    for commentary_term in self.terms:
        post_term(commentary_term, server=server)

    # Lazy generator: each path is looked up right before it is posted.
    category_paths = (entry['categories'] for entry in self.commentaryIndices)
    for path in category_paths:
        add_category(path[-1], path, server=server)