Пример #1
0
def print_and_add_category(category):
    """
    Add one category to the server and bump the shared progress counter.

    The last element of ``category`` is passed to ``add_category`` as its
    first argument and a list copy of the whole sequence as its second.
    The counter stored in ``tracker[0]`` is then advanced while ``lock`` is
    held; every 10th call prints the running count and every 25th call
    sleeps for 3 seconds before the lock is released.

    :param category: indexable sequence of category names
    :return: None
    """
    add_category(category[-1], list(category), server=server)

    with lock:
        processed = tracker[0] + 1
        if not processed % 10:
            print(processed)
        if not processed % 25:
            # NOTE(review): the pause happens while the lock is held, so any
            # other caller waiting on the lock stalls too — presumably an
            # intentional throttle; confirm.
            time.sleep(3)
        tracker[0] = processed
Пример #2
0
def ensure_categories(storage_object, destination):
    """
    Add the category path of every index in the store to a server.

    Each index's ``'categories'`` value is converted to a tuple so that
    identical paths collapse into a single entry before anything is sent.

    :param CommentStore storage_object: queried through
        ``get_index_titles`` and ``get_index_for_title``
    :param destination: forwarded to ``add_category`` as ``server``
    :return: None
    """
    unique_paths = set()
    for index_title in storage_object.get_index_titles():
        index_record = storage_object.get_index_for_title(index_title)
        unique_paths.add(tuple(index_record['categories']))

    # All paths are collected before the first category is added.
    for path in unique_paths:
        add_category(path[-1], path, server=destination)
Пример #3
0
def ensure_categories(storage_object, destination):
    """
    Send every distinct category path found in the store to a server.

    :param CommentStore storage_object: supplies the index titles and, per
        title, an index whose ``'categories'`` entry is read
    :param destination: passed to ``add_category`` as the ``server`` keyword
    :return: None
    """
    titles = storage_object.get_index_titles()
    # Tuples are hashable, so duplicate category paths are dropped here.
    category_paths = {
        tuple(storage_object.get_index_for_title(name)['categories'])
        for name in titles
    }
    for path in category_paths:
        innermost = path[-1]
        add_category(innermost, path, server=destination)
Пример #4
0
    else:
        book_name = u"{} on {}".format(user_args.title, base_text_title)
        book_xml = root.get_commentaries().get_commentary_by_title(user_args.title)
        book_ja = book_xml.render()
        he_book_name = u"{} על {}".format(book_xml.titles['he'], he_base_title)
        links = book_xml.collect_links()
        if user_args.title == u"Sha'arei Teshuvah":
            links += shaarei_special_links()
        index = commentary_index(book_name, he_book_name, user_args.title)
        post_parse[user_args.title](book_ja)

        if user_args.add_term:
            functions.add_term(user_args.title, book_xml.titles['he'], server=user_args.server)

        functions.add_category(user_args.title, index['categories'], server=user_args.server)

    if user_args.verbose:
        print index

    functions.post_index(index, server=user_args.server)

    # version = {
    #     "versionTitle": "Maginei Eretz; Shulchan Aruch Orach Chaim, Lemberg, 1893",
    #     "versionTitleInHebrew": u"""ספר מגיני ארץ; שלחן ערוך. למברג, תרנ"ג""",
    #     "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH002084080",
    #     "language": "he",
    #     "text": book_ja,
    # }
    version = {
        "versionTitle": "Maginei Eretz: Shulchan Aruch Orach Chaim, Lemberg, 1893",
Пример #5
0
# Walk the cursor rows and group them into commentary objects, collecting
# the term data and category of each newly started commentary on the way.
current_commentary, comment_store = None, []
# NOTE(review): total=28479 is a hard-coded progress-bar length — presumably
# the expected number of rows from the cursor; confirm it is still accurate.
for item in tqdm(iter_cursor(cursor), total=28479):
    # Start a new commentary when none is active yet (or it is falsy), or
    # when the current one reports that this row does not belong to it.
    if not current_commentary or not current_commentary.is_part_of_commentary(
            item):
        current_commentary = build_commentary_from_row(item)
        comment_store.append(current_commentary)
        terms.add(current_commentary.get_term_data())
        categories.add(current_commentary.get_category())
    # Every row, including the one that opened the commentary, adds segments.
    current_commentary.add_segments_from_row(item)

# Target server for all uploads below; alternatives are kept commented out.
server = 'http://localhost:8000'
# server = 'https://www.sefaria.org'
# server = 'http://friedberg.sandbox.sefaria.org'
# Each term is unpacked into positional arguments for add_term.
for term in terms:
    add_term(*term, server=server)

print(f'There are {len(categories)} categories')
# Disabled threaded variant of the category upload:
# with ThreadPoolExecutor(max_workers=6) as executor:
#     executor.map(print_and_add_category, categories)
# Sequential upload: the last element of each category is passed first,
# followed by the whole category as a list.
for cat in tqdm(categories):
    add_category(cat[-1], list(cat), server=server)

print(f'There are {len(comment_store)} books to upload')
# Reset the counter held in the first slot of tracker.
tracker[0] = 0
# Disabled upload / dump steps for the collected commentaries:
# with ThreadPoolExecutor(max_workers=3) as executor:
#     executor.map(upload_commentary, comment_store)
# for item, comment in enumerate(tqdm(comment_store), 1):
#     dump_index_and_text(comment)

# Print the number of terms, then the first element of each term, one per line.
print(len(terms), *(term[0] for term in terms), sep='\n')
Пример #6
0
            print links[0]['refs']

        index = commentary_index(book_name, he_book_name, user_args.title)
        if user_args.title == u'Siftei Kohen':
            shach_clean(book_ja)
        elif user_args.title == u'Turei Zahav':
            clean_taz(book_ja)
        else:
            remove_question_marks(book_ja)

        if user_args.add_term:
            functions.add_term(user_args.title,
                               book_xml.titles['he'],
                               server=user_args.server)
        functions.add_category(user_args.title,
                               index['categories'],
                               server=user_args.server)

    if user_args.verbose:
        print index
    functions.post_index(index, server=user_args.server)

    version = {
        "versionTitle":
        "Ashlei Ravrevei: Shulchan Aruch Yoreh Deah, Lemberg, 1888",
        "versionSource":
        "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH002097765",
        "language": "he",
        "text": book_ja
    }
    if user_args.title == u'Siftei Kohen':
Пример #7
0
                links.append(match)

    """
    with open('Unlinked Eliyah Rabbah.txt', 'w') as er_unlinked:
        for index, comment in enumerate(unlinked_comments):
            er_unlinked.write(str(unlinked_indexes[index]))
            er_unlinked.write('\n')
            er_unlinked.write(comment)
            er_unlinked.write('\n')
    """


    server = 'https://eliyah-rabbah.cauldron.sefaria.org'
    erh = 'אליה רבה'
    add_term('Eliyah Rabbah', erh, server = server)
    add_category('Eliyah Rabbah',['Halakhah', 'Shulchan Arukh', 'Commentary', 'Eliyah Rabbah'], server = server)

    english_title = 'Eliyah Rabbah on Shulchan Arukh, Orach Chayim'
    hebrew_title = 'אליה רבה על שלחן ערוך אורח חיים'

    ja = JaggedArrayNode()
    ja.add_primary_titles(english_title, hebrew_title)
    ja.add_structure(['Siman', 'Seif Katan'], address_types=[u'Siman', u'Integer'])
    ja.validate()

    index_dict = {
        'title': english_title,
        'base_text_titles': ['Shulchan Arukh, Orach Chayim'],
        'dependence': 'Commentary',
        'collective_title': 'Eliyah Rabbah',
        'categories': [
Пример #8
0
 def post_commentary_terms_and_categories(self, server):
     """
     Post every term, then add the category of every commentary index.

     All entries of ``self.terms`` are sent with ``post_term`` first.
     Afterwards, for each entry of ``self.commentaryIndices``, its
     ``'categories'`` value is passed to ``add_category`` together with
     that value's last element.

     :param server: forwarded as the ``server`` keyword to both calls
     :return: None
     """
     for term in self.terms:
         post_term(term, server=server)

     # Lazy generator: each 'categories' lookup happens just before its
     # add_category call, exactly as in a plain loop.
     category_paths = (entry['categories'] for entry in self.commentaryIndices)
     for path in category_paths:
         add_category(path[-1], path, server=server)