def modify_bulk_text(user: int, version: model.Version, text_map: dict, vsource=None, **kwargs) -> dict:
    """
    Apply a batch of segment-level edits to a single version, in place.

    :param user: user ID of the user making the modification.
    :param version: Version object of the text being modified.
    :param text_map: dict mapping segment-ref strings to new text values. Each
        key/value pair represents a segment that should be modified. Segments
        whose value equals the currently stored text are ignored.
    :param vsource: optional versionSource to set on the version.
        NOTE(review): original author wrote "not sure why this is here. I copied
        it from modify_text" — confirm whether it is still needed.
    :param kwargs: forwarded to post_modify_text; kwargs["type"] is read here.
    :return: dict mapping normalized refs to error strings for segments whose
        ref did not match the version's schema. Empty dict means full success.
    """
    def populate_change_map(old_text, en_tref, he_tref, _):
        # Callback for walk_thru_contents: records every existing tref and
        # stages a change when text_map holds a different value for it.
        nonlocal change_map, existing_tref_set
        existing_tref_set.add(en_tref)
        new_text = text_map.get(en_tref, None)
        if new_text is None or new_text == old_text:
            return
        change_map[en_tref] = (old_text, new_text, model.Ref(en_tref))

    change_map = {}
    existing_tref_set = set()
    version.walk_thru_contents(populate_change_map)
    # Refs requested in text_map that don't yet exist in this version.
    new_ref_set = set(text_map.keys()).difference(existing_tref_set)
    for new_tref in new_ref_set:
        if len(text_map[new_tref].strip()) == 0:
            # this ref doesn't exist for this version. probably exists in a different version
            # no reason to add to change_map if it has no content
            continue
        change_map[new_tref] = ('', text_map[new_tref], model.Ref(new_tref))
    if vsource:
        version.versionSource = vsource  # todo: log this change
    # modify version in place
    error_map = {}
    for _, new_text, oref in change_map.values():
        try:
            version.sub_content_with_ref(oref, new_text)
        except Exception as e:
            error_map[oref.normal()] = f"Ref doesn't match schema of version. Exception: {repr(e)}"
    version.save()
    # Fire post-modification hooks only for segments that were written successfully.
    for old_text, new_text, oref in change_map.values():
        if oref.normal() in error_map:
            continue
        post_modify_text(user, kwargs.get("type"), oref, version.language, version.versionTitle, old_text, new_text, version._id, **kwargs)
    return error_map
def daily_929(datetime_obj):
    """Return the 929 learning-cycle calendar item for the given datetime.

    Only the date portion of ``datetime_obj`` is used (e.g. datetime.today()).
    """
    perek = p929.Perek(datetime_obj.date())
    oref = model.Ref("{} {}".format(perek.book_name, perek.book_chapter))
    en_display = "{} ({})".format(oref.normal(), perek.number)
    he_display = "{} ({})".format(oref.he_normal(), perek.number)
    item = {
        'title': {'en': '929', 'he': '929'},
        'displayValue': {'en': en_display, 'he': he_display},
        'url': oref.url(),
        'ref': oref.normal(),
        'order': 4,
        'category': oref.index.get_primary_category(),
    }
    return [item]
def daily_rambam(datetime_obj):
    """Return the Daily Rambam (one-chapter cycle) calendar item for the given day.

    The date is truncated to midnight because the db stores date-only datetimes.
    """
    day = datetime.datetime(datetime_obj.year, datetime_obj.month, datetime_obj.day)
    # Renamed from `daily_rambam`: the local previously shadowed this function's
    # own name, which would break any subsequent recursive/self reference.
    rambam_record = db.daily_rambam.find_one({"date": {"$eq": day}})
    rf = model.Ref(rambam_record["ref"])
    display_value_en = rf.normal().replace("Mishneh Torah, ", "")
    display_value_he = rf.he_normal().replace("משנה תורה, ", "")
    return [{
        'title': {'en': 'Daily Rambam', 'he': 'הרמב"ם היומי'},
        'displayValue': {'en': display_value_en, 'he': display_value_he},
        'url': rf.url(),
        'ref': rf.normal(),
        'order': 6,
        'category': rf.index.get_primary_category()
    }]
def reset_index_cache_for_text(request, title):
    """
    Refresh the cached index record and text-titles cache for `title`,
    propagate the invalidation to other servers / Varnish as configured,
    then redirect back to the text's page with a confirmation flag.
    """
    index = model.library.get_index(title)
    model.library.refresh_index_record_in_cache(index)
    model.library.reset_text_titles_cache()
    if MULTISERVER_ENABLED:
        # Tell the other app servers to refresh their own caches.
        server_coordinator.publish_event("library", "refresh_index_record_in_cache", [index.title])
    elif USE_VARNISH:
        # Single-server deployment: just purge the edge cache for this title.
        invalidate_title(index.title)
    return HttpResponseRedirect("/%s?m=Cache-Reset" % model.Ref(title).url())
def generate_texts_sitemaps():
    """
    Create sitemap for each text section for which content is available.

    Each sitemap file holds at most 40k URLs (the sitemap spec caps files
    at 50k URLs).

    :return: the number of sitemap files written.
    """
    refs = generate_refs_list()
    urls = ["http://www.sefaria.org/" + model.Ref(tref).url() for tref in refs]
    maps = list(chunks(urls, 40000))
    # enumerate() replaces the index-based `for n in range(len(maps))` loop.
    for n, url_chunk in enumerate(maps):
        write_urls(url_chunk, "texts-sitemap%d.txt" % n)
    return len(maps)
def count_sources(sources, sheet_id):
    """
    Recursively tally statistics about the sources of one sheet into the
    module-level counters (refs, texts, categories, translation gaps, fragments).

    :param sources: list of sheet-source dicts (may contain nested "subsources").
    :param sheet_id: id of the sheet being counted; recorded for fragments.
    """
    global refs, texts, categories
    global sources_count, comments_count, outside_count, untrans_count
    global untrans_texts, untrans_categories, untrans_refs
    global fragments, fragments_count
    for s in sources:
        if "ref" in s and s["ref"] is not None:
            sources_count += 1
            try:
                oref = model.Ref(s["ref"]).padded_ref()
            except InputError:
                # Unparseable ref: skip this source entirely.
                continue
            refs[s["ref"]] += 1
            texts[oref.book] += 1
            categories[oref.index.categories[0]] += 1
            if not model.Ref(s["ref"]).is_text_translated():
                untrans_categories[oref.index.categories[0]] += 1
                untrans_texts[oref.book] += 1
                untrans_refs[s["ref"]] += 1
                untrans_count += 1
                # NOTE(review): a "fragment" appears to be an untranslated ref
                # that nonetheless carries a short bit of English text — confirm.
                en = strip_tags(s.get("text", {}).get("en", ""))
                if len(en) > 25:
                    fragments[s["ref"]].append(sheet_id)
                    fragments_count += 1
            if "subsources" in s:
                count_sources(s["subsources"], sheet_id)
        elif "comment" in s:
            comments_count += 1
        elif "outsideText" in s or "outsideBiText" in s:
            outside_count += 1
def halakhah_yomit(datetime_obj):
    """Return the Halakhah Yomit calendar item for the given day.

    The date is truncated to midnight because the db stores date-only datetimes.
    """
    day = datetime.datetime(datetime_obj.year, datetime_obj.month, datetime_obj.day)
    db_obj = db.halakhah_yomit.find_one({"date": {"$eq": day}})
    rf = model.Ref(db_obj["ref"])
    # Fixed misspelled prefix (was "Shulchan Arkuh, "): the typo never matched
    # the canonical title "Shulchan Arukh", so the prefix was never stripped.
    display_en = rf.normal().replace("Shulchan Arukh, ", "")
    display_he = rf.he_normal()
    halakha = {
        "title": {"en": "Halakhah Yomit", "he": "הלכה יומית"},
        "displayValue": {"en": display_en, "he": display_he},
        "url": rf.url(),
        "ref": rf.normal(),
        "order": 9,
        "category": rf.index.get_primary_category()
    }
    return [halakha]
def daily_mishnayot(datetime_obj):
    """Return the Daily Mishnah calendar items scheduled for the given day."""
    day = datetime.datetime(datetime_obj.year, datetime_obj.month, datetime_obj.day)
    records = db.daily_mishnayot.find({"date": {"$eq": day}}).sort([("date", 1)])
    items = []
    for record in records:
        oref = model.Ref(record["ref"])
        items.append({
            'title': {'en': 'Daily Mishnah', 'he': 'משנה יומית'},
            'displayValue': {'en': oref.normal(), 'he': oref.he_normal()},
            'url': oref.url(),
            'ref': oref.normal(),
            'order': 5,
            'category': oref.index.get_primary_category(),
        })
    return items
def test_next_ref(self):
    """next_section_ref() should advance to the following section across
    chapter, daf (amud), and commentary boundaries, and return None at a
    text's end."""
    assert m.Ref("Job 4:5").next_section_ref().normal() == "Job 5"
    assert m.Ref("Shabbat 4b").next_section_ref().normal() == "Shabbat 5a"
    assert m.Ref("Shabbat 5a").next_section_ref().normal() == "Shabbat 5b"
    # Commentary refs advance within the base text's structure.
    assert m.Ref("Rashi on Genesis 5:32:2").next_section_ref().normal() == "Rashi on Genesis 6:2"
    assert m.Ref("Mekhilta 35.3").next_section_ref() is None
    # This will start to fail when we fill in this text
    assert m.Ref("Mekhilta 23:19").next_section_ref().normal() == "Mekhilta 31:12"
def test_prev_ref(self):
    """prev_section_ref() should step back to the preceding section across
    chapter, daf (amud), and commentary boundaries, and return None at a
    text's beginning."""
    assert m.Ref("Job 4:5").prev_section_ref().normal() == "Job 3"
    assert m.Ref("Shabbat 4b").prev_section_ref().normal() == "Shabbat 4a"
    assert m.Ref("Shabbat 5a").prev_section_ref().normal() == "Shabbat 4b"
    # Commentary refs step back within the base text's structure.
    assert m.Ref("Rashi on Genesis 6:2:1").prev_section_ref().normal() == "Rashi on Genesis 5:32"
    assert m.Ref("Mekhilta 12:1").prev_section_ref() is None
    # This will start to fail when we fill in this text
    assert m.Ref("Mekhilta 31:12").prev_section_ref().normal() == "Mekhilta 23:19"
def he_ref_link(value, absolute=False):
    """
    Transform a ref into an <a> tag linking to that ref in Hebrew.
    e.g. "Genesis 1:3" -> "<a href='/Genesis.1.2'>בראשית, א, ב</a>"

    Results are memoized in the module-level he_ref_link_cache.
    NOTE(review): `value` is interpolated unescaped into HTML in the fallback
    branch — confirm callers only pass trusted strings.
    """
    if value in he_ref_link_cache:
        return he_ref_link_cache[value]
    if not value:
        return ""
    try:
        oref = m.Ref(value)
        # Strip numeric ranges (e.g. "1-3") from the Hebrew display string.
        link = '<a class="heRef" href="/' + oref.url() + '">' + re.sub(r"\d+(-\d+)?", "", oref.he_normal()) + '</a>'
    except Exception:
        # Narrowed from a bare `except:` which also swallowed SystemExit /
        # KeyboardInterrupt; any parse failure yields an inert placeholder link.
        link = '<a class="heRef" href="#invalid-ref">' + value + '</a>'
    he_ref_link_cache[value] = mark_safe(link)
    return he_ref_link_cache[value]
def parse_daily_mishnah(filename):
    """
    Rebuild the db.daily_rambam collection from a tab-delimited CSV schedule.

    Each row is expected to have the date in column 1 (m/d/Y) and a
    "Mishneh Torah" section name in column 2. The collection is wiped first.

    NOTE(review): despite the function name ("mishnah"), this reads Mishneh
    Torah refs and writes to db.daily_rambam — confirm the naming is intentional.
    """
    # See here: https://stackoverflow.com/questions/17315635/csv-new-line-character-seen-in-unquoted-field-error
    # for the irregular open flags
    db.daily_rambam.remove()
    with open(filename, 'rU') as csvfile:
        rambams = csv.reader(csvfile, dialect=csv.excel_tab)
        for row in rambams:
            if not len(row):
                # Skip blank lines in the schedule file.
                continue
            rf = model.Ref("Mishneh Torah, {}".format(row[2]))
            rambam = {
                "date": datetime.strptime(row[1], "%m/%d/%Y"),
                "ref": rf.normal(),
            }
            db.daily_rambam.save(rambam)
    db.daily_rambam.ensure_index("date")
def make_parashah_response_from_calendar_entry(db_parasha):
    """Build the Parashat Hashavua calendar item from a parashah db record."""
    oref = model.Ref(db_parasha["ref"])
    parasha_name = db_parasha["parasha"]
    entry = {
        'title': {
            'en': 'Parashat Hashavua',
            'he': u'פרשת השבוע'
        },
        'displayValue': {
            'en': parasha_name,
            'he': hebrew_parasha_name(parasha_name)
        },
        'url': oref.url(),
        'order': 1,
        'category': oref.index.get_primary_category()
    }
    return [entry]
def ref_link(value, absolute=False):
    """
    Transform a ref into an <a> tag linking to that ref.
    e.g. "Genesis 1:3" -> "<a href='/Genesis.1.2'>Genesis 1:2</a>"

    Results are memoized in the module-level ref_link_cache. Unparseable
    refs are returned as plain text (no link).
    """
    if value in ref_link_cache:
        return ref_link_cache[value]
    if not value:
        return ""
    try:
        oref = m.Ref(value)
        link = '<a href="/' + oref.url() + '">' + value + '</a>'
    except Exception:
        # Narrowed from a bare `except:` which also swallowed SystemExit /
        # KeyboardInterrupt; fall back to the raw text on any parse failure.
        link = value
    ref_link_cache[value] = mark_safe(link)
    return ref_link_cache[value]
def bulktext_api(request, refs):
    """
    Used by the linker. Return he/en text for each of a pipe-delimited list
    of refs, as JSON (optionally JSONP via the `callback` param).

    :param request: Django request; GET params `callback` and `useTextFamily`.
    :param refs: "|"-separated ref strings (deduplicated here via set()).
    :return: jsonResponse mapping each tref to its text data, or {"error": 1}
        for trefs that failed to parse or load.
    """
    if request.method == "GET":
        cb = request.GET.get("callback", None)
        useTextFamily = request.GET.get("useTextFamily", None)
        refs = set(refs.split("|"))
        res = {}
        for tref in refs:
            try:
                oref = model.Ref(tref)
                lang = "he" if is_hebrew(tref) else "en"
                if useTextFamily:
                    # Richer payload: includes primary_category from TextFamily.
                    text_fam = model.TextFamily(oref, commentary=0, context=0, pad=False)
                    he = text_fam.he
                    en = text_fam.text
                    res[tref] = {
                        'he': he,
                        'en': en,
                        'lang': lang,
                        'ref': oref.normal(),
                        'primary_category': text_fam.contents()['primary_category'],
                        'heRef': oref.he_normal(),
                        'url': oref.url()
                    }
                else:
                    # NOTE(review): `basestring` is Python 2 only — confirm
                    # which runtime this module targets.
                    he = model.TextChunk(oref, "he").text
                    en = model.TextChunk(oref, "en").text
                    res[tref] = {
                        'he': he if isinstance(he, basestring) else JaggedTextArray(he).flatten_to_string(),  # these could be flattened on the client, if need be.
                        'en': en if isinstance(en, basestring) else JaggedTextArray(en).flatten_to_string(),
                        'lang': lang,
                        'ref': oref.normal(),
                        'heRef': oref.he_normal(),
                        'url': oref.url()
                    }
            except (InputError, ValueError, AttributeError, KeyError) as e:
                # referer = request.META.get("HTTP_REFERER", "unknown page")
                # This chatter fills up the logs. todo: put in it's own file
                # logger.warning(u"Linker failed to parse {} from {} : {}".format(tref, referer, e))
                res[tref] = {"error": 1}
        resp = jsonResponse(res, cb)
        return resp
def reset_counts(request, title=None):
    """
    Rebuild count/version-state data, then redirect with a status flag.

    With a `title`, refresh that single book's VersionState; unknown titles
    redirect to the dashboard. Without a title, refresh all states (and tell
    the other servers to rebuild their TOC when multiserver is enabled).
    """
    if title:
        try:
            i = model.library.get_index(title)
        except Exception:
            # Narrowed from a bare `except:`; any lookup failure is treated
            # as "unknown book" rather than a server error.
            return HttpResponseRedirect("/dashboard?m=Unknown-Book")
        vs = model.VersionState(index=i)
        vs.refresh()
        return HttpResponseRedirect("/%s?m=Counts-Rebuilt" % model.Ref(i.title).url())
    else:
        model.refresh_all_states()
        if MULTISERVER_ENABLED:
            server_coordinator.publish_event("library", "rebuild_toc")
        return HttpResponseRedirect("/?m=Counts-Rebuilt")
def daily_rambam_three(datetime_obj):
    """Return the Daily Rambam (three-chapter cycle) items for the given day."""
    items = []
    day = datetime.datetime(datetime_obj.year, datetime_obj.month, datetime_obj.day)
    record = db.daily_rambam_three.find_one({"date": {"$eq": day}})
    for tref in record["refs"]:
        oref = model.Ref(tref)
        en_display = oref.normal().replace("Mishneh Torah, ", "")
        he_display = oref.he_normal().replace(u"משנה תורה, ", u"")
        items.append({
            "title": {"en": "Daily Rambam (3)", "he": u'הרמב"ם היומי {}'.format(u"(3)")},
            "displayValue": {"en": en_display, "he": he_display},
            "url": oref.url(),
            "ref": oref.normal(),
            "order": 7,
            "category": oref.index.get_primary_category(),
        })
    return items
def make_sheet_from_text(text, sources=None, uid=1, generatedBy=None, title=None):
    """
    Creates a source sheet owned by 'uid' that includes all of 'text'.
    'sources' is a list of strings naming commentators or texts to include.

    :param text: ref string naming the text to build the sheet from.
    :param sources: optional list of commentator/text names; reflected in the
        generated title when no explicit `title` is given.
    :param uid: owner's user id (defaults to 1).
    :param generatedBy: provenance tag stored on the sheet.
    :param title: explicit sheet title; if falsy, one is derived from the ref
        (plus the sources list, when given).
    :return: result of save_sheet() for the newly built sheet dict.
    """
    oref = model.Ref(text)
    sheet = {
        # Precedence: explicit title > "<ref>" (no sources) > "<ref> with a, b".
        "title": title if title else oref.normal() if not sources else oref.normal() + " with " + ", ".join([s.replace(" on " + text, "") for s in sources]),
        "sources": [],
        "status": 0,  # unlisted/draft
        "options": {
            "numbered": 0,
            "divineNames": "noSub"
        },
        "generatedBy": generatedBy or "make_sheet_from_text",
        "promptedToPublish": datetime.now().isoformat(),
    }
    i = oref.index
    leafs = i.nodes.get_leaf_nodes()
    for leaf in leafs:
        refs = []
        if leaf.first_section_ref() != leaf.last_section_ref():
            # Multi-section leaf: expand to its per-section refs, keeping only
            # those contained within the requested ref.
            leaf_spanning_ref = leaf.first_section_ref().to(leaf.last_section_ref())
            refs += [ref for ref in leaf_spanning_ref.split_spanning_ref() if oref.contains(ref)]
        else:
            refs.append(leaf.ref())
        for ref in refs:
            ref_dict = {"ref": ref.normal()}
            sheet["sources"].append(ref_dict)
    return save_sheet(sheet, uid)
def daf_weekly(datetime_obj):
    """
    Return the "Daf a Week" calendar item(s) for the week containing datetime_obj.

    :param datetime.datetime datetime_obj:
    :return: list of calendar-item dicts.

    Weekday values in datetime start on Monday, i.e.
    Monday = 0, Tuesday = 1, Wednesday = 2,... Sunday = 6
    We want to start on Sunday (a new daf is started every Sunday):
    Sunday = 0, Monday = 1, Tuesday = 2,...
    """
    cur_weekday = (datetime_obj.weekday() + 1) % 7
    # Roll back to the Sunday that started this week's daf, at midnight.
    sunday_obj = datetime_obj - datetime.timedelta(cur_weekday)
    sunday_obj = datetime.datetime(sunday_obj.year, sunday_obj.month, sunday_obj.day)
    daf = db.daf_weekly.find_one({"date": {"$eq": sunday_obj}})
    # The stored "daf" may be a single ref string or a list of them.
    # NOTE(review): `basestring` is Python 2 only — confirm runtime.
    daf_str = [daf["daf"]] if isinstance(daf["daf"], basestring) else daf["daf"]
    daf_weekly_list = []
    for d in daf_str:
        rf = model.Ref(d)
        display_val = rf.normal()
        he_display_val = rf.he_normal()
        if rf.index.get_primary_category() == "Talmud":
            display_val = display_val[:-1]  # remove the a
            he_display_val = he_display_val[:-2]  # remove the alef and the space before it
        daf_weekly_list.append({
            "title": {
                "en": "Daf a Week",
                "he": u"דף השבוע"
            },
            "displayValue": {
                "en": display_val,
                "he": he_display_val
            },
            "url": rf.url(),
            "ref": rf.normal(),
            "order": 8,
            "category": rf.index.get_primary_category()
        })
    return daf_weekly_list
def discussion_link(discussion):
    """
    Returns a link to layer with id value.

    :param discussion is either a Layer object or a urlkey for a Layer object.

    NOTE(review): `basestring` is Python 2 only — confirm runtime before
    changing; left as-is here.
    """
    if isinstance(discussion, basestring):
        discussion = m.Layer().load({"urlkey": discussion})
    if not discussion:
        # Fixed typo in user-facing text (was "[discusion not found]").
        return mark_safe("[discussion not found]")
    if getattr(discussion, "first_ref", None):
        oref = m.Ref(discussion.first_ref)
        href = "/" + oref.url() + "?layer=" + discussion.urlkey
        count = len(discussion.note_ids)
        safe = "<a href='{}'>{} ({} notes)</a>".format(href, oref.normal(), count)
    else:
        # No anchor ref yet: link into a default location carrying the layer key.
        safe = "<a href='/Genesis.1?layer=" + discussion.urlkey + "'>Unstarted Discussion</a>"
    return mark_safe(safe)
def arukh_hashulchan(datetime_obj):
    """Return the Arukh HaShulchan Yomi item for the given day, or [] if none."""
    day = datetime.datetime(datetime_obj.year, datetime_obj.month, datetime_obj.day)
    record = db.arukh_hashulchan.find_one({"date": {"$eq": day}})
    if not record:
        return []
    oref = model.Ref(record["refs"])
    en_display = oref.normal()
    he_display = oref.he_normal()
    return [{
        "title": {"en": "Arukh HaShulchan Yomi", "he": 'ערוך השולחן היומי'},
        "displayValue": {
            "en": en_display.replace("Arukh HaShulchan, ", ""),
            "he": he_display.replace("ערוך השולחן, ", ""),
        },
        "url": oref.url(),
        "ref": oref.normal(),
        "order": 10,
        "category": oref.index.get_primary_category(),
    }]
def bulktext_api(request, refs):
    """
    Used by the linker. Return he/en text for each of a pipe-delimited list
    of refs as JSON (optionally JSONP via `callback`), with CORS enabled.

    NOTE(review): a second bulktext_api variant (with `useTextFamily`) also
    appears in this source — confirm whether both are meant to coexist.

    :param request: Django request; GET param `callback` for JSONP.
    :param refs: "|"-separated ref strings (deduplicated here via set()).
    :return: jsonResponse mapping each tref to its text data, or {"error": 1}
        for trefs that failed to parse or load.
    """
    if request.method == "GET":
        cb = request.GET.get("callback", None)
        refs = set(refs.split("|"))
        res = {}
        for tref in refs:
            try:
                oref = model.Ref(tref)
                lang = "he" if is_hebrew(tref) else "en"
                # NOTE(review): `basestring` is Python 2 only — confirm runtime.
                he = model.TextChunk(oref, "he").text
                en = model.TextChunk(oref, "en").text
                res[tref] = {
                    'he': he if isinstance(he, basestring) else JaggedTextArray(he).flatten_to_string(),  # these could be flattened on the client, if need be.
                    'en': en if isinstance(en, basestring) else JaggedTextArray(en).flatten_to_string(),
                    'lang': lang,
                    'ref': oref.normal(),
                    'heRef': oref.he_normal(),
                    'url': oref.url()
                }
            except (InputError, ValueError, AttributeError) as e:
                referer = request.META.get("HTTP_REFERER", "unknown page")
                logger.warning(u"Linker failed to parse {} from {} : {}".format(tref, referer, e))
                res[tref] = {"error": 1}
        resp = jsonResponse(res, cb)
        # Allow cross-origin use by the linker embedded on external sites.
        resp['Access-Control-Allow-Origin'] = '*'
        return resp
def tikkunei_yomi(datetime_obj):
    """Return the 'Zohar for Elul' (Tikkunei Zohar) item for the given day, or []."""
    day = datetime.datetime(datetime_obj.year, datetime_obj.month, datetime_obj.day)
    record = db.daily_tikkunei_zohar.find_one({"date": {"$eq": day}})
    if not record:
        return []
    oref = model.Ref(record["ref"])
    return [{
        "title": {"en": "Zohar for Elul", "he": 'תיקוני זוהר לחודש אלול'},
        "displayValue": {"en": record["displayValue"], "he": record["heDisplayValue"]},
        "url": oref.url(),
        "ref": oref.normal(),
        "order": 12,
        "category": oref.index.get_primary_category(),
    }]
def test_sub_content_complex_setter(self):
    """sub_content() with an empty address should replace the whole complex
    chapter dict, and with a node address should replace that node's subtree.
    The final sub_content_with_ref call restores the fixture's original content
    for other tests."""
    self.complexVersion.sub_content(
        [], value={"Node 1": {
            "Node 2": [['wadup']],
            "Node 3": []
        }})
    assert self.complexVersion.chapter['Node 1']['Node 2'] == [['wadup']]
    self.complexVersion.sub_content(["Node 1"], value={"Node 2": [['yoyoyo']]})
    assert self.complexVersion.chapter['Node 1']['Node 2'] == [['yoyoyo']]
    # Reset to the original fixture content via a whole-book ref.
    self.complexVersion.sub_content_with_ref(
        model.Ref(f"{self.complexIndexTitle}"), {
            "Node 1": {
                "Node 2": [['yo'], ['', 'blah'], ["original text", "2nd"]]
            },
            "Node 3": ['1', '2', '3', '4']
        })
def test_sub_content_simple_setter(self):
    """sub_content() on a simple (flat) version should replace the whole
    chapter, a single section, or a single segment depending on the address
    depth; the final call restores the fixture for other tests."""
    self.simpleVersion.sub_content(
        self.simpleIndex.nodes.version_address(), value=[[], [], []])
    for i in range(3):
        assert self.simpleVersion.chapter[i] == []
    self.simpleVersion.sub_content(
        self.simpleIndex.nodes.version_address(), [1], value=['yo1', 'yo2', 'yo3'])
    assert self.simpleVersion.chapter[1] == ['yo1', 'yo2', 'yo3']
    self.simpleVersion.sub_content(
        self.simpleIndex.nodes.version_address(), [0, 1], value='yo')
    assert self.simpleVersion.chapter[0][1] == 'yo'
    # reset
    self.simpleVersion.sub_content_with_ref(
        model.Ref(f"{self.simpleIndexTitle}"), [['1'], ['2'], ["original text", "2nd"]])
def test_regex_string_he_in_parentheses(self):
    """The library's Hebrew title regex (parentheses mode) should match a real
    citation inside parentheses but not a coincidental word that merely
    contains a title ('ובספרות' contains 'רות' but is not a Ruth citation)."""
    st3 = '(בדברים לב ובספרות ג ב)'
    titles = ['דברים', 'רות']
    for title in titles:
        lang = "he" if is_hebrew(title) else "en"
        reg_str = m.library.get_regex_string(title, lang, for_js=True, anchored=False, capture_title=False, parentheses=True)
        reg = re.compile(reg_str, re.VERBOSE)
        match = reg.search(st3)
        if title == 'דברים':
            # Deuteronomy 32 is cited as "בדברים לב" inside the parentheses.
            assert m.Ref(match.group(1)).normal() == "Deuteronomy 32"
        else:
            assert match is None
def tanakh_yomi(datetime_obj):
    """Return the Tanakh Yomi calendar item for the given day, or [] if none."""
    day = datetime.datetime(datetime_obj.year, datetime_obj.month, datetime_obj.day)
    record = db.tanakh_yomi.find_one({"date": {"$eq": day}})
    if not record:
        return []
    oref = model.Ref(record["ref"])
    return [{
        "title": {"en": "Tanakh Yomi", "he": 'תנ"ך יומי'},
        "displayValue": {"en": record["displayValue"], "he": record["heDisplayValue"]},
        "url": oref.url(),
        "ref": oref.normal(),
        "order": 11,
        "category": oref.index.get_primary_category(),
    }]
def test_sub_content_with_ref(self):
    """sub_content_with_ref() should write at segment, section, and node depth
    for both simple and complex versions; fixture content is restored after
    each destructive write so other tests see the original data."""
    self.simpleVersion.sub_content_with_ref(model.Ref(f"{self.simpleIndexTitle} 3:2"), "new text")
    assert self.simpleVersion.chapter[2][1] == "new text"
    self.complexVersion.sub_content_with_ref(model.Ref(f"{self.complexIndexTitle}, Node 1, Node 2 3:2"), "new text")
    assert self.complexVersion.chapter["Node 1"]["Node 2"][2][1] == "new text"
    self.complexVersion.sub_content_with_ref(model.Ref(f"{self.complexIndexTitle}, Node 1, Node 2 3"), ["blah", "blarg"])
    assert self.complexVersion.chapter["Node 1"]["Node 2"][2] == ["blah", "blarg"]
    self.complexVersion.sub_content_with_ref(model.Ref(f"{self.complexIndexTitle}, Node 1, Node 2 3"), ["original text", "2nd"])  # set back to original content for other tests
    self.complexVersion.sub_content_with_ref(model.Ref(f"{self.complexIndexTitle}, Node 1, Node 2"), [["blah", "blarg"], ['more content']])
    assert self.complexVersion.chapter["Node 1"]["Node 2"] == [["blah", "blarg"], ['more content']]
    self.complexVersion.sub_content_with_ref(model.Ref(f"{self.complexIndexTitle}, Node 1, Node 2"), [['yo'], ['', 'blah'], ["original text", "2nd"]])  # set back to original content for other tests
def validate_text(text, tref): """ validate a dictionary representing a text to be written to db.texts """ # Required Keys for key in ("versionTitle", "versionSource", "language", "text"): if not key in text: return {"error": "Field '%s' missing from posted JSON." % key} oref = model.Ref(tref) # Validate depth of posted text matches expectation posted_depth = 0 if isinstance(text["text"], basestring) else list_depth( text["text"]) implied_depth = len(oref.sections) + posted_depth if implied_depth != oref.index.textDepth: raise InputError( u"Text Structure Mismatch. The stored depth of {} is {}, but the text posted to {} implies a depth of {}." .format(oref.book, oref.index.textDepth, tref, implied_depth)) return {"status": "ok"}
def validate_review(review):
    """
    Validate a review dict before it is stored.

    :param review: dict expected to contain score, comment, ref, language,
        and version keys; score must parse as a float in [0, 1].
    :return: {"result": "ok"} when valid, else {"error": <message>}.
    :raises InputError: (from model.Ref) when review["ref"] is not a valid ref.
    """
    for field in ("score", "comment", "ref", "language", "version"):
        if field not in review:
            return {
                "error": "Required field '%s' is missing from this review." % field
            }
    try:
        score = float(review["score"])
        if score > 1 or score < 0:
            return {"error": "'score' must be a number between 0 and 1."}
    except (ValueError, TypeError):
        # Fixed: float("not a number") raises ValueError, which the original
        # `except TypeError` let propagate; TypeError still covers None etc.
        return {"error": "'score' must be a number between 0 and 1."}
    # This will throw an InputError if there is anything wrong w/ the Ref
    model.Ref(review["ref"])
    return {"result": "ok"}