def save_row(user_id, version_title, version_lang, action_type, row):
    """
    Save a single row of text through ``modify_text``.

    :param user_id: forwarded to ``modify_text`` as its first argument
    :param version_title: title of the version to write to
    :param version_lang: language of the version to write to
    :param action_type: forwarded to ``modify_text`` as the ``type`` keyword
    :param row: indexable; ``row[0]`` is the textual reference handed to
        ``Ref`` and ``row[1]`` is the text saved at that reference
    """
    ref = Ref(row[0])
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("Saving: {}".format(ref.normal()))
    try:
        modify_text(user_id, ref, version_title, version_lang, row[1], type=action_type)
    except InputError:
        # Best effort: a row rejected with InputError is skipped, not fatal.
        pass
def test_add_remove_links_with_text_save():
    """
    Check generated commentary links while saving 'Rashi on Genesis 18:22'.

    After saving three segments the link count must be one above the
    baseline; after saving the text without its last segment it must be back
    at the baseline.
    """
    title = 'Rashi on Genesis'
    tref = 'Rashi on Genesis 18:22'
    desired_link_count = 2027
    lang = 'he'
    vtitle = "test"

    regex = Ref(title).regex()
    oref = Ref(tref)

    def generated_link_count():
        # Count the links produced by add_commentary_links for this title.
        return LinkSet({
            "refs": {"$regex": regex},
            "generated_by": "add_commentary_links",
        }).count()

    segments = [
        u"כךל שדךלגכח ש ךלדקחכ ףךדלכח שףךדג",
        u"כךל שדךלגכח ש ךלדקחכ ףךדלכח שףךדג",
        u"כךל שדךלגכח ש ךלדקחכ ףךדלכח שףךדג",
    ]
    tracker.modify_text(1, Ref(tref), vtitle, lang, segments)
    assert generated_link_count() == (desired_link_count + 1)

    # Drop the last segment and save again.
    chunk = TextChunk(oref, lang, vtitle)
    chunk.text = chunk.text[:-1]
    tracker.modify_text(1, Ref(tref), vtitle, lang, chunk.text)
    assert generated_link_count() == desired_link_count
def _import_versions_from_csv(rows, columns, user_id):
    """
    Import one text version per entry of ``columns`` from parsed CSV rows.

    Layout read from ``rows``:
      * row 0 - index title (taken from the first column only)
      * row 1 - version title, per column
      * row 2 - version language, per column
      * row 3 - version source, per column
      * row 4 - version notes, per column
      * rows 5+ - cell 0 is a textual reference, the column cell is its text

    A ``Version`` record is created for a column when none can be loaded for
    its title/language; the text rows are then saved through ``modify_text``.

    :param rows: sequence of indexable rows as described above
    :param columns: column indexes to import; must not be empty
    :param user_id: forwarded to ``modify_text`` as its first argument
    """
    from sefaria.tracker import modify_text

    index_title = rows[0][columns[0]]  # assume the same index title for all
    index_node = Ref(index_title).index_node

    for column in columns:
        # Decide add/edit per column: a version created for an earlier column
        # must not cause later, already existing versions to be saved as "add".
        action = "edit"

        # Create version
        version_title = rows[1][column]
        version_lang = rows[2][column]

        existing = Version().load({
            "title": index_title,
            "versionTitle": version_title,
            "language": version_lang,
        })
        if existing is None:
            action = "add"
            Version({
                "chapter": index_node.create_skeleton(),
                "title": index_title,
                "versionTitle": version_title,
                "language": version_lang,          # Language
                "versionSource": rows[3][column],  # Version Source
                "versionNotes": rows[4][column],   # Version Notes
            }).save()

        # Populate it
        for row in rows[5:]:
            ref = Ref(row[0])
            # Parenthesised single-argument print works on Python 2 and 3.
            print("Saving: {}".format(ref.normal()))
            try:
                modify_text(user_id, ref, version_title, version_lang, row[column], type=action)
            except InputError:
                # Best effort: rows rejected with InputError are skipped.
                pass
def test_add_remove_links_with_text_save_complex():
    """
    Check generated commentary links while saving a section of a complex text.

    After saving two segments under 'Kos Shel Eliyahu on Pesach Haggadah,
    Kadesh 1' the link count must be two above the baseline; after saving an
    empty section it must be back at the baseline.
    """
    title = 'Kos Shel Eliyahu on Pesach Haggadah'
    tref = 'Kos Shel Eliyahu on Pesach Haggadah, Kadesh 1'
    desired_link_count = 80
    lang = 'en'
    vtitle = "test"

    regex = Ref(title).regex()
    oref = Ref(tref)

    def generated_link_count():
        # Count the links produced by add_commentary_links for this title.
        return LinkSet({
            "refs": {"$regex": regex},
            "generated_by": "add_commentary_links",
        }).count()

    segments = [
        "thlerkawje alkejal ekjlkej",
        "eaflkje arheahrlka jhklajdhkl ADJHKL",
    ]
    tracker.modify_text(1, Ref(tref), vtitle, lang, segments)
    assert generated_link_count() == desired_link_count + 2

    # Empty the section and save again.
    chunk = TextChunk(oref, lang, vtitle)
    chunk.text = []
    tracker.modify_text(1, Ref(tref), vtitle, lang, chunk.text)
    assert generated_link_count() == desired_link_count
def modify_text(user, oref, versionTitle, language, text, versionSource, tries=0):
    """
    Save ``text`` through ``tracker.modify_text``, retrying on AutoReconnect.

    The save is issued with ``method="API"`` and ``skip_links=False``.

    * ``UnicodeEncodeError`` is reported on stdout and otherwise ignored.
    * ``AssertionError`` is silently ignored.
    * ``AutoReconnect`` triggers another attempt while the attempt counter is
      below 200; after that a fresh ``AutoReconnect`` is raised.

    :param tries: starting value of the attempt counter
    :raises AutoReconnect: when the retry budget is exhausted
    """
    attempt = tries
    while True:
        try:
            tracker.modify_text(
                user, oref, versionTitle, language, text, versionSource,
                method="API", skip_links=False,
            )
        except UnicodeEncodeError:
            # there seems to be unicode error in "/app/sefaria/system/varnish/wrapper.py", line 55
            print("UnicodeEncodeError: {}".format(oref.normal()))
        except AutoReconnect:
            if attempt >= 200:
                raise AutoReconnect(
                    "Tried so hard but got so many autoreconnects...")
            attempt += 1
            continue
        except AssertionError:
            pass
        return
def modify_ja(node_list):
    """
    Save the text located by ``node_list`` at the reference derived from it.

    Uses names from the surrounding scope: ``derive_ref``, ``get_text``,
    ``text_json``, ``modify_text``, ``user_id``, ``version_title``,
    ``version_lang`` and ``action_type``.
    """
    target_ref = derive_ref(node_list)
    node_text = get_text(text_json['text'], node_list)
    modify_text(
        user_id,
        target_ref,
        version_title,
        version_lang,
        node_text,
        type=action_type,
    )
def fix_rashi():
    """
    Split the Ktav Yad Rashi comments out of 'Wikisource Rashi' on Menachot.

    Creates the 'Wikisource Ktav Yad Rashi' version record, then walks the
    section refs inside 'Rashi on Menachot 72b-94a'. For every section that
    has ktav yad entries, the selected segments of the existing Hebrew
    'Wikisource Rashi' text are saved to the matching 'Ktav Yad Rashi' ref and
    the section itself is re-saved with only its rashi segments. Finally the
    version states of both texts are refreshed.

    ``structure_refs`` is assumed to return a mapping from ``ref.uid()`` to
    segment numbers; each number is used as ``text[number - 1]``.
    """
    # create version record for ktav yad
    version = {
        'chapter': [],
        'versionTitle': 'Wikisource Ktav Yad Rashi',
        'versionSource': 'Wikisource',
        'language': 'he',
        'title': 'Ktav Yad Rashi on Menachot'
    }
    Version(version).save()

    data = separate_ktav_yad_rashi()
    rashi = structure_refs(data['rashi'])
    ktav_yad = structure_refs(data['ktav yad rashi'])

    all_refs = Ref('Rashi on Menachot 72b-94a')
    ref = Ref('Rashi on Menachot 72b').first_available_section_ref()

    while all_refs.contains(ref):
        uid = ref.uid()

        # keep track of status
        # (parenthesised single-argument print works on Python 2 and 3)
        print(uid)
        all_text = TextChunk(ref, 'he', u'Wikisource Rashi')

        # if there is no ktav yad on ref - do nothing
        if uid not in ktav_yad:
            ref = ref.next_section_ref()
            continue

        # add ktav texts to an array
        ktav_text = [all_text.text[chunk - 1] for chunk in ktav_yad[uid]]

        # add rashi texts to an array
        if uid in rashi:
            rashi_text = [all_text.text[chunk - 1] for chunk in rashi[uid]]
        else:
            rashi_text = []

        # create a ref for the ktav yad
        ktav_ref = Ref(uid.replace('Rashi', 'Ktav Yad Rashi'))
        tracker.modify_text(23432, ktav_ref, u'Wikisource Ktav Yad Rashi', 'he', ktav_text)

        # Save rashi text
        tracker.modify_text(23432, ref, u'Wikisource Rashi', 'he', rashi_text)

        ref = ref.next_section_ref()

    vs = VersionState("Rashi on Menachot")
    vs.refresh()
    vs = VersionState("Ktav Yad Rashi on Menachot")
    vs.refresh()
def create_version_from_outline_notes(self):
    """
    Save every text entry of ``self.version_info['text']`` via ``modify_text``.

    Each entry supplies a ``'node'`` (its full title becomes the target ref)
    and the ``'text'`` to store. Version title, language and version source
    are read from ``self.version_info['info']``; the save is attributed to
    user 8646.
    """
    from sefaria.tracker import modify_text

    for entry in self.version_info['text']:
        target_ref = Ref(entry['node'].full_title(force_update=True))
        content = entry['text']
        info = self.version_info['info']
        modify_text(
            8646,
            target_ref,
            info['versionTitle'],
            info['language'],
            content,
            info['versionSource'],
        )
def create_version_from_outline_notes(self):
    """
    Write the outline-note texts held in ``self.version_info`` to a version.

    For each item of ``self.version_info['text']`` the item's ``'node'`` full
    title is turned into a ``Ref`` and the item's ``'text'`` is saved there
    with ``modify_text``, using the version title, language and version source
    from ``self.version_info['info']`` and the fixed user id 8646.
    """
    from sefaria.tracker import modify_text

    user = 8646
    for item in self.version_info['text']:
        title = item['node'].full_title(force_update=True)
        oref = Ref(title)
        body = item['text']
        meta = self.version_info['info']
        vtitle, lang, vsource = meta['versionTitle'], meta['language'], meta['versionSource']
        modify_text(user, oref, vtitle, lang, body, vsource)
def modify_text_by_function(title, vtitle, lang, func, uid, **kwargs):
    """
    Walk every segment contained in ``title``, call ``func`` on its text and
    save the result.

    Sections without text and empty segments are skipped, so ``func`` only
    ever receives non-empty segment text.

    :param title: index title; section refs come from
        ``VersionStateSet({"title": title}).all_refs()``
    :param vtitle: version title to read from and write to
    :param lang: version language to read from and write to
    :param func: callable taking a segment's text and returning the new text
    :param uid: forwarded to ``modify_text`` as its first argument
    :param kwargs: extra keyword arguments forwarded to ``modify_text``
    """
    from sefaria.tracker import modify_text

    section_refs = VersionStateSet({"title": title}).all_refs()
    for section_ref in section_refs:
        section = section_ref.text(vtitle=vtitle, lang=lang)
        segments = section.text
        if not segments:
            # Nothing stored for this section (None or empty): len() would
            # fail on None and there is nothing to rewrite anyway.
            continue

        segment_refs = section_ref.subrefs(len(segments))
        for i, segment in enumerate(segments):
            if not segment:
                # Do not run func on, or save, an empty segment.
                continue
            modify_text(uid, segment_refs[i], vtitle, lang, func(segment), **kwargs)
def modify_text(user, oref, versionTitle, language, text, versionSource, tries=0):
    """
    Save ``text`` through ``tracker.modify_text``, retrying on AutoReconnect.

    The save is issued with ``method="API"`` and ``skip_links=False``.

    * ``UnicodeEncodeError`` is reported on stdout and otherwise ignored.
    * ``AssertionError`` is silently ignored.
    * ``AutoReconnect`` makes the function call itself again with
      ``tries + 1`` while ``tries`` is below 200; after that a fresh
      ``AutoReconnect`` is raised.

    :param tries: number of retries already performed
    :raises AutoReconnect: when the retry budget is exhausted
    """
    try:
        tracker.modify_text(user, oref, versionTitle, language, text, versionSource, method="API", skip_links=False)
    except UnicodeEncodeError:
        # there seems to be unicode error in "/app/sefaria/system/varnish/wrapper.py", line 55
        # Parenthesised single-argument print works on Python 2 and 3.
        print(u"UnicodeEncodeError: {}".format(oref.normal()))
    except AutoReconnect:
        if tries < 200:
            modify_text(user, oref, versionTitle, language, text, versionSource, tries=tries + 1)
        else:
            raise AutoReconnect("Tried so hard but got so many autoreconnects...")
    except AssertionError:
        pass
def modify_text_by_function(title, vtitle, lang, func, uid, **kwargs):
    """
    Walk every segment contained in ``title``, call ``func`` on its text and
    save the result.

    Section refs come from ``library.get_index(title).all_section_refs()``.
    Sections without segment refs and empty segments are skipped. Extra
    keyword arguments are forwarded to ``modify_text``.
    """
    from sefaria.tracker import modify_text

    for section_ref in library.get_index(title).all_section_refs():
        section = section_ref.text(vtitle=vtitle, lang=lang)
        segment_count = len(section.text) if section.text else 0
        segment_refs = section_ref.subrefs(segment_count)
        if not segment_refs:
            continue

        for position, segment in enumerate(section.text):
            if not segment or not len(segment):
                continue
            rewritten = func(segment)
            modify_text(uid, segment_refs[position], vtitle, lang, rewritten, **kwargs)
def modify_text_by_function(title, vtitle, lang, rewrite_function, uid, needs_rewrite_function=lambda x: True, **kwargs):
    """
    Apply ``rewrite_function`` to the text of every leaf node of ``title`` in
    one version and save the rewritten text.

    :param title: index title whose leaf nodes are visited
    :param vtitle: version title to read from and write to
    :param lang: version language to read from and write to
    :param rewrite_function: passed to ``JaggedTextArray.modify_by_function``
    :param uid: forwarded to ``modify_text`` as its first argument
    :param needs_rewrite_function: called with the node's original array; the
        node is saved only when it returns a truthy value (default: always)
    :param kwargs: extra keyword arguments forwarded to ``modify_text``
    """
    from sefaria.tracker import modify_text

    for leaf_node in library.get_index(title).nodes.get_leaf_nodes():
        node_ref = leaf_node.ref()
        jagged = node_ref.text(lang, vtitle).ja()
        assert isinstance(jagged, JaggedTextArray)

        # The rewrite is computed before the predicate is consulted, so
        # rewrite_function runs even for nodes that end up not being saved.
        rewritten = jagged.modify_by_function(rewrite_function)
        if not needs_rewrite_function(jagged.array()):
            continue
        modify_text(uid, node_ref, vtitle, lang, rewritten, **kwargs)
def modify_text_by_function(title, vtitle, lang, func, uid, **kwargs):
    """
    Run ``func`` over each non-empty segment of ``title`` in the given
    version and save what it returns.

    Sections are taken from ``library.get_index(title).all_section_refs()``;
    a section whose ``subrefs`` list is empty is skipped. ``kwargs`` are
    passed through to ``modify_text``.
    """
    from sefaria.tracker import modify_text

    index = library.get_index(title)
    for section_ref in index.all_section_refs():
        section = section_ref.text(vtitle=vtitle, lang=lang)
        if section.text:
            how_many = len(section.text)
        else:
            how_many = 0
        segment_refs = section_ref.subrefs(how_many)
        if not segment_refs:
            continue

        for idx, segment_text in enumerate(section.text):
            if segment_text and len(segment_text):
                new_text = func(segment_text)
                modify_text(uid, segment_refs[idx], vtitle, lang, new_text, **kwargs)
def _import_versions_from_csv(rows, columns, user_id):
    """
    Import one text version per entry of ``columns`` from parsed CSV rows.

    Layout read from ``rows``:
      * row 0 - index title (taken from the first column only)
      * row 1 - version title, per column
      * row 2 - version language, per column
      * row 3 - version source, per column
      * row 4 - version notes, per column
      * rows 5+ - cell 0 is a textual reference, the column cell is its text

    A ``Version`` record is created for a column when none can be loaded for
    its title/language; the text rows are then saved through ``modify_text``.

    :param rows: sequence of indexable rows as described above
    :param columns: column indexes to import; must not be empty
    :param user_id: forwarded to ``modify_text`` as its first argument
    """
    from sefaria.tracker import modify_text

    index_title = rows[0][columns[0]]  # assume the same index title for all
    index_node = Ref(index_title).index_node

    for column in columns:
        # Decide add/edit per column: a version created for an earlier column
        # must not cause later, already existing versions to be saved as "add".
        action = "edit"

        # Create version
        version_title = rows[1][column]
        version_lang = rows[2][column]

        existing = Version().load({
            "title": index_title,
            "versionTitle": version_title,
            "language": version_lang,
        })
        if existing is None:
            action = "add"
            Version({
                "chapter": index_node.create_skeleton(),
                "title": index_title,
                "versionTitle": version_title,
                "language": version_lang,          # Language
                "versionSource": rows[3][column],  # Version Source
                "versionNotes": rows[4][column],   # Version Notes
            }).save()

        # Populate it
        for row in rows[5:]:
            ref = Ref(row[0])
            # Parenthesised single-argument print works on Python 2 and 3.
            print("Saving: {}".format(ref.normal()))
            try:
                modify_text(user_id, ref, version_title, version_lang, row[column], type=action)
            except InputError:
                # Best effort: rows rejected with InputError are skipped.
                pass
def test_add_remove_links_with_text_save_complex():
    """
    Saving two segments to 'Kos Shel Eliyahu on Pesach Haggadah, Kadesh 1'
    must raise the generated commentary link count by two; saving an empty
    section afterwards must bring it back to the baseline of 80.
    """
    desired_link_count = 80
    lang, vtitle = 'en', "test"
    title = 'Kos Shel Eliyahu on Pesach Haggadah'
    regex = Ref(title).regex()
    tref = 'Kos Shel Eliyahu on Pesach Haggadah, Kadesh 1'
    oref = Ref(tref)

    def count_links():
        # Fresh query on every call so each count reflects the latest save.
        query = {"refs": {"$regex": regex}, "generated_by": "add_commentary_links"}
        return LinkSet(query).count()

    # Add text.
    tracker.modify_text(1, Ref(tref), vtitle, lang, [
        "thlerkawje alkejal ekjlkej",
        "eaflkje arheahrlka jhklajdhkl ADJHKL",
    ])
    assert count_links() == desired_link_count+2

    # Remove it again.
    chunk = TextChunk(oref, lang, vtitle)
    chunk.text = []
    tracker.modify_text(1, Ref(tref), vtitle, lang, chunk.text)
    assert count_links() == desired_link_count
def test_add_remove_links_with_text_save():
    """
    Saving three segments to 'Rashi on Genesis 18:22' must leave the generated
    commentary link count one above the baseline of 2027; saving the text
    without its last segment must bring it back to the baseline.
    """
    desired_link_count = 2027
    lang, vtitle = 'he', "test"
    title = 'Rashi on Genesis'
    regex = Ref(title).regex()
    tref = 'Rashi on Genesis 18:22'
    oref = Ref(tref)

    def count_links():
        # Fresh query on every call so each count reflects the latest save.
        query = {"refs": {"$regex": regex}, "generated_by": "add_commentary_links"}
        return LinkSet(query).count()

    # Add text.
    tracker.modify_text(1, Ref(tref), vtitle, lang, [
        u"כךל שדךלגכח ש ךלדקחכ ףךדלכח שףךדג",
        u"כךל שדךלגכח ש ךלדקחכ ףךדלכח שףךדג",
        u"כךל שדךלגכח ש ךלדקחכ ףךדלכח שףךדג",
    ])
    assert count_links() == (desired_link_count+1)

    # Remove the last segment again.
    chunk = TextChunk(oref, lang, vtitle)
    chunk.text = chunk.text[:-1]
    tracker.modify_text(1, Ref(tref), vtitle, lang, chunk.text)
    assert count_links() == desired_link_count
def test_refresh_links_with_text_save_complex(self):
    """
    Auto-generated commentary links must follow text saves on a complex text.

    The baseline comes from ``self.desired_link_counts``. After saving two
    segments to 'Kos Shel Eliyahu on Pesach Haggadah, Kadesh 1' the count must
    be two above the baseline; after saving the text without its last two
    segments it must equal the baseline again.
    """
    title = 'Kos Shel Eliyahu on Pesach Haggadah'
    section_tref = 'Kos Shel Eliyahu on Pesach Haggadah, Kadesh 1'
    new_segments = [
        "thlerkawje alkejal ekjlkej",
        "eaflkje arheahrlka jhklajdhkl ADJHKL",
    ]
    lang, vtitle = 'he', "test"

    oref = Ref(section_tref)
    regex = Ref(title).regex()

    def auto_link_count():
        return LinkSet({
            "refs": {"$regex": regex},
            "auto": True,
            "generated_by": "add_commentary_links",
        }).count()

    # original count
    desired_link_count = self.desired_link_counts[title]

    # add some text (two more comments than there are already)
    tracker.modify_text(1, oref, vtitle, lang, new_segments)
    assert auto_link_count() == (desired_link_count+2)

    # now delete
    chunk = TextChunk(oref, lang, vtitle)
    chunk.text = chunk.text[:-2]
    tracker.modify_text(1, oref, vtitle, lang, chunk.text)
    assert auto_link_count() == desired_link_count
def test_refresh_links_with_text_save(self):
    """
    Auto-generated commentary links must follow text saves.

    The baseline comes from ``self.desired_link_counts``. After saving three
    segments to 'Rashi on Genesis 18:22' the count must be one above the
    baseline; after saving the text without its last segment it must equal
    the baseline again.
    """
    title = 'Rashi on Genesis'
    section_tref = 'Rashi on Genesis 18:22'
    new_segments = [
        u"כךל שדךלגכח ש ךלדקחכ ףךדלכח שףךדג",
        u"כךל שדךלגכח ש ךלדקחכ ףךדלכח שףךדג",
        u"כךל שדךלגכח ש ךלדקחכ ףךדלכח שףךדג",
    ]
    lang, vtitle = 'he', "test"

    oref = Ref(section_tref)
    regex = Ref(title).regex()

    def auto_link_count():
        return LinkSet({
            "refs": {"$regex": regex},
            "auto": True,
            "generated_by": "add_commentary_links",
        }).count()

    # original count
    desired_link_count = self.desired_link_counts[title]

    # add some text (one more comment than there is already)
    tracker.modify_text(1, oref, vtitle, lang, new_segments)
    assert auto_link_count() == (desired_link_count+1)

    # now delete
    chunk = TextChunk(oref, lang, vtitle)
    chunk.text = chunk.text[:-1]
    tracker.modify_text(1, oref, vtitle, lang, chunk.text)
    assert auto_link_count() == desired_link_count
def test_refresh_links_with_text_save_one_to_one_default_node(self):
    """
    Link refresh for the text behind "Onkelos Genesis 1".

    Saving text to the ', Introduction 1' node must leave the auto-generated
    link count at the baseline from ``self.desired_link_counts``; appending
    two segments to "Onkelos Genesis 1" must add exactly two links, and
    saving without those two segments must restore the baseline.
    """
    title_ref = "Onkelos Genesis 1"
    index = Ref(title_ref).index
    title = index.title
    # Not used below; the lookup itself fails if the index has no base text.
    base = index.base_text_titles[0]
    desired_link_count = self.desired_link_counts[title]
    regex = Ref(title).regex()

    def auto_link_count():
        return LinkSet({
            "refs": {"$regex": regex},
            "auto": True,
            "generated_by": "add_commentary_links",
        }).count()

    # add another segment to intro text to show that it won't affect link count
    intro_segments = [u"Intro first segment text", u"Intro second segment text"]
    intro_ref = Ref("{}, Introduction 1".format(title))
    tracker.modify_text(1, intro_ref, "test", "en", intro_segments)
    assert auto_link_count() == desired_link_count

    # now add 2 segments to default node and check that exactly 2 more links exist
    lang, vtitle = 'he', "test"
    oref = Ref(title_ref)
    stext = TextChunk(oref, lang=lang).text
    stext += [u"חדש", u"חדש"]
    tracker.modify_text(1, oref, vtitle, lang, stext)
    assert auto_link_count() == (desired_link_count + 2)

    # now delete 2 segments
    chunk = TextChunk(oref, lang, vtitle)
    chunk.text = chunk.text[:-2]
    tracker.modify_text(1, oref, vtitle, lang, chunk.text)
    assert auto_link_count() == desired_link_count
def test_refresh_links_with_text_save_one_to_one_default_node(self):
    """
    Link refresh for the text behind "One to One on Genesis 1".

    Saving text to the ', Introduction 1' node must leave the auto-generated
    link count at the baseline from ``self.desired_link_counts``; appending
    two segments to "One to One on Genesis 1" must add exactly two links, and
    saving without those two segments must restore the baseline.
    """
    title_ref = "One to One on Genesis 1"
    index = Ref(title_ref).index
    title = index.title
    # Not used below; the lookup itself fails if the index has no base text.
    base = index.base_text_titles[0]
    desired_link_count = self.desired_link_counts[title]
    regex = Ref(title).regex()

    def auto_link_count():
        return LinkSet({
            "refs": {"$regex": regex},
            "auto": True,
            "generated_by": "add_commentary_links",
        }).count()

    # add another segment to intro text to show that it won't affect link count
    intro_segments = [
        "Intro first segment text",
        "Intro second segment text",
        "Intro third segment text",
    ]
    intro_ref = Ref("{}, Introduction 1".format(title))
    tracker.modify_text(1, intro_ref, "test", "en", intro_segments)
    assert auto_link_count() == desired_link_count

    # now add 2 segments to default node and check that exactly 2 more links exist
    lang, vtitle = 'en', "test"
    oref = Ref(title_ref)
    stext = TextChunk(oref, lang=lang).text
    stext += ["new", "new"]
    tracker.modify_text(1, oref, vtitle, lang, stext)
    assert auto_link_count() == (desired_link_count + 2)

    # now delete 2 segments
    chunk = TextChunk(oref, lang, vtitle)
    chunk.text = chunk.text[:-2]
    tracker.modify_text(1, oref, vtitle, lang, chunk.text)
    assert auto_link_count() == desired_link_count