def migrate_versions_of_text(versions, mappings, orig_title, new_title, base_index):
    """
    Re-create each version in `versions` under `new_title`, copying text,
    links and edit history according to `mappings`.

    :param versions: iterable of version objects; `title`, `versionTitle`,
        `versionSource`, `language` and several optional metadata attributes are read.
    :param mappings: iterable of pairs; item 0 is the source ref string and
        item 1 the destination ref string, both written in terms of `orig_title`.
    :param orig_title: title text that gets substituted in version titles and refs.
    :param new_title: title text that replaces `orig_title`.
    :param base_index: object whose `nodes.create_skeleton()` supplies the
        empty "chapter" structure for each new Version.
    """
    optional_attrs = ['status', 'license', 'licenseVetted', 'method', 'versionNotes',
                      'priority', "digitizedBySefaria", "heversionSource"]

    # version_num must stay untouched until the `== 0` check below, so the
    # padding loop further down uses its own throwaway variable.
    for version_num, version in enumerate(versions):
        print(version.versionTitle.encode('utf-8'))
        new_version_title = version.title.replace(orig_title, new_title)
        print(new_version_title)
        new_version = Version(
            {
                "chapter": base_index.nodes.create_skeleton(),
                "versionTitle": version.versionTitle,
                "versionSource": version.versionSource,
                "language": version.language,
                "title": new_version_title
            }
        )
        # copy optional metadata only when the source version has a truthy value
        for attr in optional_attrs:
            value = getattr(version, attr, None)
            if value:
                setattr(new_version, attr, value)
        new_version.save()

        for mapping in mappings:
            # this makes the mapping contain the correct text/commentary title
            orig_ref = mapping[0].replace(orig_title, version.title)
            print(orig_ref)
            orRef = Ref(orig_ref)
            tc = orRef.text(lang=version.language, vtitle=version.versionTitle)
            ref_text = tc.text

            # this makes the destination mapping contain both the correct text/commentary title
            # and have it changed to the temp index title
            dest_ref = mapping[1].replace(orig_title, version.title)
            dest_ref = dest_ref.replace(orig_title, new_title)
            print(dest_ref)

            dRef = Ref(dest_ref)
            ref_depth = dRef.range_index() if dRef.is_range() else len(dRef.sections)
            text_depth = 0 if isinstance(ref_text, str) else list_depth(ref_text)
            # length hack to fit the correct JA: wrap the text in lists until
            # ref depth + text depth reaches the depth of the destination node
            implied_depth = ref_depth + text_depth
            desired_depth = dRef.index_node.depth
            for _ in range(implied_depth, desired_depth):
                ref_text = [ref_text]

            new_tc = dRef.text(lang=version.language, vtitle=version.versionTitle)
            new_tc.versionSource = version.versionSource
            new_tc.text = ref_text
            new_tc.save()
            VersionState(dRef.index.title).refresh()

            # links (8646 is the user the generated links are attributed to)
            linker = dRef.autolinker(user=8646)
            if linker:
                linker.refresh_links()
            add_links_from_text(dRef, new_version.language, new_tc.text, new_version._id, 8646)
            if version_num == 0:  # links are the same across versions
                migrate_links_of_ref(orRef, dRef)

            # version history: re-point every history record under the old ref
            text_hist = HistorySet({"ref": {"$regex": orRef.regex()}, 'version': version.versionTitle })
            for h in text_hist:
                new_h = h.copy()
                new_h.ref = translate_ref(Ref(h.ref), orRef, dRef).normal()
                new_h.save()
def migrate_versions_of_text(versions, mappings, orig_title, new_title, base_index):
    """
    Re-create each version in `versions` under `new_title`, copying text,
    links and edit history according to `mappings`.

    :param versions: iterable of version objects; `title`, `versionTitle`,
        `versionSource`, `language` and several optional metadata attributes are read.
    :param mappings: iterable of pairs; item 0 is the source ref string and
        item 1 the destination ref string, both written in terms of `orig_title`.
    :param orig_title: title text that gets substituted in version titles and refs.
    :param new_title: title text that replaces `orig_title`.
    :param base_index: object whose `nodes.create_skeleton()` supplies the
        empty "chapter" structure for each new Version.
    """
    optional_attrs = ['status', 'license', 'licenseVetted', 'method', 'versionNotes',
                      'priority', "digitizedBySefaria", "heversionSource"]

    # version_num must stay untouched until the `== 0` check below, so the
    # padding loop further down uses its own throwaway variable.
    for version_num, version in enumerate(versions):
        print(version.versionTitle.encode('utf-8'))
        new_version_title = version.title.replace(orig_title, new_title)
        print(new_version_title)
        new_version = Version(
            {
                "chapter": base_index.nodes.create_skeleton(),
                "versionTitle": version.versionTitle,
                "versionSource": version.versionSource,
                "language": version.language,
                "title": new_version_title
            }
        )
        # copy optional metadata only when the source version has a truthy value
        for attr in optional_attrs:
            value = getattr(version, attr, None)
            if value:
                setattr(new_version, attr, value)
        new_version.save()

        for mapping in mappings:
            # this makes the mapping contain the correct text/commentary title
            orig_ref = mapping[0].replace(orig_title, version.title)
            print(orig_ref)
            orRef = Ref(orig_ref)
            tc = orRef.text(lang=version.language, vtitle=version.versionTitle)
            ref_text = tc.text

            # this makes the destination mapping contain both the correct text/commentary title
            # and have it changed to the temp index title
            dest_ref = mapping[1].replace(orig_title, version.title)
            dest_ref = dest_ref.replace(orig_title, new_title)
            print(dest_ref)

            dRef = Ref(dest_ref)
            ref_depth = dRef.range_index() if dRef.is_range() else len(dRef.sections)
            text_depth = 0 if isinstance(ref_text, basestring) else list_depth(ref_text)
            # length hack to fit the correct JA: wrap the text in lists until
            # ref depth + text depth reaches the depth of the destination node
            implied_depth = ref_depth + text_depth
            desired_depth = dRef.index_node.depth
            for _ in range(implied_depth, desired_depth):
                ref_text = [ref_text]

            new_tc = dRef.text(lang=version.language, vtitle=version.versionTitle)
            new_tc.versionSource = version.versionSource
            new_tc.text = ref_text
            new_tc.save()
            VersionState(dRef.index.title).refresh()

            # links (8646 is presumably the acting user's id -- confirm against callers)
            if dRef.is_commentary():
                add_commentary_links(dRef, 8646)
            add_links_from_text(dRef.normal(), new_version.language, new_tc.text, new_version._id, 8646)
            if version_num == 0:  # links are the same across versions
                migrate_links_of_ref(orRef, dRef)

            # version history: re-point every history record under the old ref
            text_hist = HistorySet({"ref": {"$regex": orRef.regex()}, 'version': version.versionTitle })
            for h in text_hist:
                new_h = h.copy()
                new_h.ref = translate_ref(Ref(h.ref), orRef, dRef).normal()
                new_h.save()
def migrate_versions_of_text(versions, mappings, orig_title, new_title, base_index):
    """
    Re-create each version in `versions` under `new_title` and copy its text
    across according to `mappings`.

    :param versions: iterable of version objects; `title`, `versionTitle`,
        `versionSource`, `language` and several optional metadata attributes are read.
    :param mappings: dict of source ref string -> destination ref string,
        both written in terms of `orig_title`.
    :param orig_title: title text that gets substituted in version titles and refs.
    :param new_title: title text that replaces `orig_title`.
    :param base_index: object whose `nodes.create_skeleton()` supplies the
        empty "chapter" structure for each new Version.
    """
    optional_attrs = ['status', 'license', 'licenseVetted', 'method', 'versionNotes',
                      'priority', "digitizedBySefaria", "heversionSource"]

    for version in versions:
        print(version.versionTitle.encode('utf-8'))
        new_version_title = version.title.replace(orig_title, new_title)
        print(new_version_title)
        new_version = Version({
            "chapter": base_index.nodes.create_skeleton(),
            "versionTitle": version.versionTitle,
            "versionSource": version.versionSource,
            "language": version.language,
            "title": new_version_title
        })
        # copy optional metadata only when the source version has a truthy value
        for attr in optional_attrs:
            value = getattr(version, attr, None)
            if value:
                setattr(new_version, attr, value)
        new_version.save()

        # Iterate key/value pairs so the destination is taken from the
        # untouched mapping key; looking it up with the rewritten ref raised
        # KeyError whenever version.title differed from orig_title.
        for source_tref, target_tref in mappings.items():
            # this makes the mapping contain the correct text/commentary title
            orig_ref = source_tref.replace(orig_title, version.title)
            print(orig_ref)
            orRef = Ref(orig_ref)
            tc = orRef.text(lang=version.language, vtitle=version.versionTitle)
            ref_text = tc.text

            # this makes the destination mapping contain both the correct text/commentary title
            # and have it changed to the temp index title
            dest_ref = target_tref.replace(orig_title, version.title)
            dest_ref = dest_ref.replace(orig_title, new_title)
            print(dest_ref)

            dRef = Ref(dest_ref)
            ref_depth = dRef.range_index() if dRef.is_range() else len(dRef.sections)
            text_depth = 0 if isinstance(ref_text, basestring) else list_depth(ref_text)
            # length hack to fit the correct JA: wrap the text in lists until
            # ref depth + text depth reaches the depth of the destination node
            implied_depth = ref_depth + text_depth
            desired_depth = dRef.index_node.depth
            for _ in range(implied_depth, desired_depth):
                ref_text = [ref_text]

            new_tc = dRef.text(lang=version.language, vtitle=version.versionTitle)
            new_tc.versionSource = version.versionSource
            new_tc.text = ref_text
            new_tc.save()
            VersionState(dRef.index.title).refresh()
def merge_translations(text, sources):
    """
    Recursively combine several translations of the same text so that gaps
    in one are filled from another, yielding as much text as possible.
    e.g. [["a", ""], ["", "b", "c"]] becomes ["a", "b", "c"]

    Returns a two item list: [merged text, list of the source used for each merged item].
    """
    if not len(text) or not len(sources):
        return ["", []]

    depth = list_depth(text)

    if depth > 2:
        merged_sections = []
        flat_sources = []
        for position in range(max(map(len, text))):
            # Python 2 map(None, ...) pads the shorter translations with None
            column = map(None, *text)[position]
            section, section_sources = merge_translations(
                [part or [] for part in column], sources)
            merged_sections.append(section)
            # NOTE - the below flattens the sources list, so downstream code can always expect
            # a one dimensional list, but in so doing the mapping of source names to segments
            # is lost for merged texts of depth > 2 (this mapping is not currently used in general)
            flat_sources.extend(section_sources)
        return [merged_sections, flat_sources]

    was_flat = depth == 1
    if was_flat:
        # wrap bare strings so they can be handled like one-segment lists
        text = [[item] for item in text]

    merged_text = []
    merged_sources = []
    for candidates in map(None, *text):
        # Take the first non empty version (which will be the oldest, or one with highest priority);
        # fall back to position 0 and value 0 when every candidate is empty
        chosen, content = next(
            ((pos, candidate) for pos, candidate in enumerate(candidates) if candidate),
            (0, 0))
        merged_text.append(content)
        merged_sources.append(sources[chosen])

    if was_flat:
        # strings were earlier wrapped in lists, now unwrap
        merged_text = merged_text[0]

    return [merged_text, merged_sources]
def migrate_versions_of_text(versions, mappings, orig_title, new_title, base_index):
    """
    Re-create each version in `versions` under `new_title` and copy its text
    across according to `mappings`.

    :param versions: iterable of version objects; `title`, `versionTitle`,
        `versionSource`, `language` and several optional metadata attributes are read.
    :param mappings: dict of source ref string -> destination ref string,
        both written in terms of `orig_title`.
    :param orig_title: title text that gets substituted in version titles and refs.
    :param new_title: title text that replaces `orig_title`.
    :param base_index: object whose `nodes.create_skeleton()` supplies the
        empty "chapter" structure for each new Version.
    """
    optional_attrs = ['status', 'license', 'licenseVetted', 'method', 'versionNotes',
                      'priority', "digitizedBySefaria", "heversionSource"]

    for version in versions:
        print(version.versionTitle.encode('utf-8'))
        new_version_title = version.title.replace(orig_title, new_title)
        print(new_version_title)
        new_version = Version(
            {
                "chapter": base_index.nodes.create_skeleton(),
                "versionTitle": version.versionTitle,
                "versionSource": version.versionSource,
                "language": version.language,
                "title": new_version_title
            }
        )
        # copy optional metadata only when the source version has a truthy value
        for attr in optional_attrs:
            value = getattr(version, attr, None)
            if value:
                setattr(new_version, attr, value)
        new_version.save()

        # Iterate key/value pairs so the destination is taken from the
        # untouched mapping key; looking it up with the rewritten ref raised
        # KeyError whenever version.title differed from orig_title.
        for source_tref, target_tref in mappings.items():
            # this makes the mapping contain the correct text/commentary title
            orig_ref = source_tref.replace(orig_title, version.title)
            print(orig_ref)
            orRef = Ref(orig_ref)
            tc = orRef.text(lang=version.language, vtitle=version.versionTitle)
            ref_text = tc.text

            # this makes the destination mapping contain both the correct text/commentary title
            # and have it changed to the temp index title
            dest_ref = target_tref.replace(orig_title, version.title)
            dest_ref = dest_ref.replace(orig_title, new_title)
            print(dest_ref)

            dRef = Ref(dest_ref)
            ref_depth = dRef.range_index() if dRef.is_range() else len(dRef.sections)
            text_depth = 0 if isinstance(ref_text, basestring) else list_depth(ref_text)
            # length hack to fit the correct JA: wrap the text in lists until
            # ref depth + text depth reaches the depth of the destination node
            implied_depth = ref_depth + text_depth
            desired_depth = dRef.index_node.depth
            for _ in range(implied_depth, desired_depth):
                ref_text = [ref_text]

            new_tc = dRef.text(lang=version.language, vtitle=version.versionTitle)
            new_tc.versionSource = version.versionSource
            new_tc.text = ref_text
            new_tc.save()
            VersionState(dRef.index.title).refresh()
def merge_translations(text, sources):
    """
    Recursively combine several translations of the same text so that gaps
    in one are filled from another, yielding as much text as possible.
    e.g. [["a", ""], ["", "b", "c"]] becomes ["a", "b", "c"]

    Returns a two item list: [merged text, list of the source used for each merged item].
    """
    if not len(text) or not len(sources):
        return ["", []]

    depth = list_depth(text)

    if depth > 2:
        merged_sections = []
        flat_sources = []
        for position in range(max(map(len, text))):
            # Python 2 map(None, ...) pads the shorter translations with None
            column = map(None, *text)[position]
            section, section_sources = merge_translations(
                [part or [] for part in column], sources)
            merged_sections.append(section)
            # NOTE - the below flattens the sources list, so downstream code can always expect
            # a one dimensional list, but in so doing the mapping of source names to segments
            # is lost for merged texts of depth > 2 (this mapping is not currently used in general)
            flat_sources.extend(section_sources)
        return [merged_sections, flat_sources]

    was_flat = depth == 1
    if was_flat:
        # wrap bare strings so they can be handled like one-segment lists
        text = [[item] for item in text]

    merged_text = []
    merged_sources = []
    for candidates in map(None, *text):
        # Take the first non empty version (which will be the oldest, or one with highest priority);
        # fall back to position 0 and value 0 when every candidate is empty
        chosen, content = next(
            ((pos, candidate) for pos, candidate in enumerate(candidates) if candidate),
            (0, 0))
        merged_text.append(content)
        merged_sources.append(sources[chosen])

    if was_flat:
        # strings were earlier wrapped in lists, now unwrap
        merged_text = merged_text[0]

    return [merged_text, merged_sources]
def validate_text(text, tref):
    """
    Validate a dictionary representing a text to be written to db.texts.

    :param text: mapping that must contain the keys "versionTitle",
        "versionSource", "language" and "text".
    :param tref: reference string (parsed with model.Ref) the text is posted to.
    :return: {"status": "ok"} when the text is valid, or {"error": message}
        naming the first required key that is missing.
    :raises InputError: when len(oref.sections) plus the nesting depth of
        text["text"] differs from oref.index.textDepth.
    """
    # Required Keys -- reported through the return value, not an exception
    for key in ("versionTitle", "versionSource", "language", "text"):
        if key not in text:
            return {"error": "Field '%s' missing from posted JSON." % key}

    oref = model.Ref(tref)

    # Validate depth of posted text matches expectation
    # (a bare string counts as depth 0)
    posted_depth = 0 if isinstance(text["text"], basestring) else list_depth(text["text"])
    implied_depth = len(oref.sections) + posted_depth
    if implied_depth != oref.index.textDepth:
        raise InputError(
            u"Text Structure Mismatch. The stored depth of {} is {}, but the text posted to {} implies a depth of {}."
            .format(oref.book, oref.index.textDepth, tref, implied_depth))

    return {"status": "ok"}
def validate_text(text, tref):
    """
    Validate a dictionary representing a text to be written to db.texts.

    :param text: mapping that must contain the keys "versionTitle",
        "versionSource", "language" and "text".
    :param tref: reference string (parsed with model.Ref) the text is posted to.
    :return: {"status": "ok"} when the text is valid, or {"error": message}
        naming the first required key that is missing.
    :raises InputError: when len(oref.sections) plus the nesting depth of
        text["text"] differs from oref.index.textDepth.
    """
    # Required Keys -- reported through the return value, not an exception
    for key in ("versionTitle", "versionSource", "language", "text"):
        if key not in text:
            return {"error": "Field '%s' missing from posted JSON." % key}

    oref = model.Ref(tref)

    # Validate depth of posted text matches expectation
    # (a bare string counts as depth 0)
    posted_depth = 0 if isinstance(text["text"], basestring) else list_depth(text["text"])
    implied_depth = len(oref.sections) + posted_depth
    if implied_depth != oref.index.textDepth:
        raise InputError(
            u"Text Structure Mismatch. The stored depth of {} is {}, but the text posted to {} implies a depth of {}."
            .format(oref.book, oref.index.textDepth, tref, implied_depth))

    return {"status": "ok"}
def migrate_versions_of_text(versions, mappings, orig_title, new_title, base_index):
    """
    For every version given, save a new Version titled with `new_title` in
    place of `orig_title`, then move text, links and history records ref by
    ref as listed in `mappings` (pairs of source ref, destination ref).
    """
    carried_attrs = ('status', 'license', 'licenseVetted')

    for version in versions:
        new_version_title = version.title.replace(orig_title, new_title)
        print(new_version_title)

        version_fields = {
            "chapter": base_index.nodes.create_skeleton(),
            "versionTitle": version.versionTitle,
            "versionSource": version.versionSource,
            "language": version.language,
            "title": new_version_title
        }
        new_version = Version(version_fields)
        # carry over only the attributes that hold a truthy value
        for attr_name in carried_attrs:
            attr_value = getattr(version, attr_name, None)
            if attr_value:
                setattr(new_version, attr_name, attr_value)
        new_version.save()

        for mapping in mappings:
            # this makes the mapping contain the correct text/commentary title
            orig_ref = mapping[0].replace(orig_title, version.title)
            print(orig_ref)
            orRef = Ref(orig_ref)
            ref_text = orRef.text(lang=version.language, vtitle=version.versionTitle).text

            # this makes the destination mapping contain both the correct text/commentary title
            # and have it changed to the temp index title
            dest_ref = mapping[1].replace(orig_title, version.title).replace(orig_title, new_title)
            print(dest_ref)
            dRef = Ref(dest_ref)

            if dRef.is_range():
                ref_depth = dRef.range_index()
            else:
                ref_depth = len(dRef.sections)
            if isinstance(ref_text, basestring):
                text_depth = 0
            else:
                text_depth = list_depth(ref_text)

            # length hack to fit the correct JA
            implied_depth = ref_depth + text_depth
            desired_depth = dRef.index_node.depth
            for _ in range(desired_depth - implied_depth):
                ref_text = [ref_text]

            new_tc = dRef.text(lang=version.language, vtitle=version.versionTitle)
            new_tc.versionSource = version.versionSource
            new_tc.text = ref_text
            new_tc.save()
            VersionState(dRef.index.title).refresh()

            # links
            if dRef.is_commentary():
                add_commentary_links(dRef, 8646)
            add_links_from_text(dRef.normal(), new_version.language, new_tc.text, new_version._id, 8646)
            migrate_links_of_ref(orRef, dRef)

            # version history
            history_query = {
                "ref": {"$regex": orRef.regex()},
                'version': version.versionTitle
            }
            for record in HistorySet(history_query):
                moved = record.copy()
                moved.ref = translate_ref(Ref(record.ref), orRef, dRef).normal()
                moved.save()
def text_from_cur(ref, textCur, context):
    """
    Take a parsed ref and DB cursor of texts and construct a text to return out of what's available.
    Merges text fragments when necessary so that the final version has maximum text.

    :param ref: dict read for "sections", "toSections" and "sectionNames"; it is
        updated in place with "text" and, when a single version supplies all of
        the text, with that version's metadata.
    :param textCur: iterable of dict-like version records (read through
        ['chapter'] and .get()).
    :param context: when 0 only the referenced segment/range is extracted;
        otherwise the last section index is not followed, so the enclosing
        section is kept.
    :return: the same `ref` dict that was passed in.
    """
    versions = []
    versionTitles = []
    versionSources = []
    versionStatuses = []
    versionLicenses = []
    versionNotes = []
    versionBySefaria = []

    # does this ref refer to a range of text
    is_range = ref["sections"] != ref["toSections"]

    for t in textCur:
        try:
            text = t['chapter'][0] if len(ref["sectionNames"]) > 1 else t['chapter']
            if text == "" or text == []:
                continue
            if len(ref['sections']) < len(ref['sectionNames']) or (context == 0 and not is_range):
                sections = ref['sections'][1:]
                if len(ref["sectionNames"]) == 1 and context == 0:
                    sections = ref["sections"]
            else:
                # include surrounding text
                sections = ref['sections'][1:-1]

            # dive down into text until the requested segment is found
            for i in sections:
                text = text[int(i) - 1]

            if is_range and context == 0:
                start = ref["sections"][-1] - 1
                end = ref["toSections"][-1]
                text = text[start:end]

            # all appends happen after the lookups that may raise IndexError,
            # so the parallel lists always stay the same length
            versions.append(text)
            versionTitles.append(t.get("versionTitle", ""))
            versionSources.append(t.get("versionSource", ""))
            versionStatuses.append(t.get("status", "none"))
            # a license is only reported once it has been vetted
            version_license = t.get("license", "unknown") if t.get("licenseVetted", False) else "unknown"
            versionLicenses.append(version_license)
            versionNotes.append(t.get("versionNotes", ""))
            versionBySefaria.append(t.get("digitizedBySefaria", False))
        except IndexError:
            # this happens when t doesn't have the text we're looking for
            pass

    if list_depth(versions) == 1:
        # Drop empty strings, and drop the matching metadata entries as well so
        # that position n in every list still describes the same version.
        kept = [pos for pos, segment in enumerate(versions) if segment != '']
        versions = [versions[pos] for pos in kept]
        versionTitles = [versionTitles[pos] for pos in kept]
        versionSources = [versionSources[pos] for pos in kept]
        versionStatuses = [versionStatuses[pos] for pos in kept]
        versionLicenses = [versionLicenses[pos] for pos in kept]
        versionNotes = [versionNotes[pos] for pos in kept]
        versionBySefaria = [versionBySefaria[pos] for pos in kept]

    # position of the single version that supplied all the text, if there is one
    chosen = None

    if len(versions) == 0:
        ref['text'] = "" if context == 0 else []
    elif len(versions) == 1:
        ref['text'] = versions[0]
        chosen = 0
    else:
        ref['text'], ref['sources'] = merge_translations(versions, versionTitles)
        if len(set(ref['sources'])) == 1:
            # if sources only lists one title, no merge actually happened
            chosen = versionTitles.index(ref['sources'][0])
            del ref['sources']

    if chosen is not None:
        ref['versionTitle'] = versionTitles[chosen]
        ref['versionSource'] = versionSources[chosen]
        ref['versionStatus'] = versionStatuses[chosen]
        ref['license'] = versionLicenses[chosen]
        if versionNotes[chosen]:
            ref['versionNotes'] = versionNotes[chosen]
        if versionBySefaria[chosen]:
            ref['digitizedBySefaria'] = versionBySefaria[chosen]

    return ref
def text_from_cur(ref, textCur, context):
    """
    Take a parsed ref and DB cursor of texts and construct a text to return out of what's available.
    Merges text fragments when necessary so that the final version has maximum text.

    :param ref: dict read for "sections", "toSections" and "sectionNames"; it is
        updated in place with "text" and, when a single version supplies all of
        the text, with that version's metadata.
    :param textCur: iterable of dict-like version records (read through
        ['chapter'] and .get()).
    :param context: when 0 only the referenced segment/range is extracted;
        otherwise the last section index is not followed, so the enclosing
        section is kept.
    :return: the same `ref` dict that was passed in.
    """
    versions = []
    versionTitles = []
    versionSources = []
    versionStatuses = []
    versionLicenses = []
    versionNotes = []
    versionBySefaria = []

    # does this ref refer to a range of text
    is_range = ref["sections"] != ref["toSections"]

    for t in textCur:
        try:
            text = t['chapter'][0] if len(ref["sectionNames"]) > 1 else t['chapter']
            if text == "" or text == []:
                continue
            if len(ref['sections']) < len(ref['sectionNames']) or (context == 0 and not is_range):
                sections = ref['sections'][1:]
                if len(ref["sectionNames"]) == 1 and context == 0:
                    sections = ref["sections"]
            else:
                # include surrounding text
                sections = ref['sections'][1:-1]

            # dive down into text until the requested segment is found
            for i in sections:
                text = text[int(i) - 1]

            if is_range and context == 0:
                start = ref["sections"][-1] - 1
                end = ref["toSections"][-1]
                text = text[start:end]

            # all appends happen after the lookups that may raise IndexError,
            # so the parallel lists always stay the same length
            versions.append(text)
            versionTitles.append(t.get("versionTitle", ""))
            versionSources.append(t.get("versionSource", ""))
            versionStatuses.append(t.get("status", "none"))
            # a license is only reported once it has been vetted
            version_license = t.get("license", "unknown") if t.get("licenseVetted", False) else "unknown"
            versionLicenses.append(version_license)
            versionNotes.append(t.get("versionNotes", ""))
            versionBySefaria.append(t.get("digitizedBySefaria", False))
        except IndexError:
            # this happens when t doesn't have the text we're looking for
            pass

    if list_depth(versions) == 1:
        # Drop empty strings, and drop the matching metadata entries as well so
        # that position n in every list still describes the same version.
        kept = [pos for pos, segment in enumerate(versions) if segment != '']
        versions = [versions[pos] for pos in kept]
        versionTitles = [versionTitles[pos] for pos in kept]
        versionSources = [versionSources[pos] for pos in kept]
        versionStatuses = [versionStatuses[pos] for pos in kept]
        versionLicenses = [versionLicenses[pos] for pos in kept]
        versionNotes = [versionNotes[pos] for pos in kept]
        versionBySefaria = [versionBySefaria[pos] for pos in kept]

    # position of the single version that supplied all the text, if there is one
    chosen = None

    if len(versions) == 0:
        ref['text'] = "" if context == 0 else []
    elif len(versions) == 1:
        ref['text'] = versions[0]
        chosen = 0
    else:
        ref['text'], ref['sources'] = merge_translations(versions, versionTitles)
        if len(set(ref['sources'])) == 1:
            # if sources only lists one title, no merge actually happened
            chosen = versionTitles.index(ref['sources'][0])
            del ref['sources']

    if chosen is not None:
        ref['versionTitle'] = versionTitles[chosen]
        ref['versionSource'] = versionSources[chosen]
        ref['versionStatus'] = versionStatuses[chosen]
        ref['license'] = versionLicenses[chosen]
        if versionNotes[chosen]:
            ref['versionNotes'] = versionNotes[chosen]
        if versionBySefaria[chosen]:
            ref['digitizedBySefaria'] = versionBySefaria[chosen]

    return ref