示例#1
0
def migrate_versions_of_text(versions, mappings, orig_title, new_title, base_index):
    """
    Copy each of the given versions onto the index titled `new_title`.

    For every version a new Version record is created and saved. Then, for every
    mapping, the text read from the source ref is stored at the destination ref,
    links are regenerated, and the matching history records are copied with
    translated refs. Links of the source ref are migrated only while the first
    version is being processed.

    :param versions: sequence of version objects; reads `title`, `versionTitle`,
        `versionSource`, `language` and a handful of optional attributes
    :param mappings: sequence of items whose [0] is the source ref string and whose
        [1] is the destination ref string; occurrences of `orig_title` in both are
        substituted before the refs are parsed
    :param orig_title: title text to substitute inside refs and version titles
    :param new_title: title that replaces `orig_title` in the new version title and
        in the destination refs
    :param base_index: object whose `nodes.create_skeleton()` supplies the initial
        "chapter" value of each new version
    :return: None
    """
    for version_num, version in enumerate(versions):
        print(version.versionTitle.encode('utf-8'))
        new_version_title = version.title.replace(orig_title, new_title)
        print(new_version_title)
        new_version = Version(
                {
                    "chapter": base_index.nodes.create_skeleton(),
                    "versionTitle": version.versionTitle,
                    "versionSource": version.versionSource,
                    "language": version.language,
                    "title": new_version_title
                }
            )
        # optional attributes are carried over only when they hold a truthy value
        for attr in ['status', 'license', 'licenseVetted', 'method', 'versionNotes', 'priority', "digitizedBySefaria", "heversionSource"]:
            value = getattr(version, attr, None)
            if value:
                setattr(new_version, attr, value)
        new_version.save()
        for mapping in mappings:
            #this makes the mapping contain the correct text/commentary title
            orig_ref = mapping[0].replace(orig_title, version.title)
            print(orig_ref)
            orRef = Ref(orig_ref)
            tc = orRef.text(lang=version.language, vtitle=version.versionTitle)
            ref_text = tc.text

            #this makes the destination mapping contain both the correct text/commentary title
            # and have it changed to the temp index title
            dest_ref = mapping[1].replace(orig_title, version.title)
            dest_ref = dest_ref.replace(orig_title, new_title)
            print(dest_ref)

            dRef = Ref(dest_ref)
            ref_depth = dRef.range_index() if dRef.is_range() else len(dRef.sections)
            text_depth = 0 if isinstance(ref_text, str) else list_depth(ref_text) #length hack to fit the correct JA
            implied_depth = ref_depth + text_depth
            desired_depth = dRef.index_node.depth
            # wrap the text in extra lists until it is as deep as the destination node;
            # the loop variable is a throwaway so the version counter is not overwritten
            for _ in range(implied_depth, desired_depth):
                ref_text = [ref_text]

            new_tc = dRef.text(lang=version.language, vtitle=version.versionTitle)
            new_tc.versionSource = version.versionSource
            new_tc.text = ref_text
            new_tc.save()
            VersionState(dRef.index.title).refresh()
            #links
            linker = dRef.autolinker(user=8646)
            if linker:
                linker.refresh_links()
            add_links_from_text(dRef, new_version.language, new_tc.text, new_version._id, 8646)
            if version_num == 0: #links are the same across versions
                migrate_links_of_ref(orRef, dRef)
            #version history
            text_hist = HistorySet({"ref": {"$regex": orRef.regex()}, 'version': version.versionTitle })
            for h in text_hist:
                new_h = h.copy()
                new_h.ref = translate_ref(Ref(h.ref), orRef, dRef).normal()
                new_h.save()
def migrate_versions_of_text(versions, mappings, orig_title, new_title, base_index):
    """
    Copy each of the given versions onto the index titled `new_title`.

    For every version a new Version record is created and saved. Then, for every
    mapping, the text read from the source ref is stored at the destination ref,
    links are regenerated, and the matching history records are copied with
    translated refs. Links of the source ref are migrated only while the first
    version is being processed.

    :param versions: sequence of version objects; reads `title`, `versionTitle`,
        `versionSource`, `language` and a handful of optional attributes
    :param mappings: sequence of items whose [0] is the source ref string and whose
        [1] is the destination ref string; occurrences of `orig_title` in both are
        substituted before the refs are parsed
    :param orig_title: title text to substitute inside refs and version titles
    :param new_title: title that replaces `orig_title` in the new version title and
        in the destination refs
    :param base_index: object whose `nodes.create_skeleton()` supplies the initial
        "chapter" value of each new version
    :return: None
    """
    for version_num, version in enumerate(versions):
        print(version.versionTitle.encode('utf-8'))
        new_version_title = version.title.replace(orig_title, new_title)
        print(new_version_title)
        new_version = Version(
                {
                    "chapter": base_index.nodes.create_skeleton(),
                    "versionTitle": version.versionTitle,
                    "versionSource": version.versionSource,
                    "language": version.language,
                    "title": new_version_title
                }
            )
        # optional attributes are carried over only when they hold a truthy value
        for attr in ['status', 'license', 'licenseVetted', 'method', 'versionNotes', 'priority', "digitizedBySefaria", "heversionSource"]:
            value = getattr(version, attr, None)
            if value:
                setattr(new_version, attr, value)
        new_version.save()
        for mapping in mappings:
            #this makes the mapping contain the correct text/commentary title
            orig_ref = mapping[0].replace(orig_title, version.title)
            print(orig_ref)
            orRef = Ref(orig_ref)
            tc = orRef.text(lang=version.language, vtitle=version.versionTitle)
            ref_text = tc.text

            #this makes the destination mapping contain both the correct text/commentary title
            # and have it changed to the temp index title
            dest_ref = mapping[1].replace(orig_title, version.title)
            dest_ref = dest_ref.replace(orig_title, new_title)
            print(dest_ref)

            dRef = Ref(dest_ref)
            ref_depth = dRef.range_index() if dRef.is_range() else len(dRef.sections)
            text_depth = 0 if isinstance(ref_text, basestring) else list_depth(ref_text) #length hack to fit the correct JA
            implied_depth = ref_depth + text_depth
            desired_depth = dRef.index_node.depth
            # wrap the text in extra lists until it is as deep as the destination node;
            # the loop variable is a throwaway so the version counter is not overwritten
            for _ in range(implied_depth, desired_depth):
                ref_text = [ref_text]

            new_tc = dRef.text(lang=version.language, vtitle=version.versionTitle)
            new_tc.versionSource = version.versionSource
            new_tc.text = ref_text
            new_tc.save()
            VersionState(dRef.index.title).refresh()
            #links
            if dRef.is_commentary():
                add_commentary_links(dRef, 8646)
            add_links_from_text(dRef.normal(), new_version.language, new_tc.text, new_version._id, 8646)
            if version_num == 0: #links are the same across versions
                migrate_links_of_ref(orRef, dRef)
            #version history
            text_hist = HistorySet({"ref": {"$regex": orRef.regex()}, 'version': version.versionTitle })
            for h in text_hist:
                new_h = h.copy()
                new_h.ref = translate_ref(Ref(h.ref), orRef, dRef).normal()
                new_h.save()
示例#3
0
def migrate_versions_of_text(versions, mappings, orig_title, new_title,
                             base_index):
    """
    Copy each of the given versions onto the index titled `new_title`.

    For every version a new Version record is created and saved, and then, for
    every entry of `mappings`, the text read from the source ref is stored at the
    destination ref and the destination index's VersionState is refreshed.

    :param versions: sequence of version objects; reads `title`, `versionTitle`,
        `versionSource`, `language` and a handful of optional attributes
    :param mappings: mapping of source ref string -> destination ref string;
        occurrences of `orig_title` in both are substituted before the refs are parsed
    :param orig_title: title text to substitute inside refs and version titles
    :param new_title: title that replaces `orig_title` in the new version title and
        in the destination refs
    :param base_index: object whose `nodes.create_skeleton()` supplies the initial
        "chapter" value of each new version
    :return: None
    """
    for version in versions:
        print(version.versionTitle.encode('utf-8'))
        new_version_title = version.title.replace(orig_title, new_title)
        print(new_version_title)
        new_version = Version({
            "chapter": base_index.nodes.create_skeleton(),
            "versionTitle": version.versionTitle,
            "versionSource": version.versionSource,
            "language": version.language,
            "title": new_version_title
        })
        # optional attributes are carried over only when they hold a truthy value
        for attr in [
                'status', 'license', 'licenseVetted', 'method', 'versionNotes',
                'priority', "digitizedBySefaria", "heversionSource"
        ]:
            value = getattr(version, attr, None)
            if value:
                setattr(new_version, attr, value)
        new_version.save()
        for mapping_key in mappings:
            #this makes the mapping contain the correct text/commentary title
            orig_ref = mapping_key.replace(orig_title, version.title)
            print(orig_ref)
            orRef = Ref(orig_ref)
            tc = orRef.text(lang=version.language, vtitle=version.versionTitle)
            ref_text = tc.text

            #this makes the destination mapping contain both the correct text/commentary title
            # and have it changed to the temp index title
            # (the lookup must use the untouched key: the rewritten ref string is
            # not a key of `mappings` whenever version.title differs from orig_title)
            dest_ref = mappings[mapping_key].replace(orig_title, version.title)
            dest_ref = dest_ref.replace(orig_title, new_title)
            print(dest_ref)

            dRef = Ref(dest_ref)
            ref_depth = dRef.range_index() if dRef.is_range() else len(
                dRef.sections)
            text_depth = 0 if isinstance(ref_text, basestring) else list_depth(
                ref_text)  #length hack to fit the correct JA
            implied_depth = ref_depth + text_depth
            desired_depth = dRef.index_node.depth
            # wrap the text in extra lists until it is as deep as the destination node
            for _ in range(implied_depth, desired_depth):
                ref_text = [ref_text]

            new_tc = dRef.text(lang=version.language,
                               vtitle=version.versionTitle)
            new_tc.versionSource = version.versionSource
            new_tc.text = ref_text
            new_tc.save()
            VersionState(dRef.index.title).refresh()
示例#4
0
def merge_translations(text, sources):
    """
    This is a recursive function that merges the text in multiple
    translations to fill any gaps and deliver as much text as
    possible.
    e.g. [["a", ""], ["", "b", "c"]] becomes ["a", "b", "c"]

    :param text: list holding one entry per version of the same passage
    :param sources: list of source names, parallel to `text`
    :return: two-element list [merged text, sources used]; ["", []] when either
        argument is empty
    """
    if not (len(text) and len(sources)):
        return ["", []]

    depth = list_depth(text)
    if depth > 2:
        results = []
        result_sources = []
        # Line the versions up position by position ONCE (shorter versions are
        # padded with None) instead of rebuilding the transposition per position.
        for translations in map(None, *text):
            # padding / missing sections become empty lists before recursing
            result, source = merge_translations(
                [section or [] for section in translations], sources)
            results.append(result)
            # NOTE - the below flattens the sources list, so downstream code can always expect
            # a one dimensional list, but in so doing the mapping of source names to segments
            # is lost for merged texts of depth > 2 (this mapping is not currently used in general)
            result_sources += source
        return [results, result_sources]

    if depth == 1:
        # wrap each string so the merge below treats it as a one-segment version
        text = [[x] for x in text]

    merged = map(None, *text)
    text = []
    text_sources = []
    for verses in merged:
        # Look for the first non empty version (which will be the oldest, or one with highest priority)
        # NOTE(review): when every version is empty at this position the value 0
        # is appended and the first source is credited - confirm callers expect that
        index, value = 0, 0
        for i, version in enumerate(verses):
            if version:
                index = i
                value = version
                break
        text.append(value)
        text_sources.append(sources[index])

    if depth == 1:
        # strings were earlier wrapped in lists, now unwrap
        text = text[0]
    return [text, text_sources]
示例#5
0
def migrate_versions_of_text(versions, mappings, orig_title, new_title, base_index):
    """
    Copy each of the given versions onto the index titled `new_title`.

    For every version a new Version record is created and saved, and then, for
    every entry of `mappings`, the text read from the source ref is stored at the
    destination ref and the destination index's VersionState is refreshed.

    :param versions: sequence of version objects; reads `title`, `versionTitle`,
        `versionSource`, `language` and a handful of optional attributes
    :param mappings: mapping of source ref string -> destination ref string;
        occurrences of `orig_title` in both are substituted before the refs are parsed
    :param orig_title: title text to substitute inside refs and version titles
    :param new_title: title that replaces `orig_title` in the new version title and
        in the destination refs
    :param base_index: object whose `nodes.create_skeleton()` supplies the initial
        "chapter" value of each new version
    :return: None
    """
    for version in versions:
        print(version.versionTitle.encode('utf-8'))
        new_version_title = version.title.replace(orig_title, new_title)
        print(new_version_title)
        new_version = Version(
                {
                    "chapter": base_index.nodes.create_skeleton(),
                    "versionTitle": version.versionTitle,
                    "versionSource": version.versionSource,
                    "language": version.language,
                    "title": new_version_title
                }
            )
        # optional attributes are carried over only when they hold a truthy value
        for attr in ['status', 'license', 'licenseVetted', 'method', 'versionNotes', 'priority', "digitizedBySefaria", "heversionSource"]:
            value = getattr(version, attr, None)
            if value:
                setattr(new_version, attr, value)
        new_version.save()
        for mapping_key in mappings:
            #this makes the mapping contain the correct text/commentary title
            orig_ref = mapping_key.replace(orig_title, version.title)
            print(orig_ref)
            orRef = Ref(orig_ref)
            tc = orRef.text(lang=version.language, vtitle=version.versionTitle)
            ref_text = tc.text

            #this makes the destination mapping contain both the correct text/commentary title
            # and have it changed to the temp index title
            # (the lookup must use the untouched key: the rewritten ref string is
            # not a key of `mappings` whenever version.title differs from orig_title)
            dest_ref = mappings[mapping_key].replace(orig_title, version.title)
            dest_ref = dest_ref.replace(orig_title, new_title)
            print(dest_ref)

            dRef = Ref(dest_ref)
            ref_depth = dRef.range_index() if dRef.is_range() else len(dRef.sections)
            text_depth = 0 if isinstance(ref_text, basestring) else list_depth(ref_text) #length hack to fit the correct JA
            implied_depth = ref_depth + text_depth
            desired_depth = dRef.index_node.depth
            # wrap the text in extra lists until it is as deep as the destination node
            for _ in range(implied_depth, desired_depth):
                ref_text = [ref_text]

            new_tc = dRef.text(lang=version.language, vtitle=version.versionTitle)
            new_tc.versionSource = version.versionSource
            new_tc.text = ref_text
            new_tc.save()
            VersionState(dRef.index.title).refresh()
示例#6
0
def merge_translations(text, sources):
	"""
	This is a recursive function that merges the text in multiple
	translations to fill any gaps and deliver as much text as
	possible.
	e.g. [["a", ""], ["", "b", "c"]] becomes ["a", "b", "c"]

	:param text: list holding one entry per version of the same passage
	:param sources: list of source names, parallel to `text`
	:return: two-element list [merged text, sources used]; ["", []] when either
		argument is empty
	"""
	if not (len(text) and len(sources)):
		return ["", []]

	depth = list_depth(text)
	if depth > 2:
		results = []
		result_sources = []
		# Line the versions up position by position ONCE (shorter versions are
		# padded with None) instead of rebuilding the transposition per position.
		for translations in map(None, *text):
			# padding / missing sections become empty lists before recursing
			result, source = merge_translations([section or [] for section in translations], sources)
			results.append(result)
			# NOTE - the below flattens the sources list, so downstream code can always expect
			# a one dimensional list, but in so doing the mapping of source names to segments
			# is lost for merged texts of depth > 2 (this mapping is not currently used in general)
			result_sources += source
		return [results, result_sources]

	if depth == 1:
		# wrap each string so the merge below treats it as a one-segment version
		text = [[x] for x in text]

	merged = map(None, *text)
	text = []
	text_sources = []
	for verses in merged:
		# Look for the first non empty version (which will be the oldest, or one with highest priority)
		# NOTE(review): when every version is empty at this position the value 0
		# is appended and the first source is credited - confirm callers expect that
		index, value = 0, 0
		for i, version in enumerate(verses):
			if version:
				index = i
				value = version
				break
		text.append(value)
		text_sources.append(sources[index])

	if depth == 1:
		# strings were earlier wrapped in lists, now unwrap
		text = text[0]
	return [text, text_sources]
示例#7
0
def validate_text(text, tref):
	"""
	Check a dictionary describing a text before it is written to db.texts.

	:param text: dict expected to carry "versionTitle", "versionSource",
		"language" and "text"
	:param tref: ref string the text is being posted to
	:return: {"status": "ok"} on success, or {"error": ...} naming the first
		required field that is absent
	:raises InputError: when the depth implied by the ref plus the posted text
		differs from the depth stored on the index
	"""
	# Required Keys
	required_fields = ("versionTitle", "versionSource", "language", "text")
	absent = next((field for field in required_fields if field not in text), None)
	if absent is not None:
		return {"error": "Field '%s' missing from posted JSON."  % absent}

	oref = model.Ref(tref)

	# Validate depth of posted text matches expectation
	if isinstance(text["text"], basestring):
		posted_depth = 0
	else:
		posted_depth = list_depth(text["text"])
	implied_depth = len(oref.sections) + posted_depth

	if implied_depth == oref.index.textDepth:
		return {"status": "ok"}

	message = u"Text Structure Mismatch. The stored depth of {} is {}, but the text posted to {} implies a depth of {}."
	raise InputError(message.format(oref.book, oref.index.textDepth, tref, implied_depth))
示例#8
0
def validate_text(text, tref):
    """
    Check a dictionary describing a text before it is written to db.texts.

    :param text: dict expected to carry "versionTitle", "versionSource",
        "language" and "text"
    :param tref: ref string the text is being posted to
    :return: {"status": "ok"} on success, or {"error": ...} naming the first
        required field that is absent
    :raises InputError: when the depth implied by the ref plus the posted text
        differs from the depth stored on the index
    """
    # Required Keys
    required_fields = ("versionTitle", "versionSource", "language", "text")
    absent = next((field for field in required_fields if field not in text), None)
    if absent is not None:
        return {"error": "Field '%s' missing from posted JSON." % absent}

    oref = model.Ref(tref)

    # Validate depth of posted text matches expectation
    if isinstance(text["text"], basestring):
        posted_depth = 0
    else:
        posted_depth = list_depth(text["text"])
    implied_depth = len(oref.sections) + posted_depth

    if implied_depth == oref.index.textDepth:
        return {"status": "ok"}

    message = u"Text Structure Mismatch. The stored depth of {} is {}, but the text posted to {} implies a depth of {}."
    raise InputError(
        message.format(oref.book, oref.index.textDepth, tref, implied_depth))
def migrate_versions_of_text(versions, mappings, orig_title, new_title,
                             base_index):
    """
    Copy each of the given versions onto the index titled `new_title`.

    A new Version record is saved per version; then for every mapping the text
    at the source ref is written to the destination ref, links are regenerated
    and migrated, and matching history records are copied with translated refs.

    :param versions: iterable of version objects; reads `title`, `versionTitle`,
        `versionSource`, `language` plus optional status/license attributes
    :param mappings: iterable of items whose [0] is the source ref string and
        whose [1] is the destination ref string
    :param orig_title: title text to substitute inside refs and version titles
    :param new_title: title that replaces `orig_title` in the new version title
        and in the destination refs
    :param base_index: object whose `nodes.create_skeleton()` supplies the
        initial "chapter" value of each new version
    :return: None
    """
    for version in versions:
        new_version_title = version.title.replace(orig_title, new_title)
        print(new_version_title)
        version_fields = {
            "chapter": base_index.nodes.create_skeleton(),
            "versionTitle": version.versionTitle,
            "versionSource": version.versionSource,
            "language": version.language,
            "title": new_version_title
        }
        new_version = Version(version_fields)
        # carry optional attributes over only when they hold a truthy value
        for attr in ('status', 'license', 'licenseVetted'):
            value = getattr(version, attr, None)
            if not value:
                continue
            setattr(new_version, attr, value)
        new_version.save()

        for mapping in mappings:
            # source ref, rewritten to carry this version's text/commentary title
            source_tref = mapping[0].replace(orig_title, version.title)
            print(source_tref)
            source_oref = Ref(source_tref)
            content = source_oref.text(lang=version.language,
                                       vtitle=version.versionTitle).text

            # destination ref: first the text/commentary title, then the switch
            # over to the temp index title
            target_tref = mapping[1].replace(orig_title, version.title)
            target_tref = target_tref.replace(orig_title, new_title)
            print(target_tref)
            target_oref = Ref(target_tref)

            if target_oref.is_range():
                ref_depth = target_oref.range_index()
            else:
                ref_depth = len(target_oref.sections)
            if isinstance(content, basestring):
                text_depth = 0
            else:
                text_depth = list_depth(content)
            # length hack to fit the correct JA: nest the text until it is as
            # deep as the destination node expects
            missing_levels = target_oref.index_node.depth - (ref_depth + text_depth)
            for _ in range(missing_levels):
                content = [content]

            new_tc = target_oref.text(lang=version.language,
                                      vtitle=version.versionTitle)
            new_tc.versionSource = version.versionSource
            new_tc.text = content
            new_tc.save()
            VersionState(target_oref.index.title).refresh()

            # links
            if target_oref.is_commentary():
                add_commentary_links(target_oref, 8646)
            add_links_from_text(target_oref.normal(), new_version.language,
                                new_tc.text, new_version._id, 8646)
            migrate_links_of_ref(source_oref, target_oref)

            # version history
            history_query = {
                "ref": {"$regex": source_oref.regex()},
                'version': version.versionTitle
            }
            for h in HistorySet(history_query):
                new_h = h.copy()
                new_h.ref = translate_ref(Ref(h.ref), source_oref,
                                          target_oref).normal()
                new_h.save()
示例#10
0
def text_from_cur(ref, textCur, context):
    """
    Take a parsed ref and DB cursor of texts and construct a text to return out of what's available.
    Merges text fragments when necessary so that the final version has maximum text.

    :param ref: dict describing the parsed ref; reads "sections", "toSections" and
        "sectionNames" and is updated in place with "text" plus version metadata
        (or "sources" when text from several versions was merged)
    :param textCur: iterable of version documents (dicts with a "chapter" entry)
    :param context: 0 requests just the referenced text; any other value keeps the
        surrounding text
    :return: the same `ref` dict
    """
    # does this ref refer to a range of text
    is_range = ref["sections"] != ref["toSections"]

    def apply_metadata(record):
        # copy the descriptive fields of a single version onto ref
        ref['versionSource'] = record["versionSource"]
        ref['versionStatus'] = record["status"]
        ref['license'] = record["license"]
        if record["versionNotes"]:
            ref['versionNotes'] = record["versionNotes"]
        if record["digitizedBySefaria"]:
            ref['digitizedBySefaria'] = record["digitizedBySefaria"]

    # One record per usable version. Text and metadata travel together so that
    # dropping empty versions below cannot leave titles/sources/licenses pointing
    # at the wrong text.
    found = []
    for t in textCur:
        try:
            text = t['chapter'][0] if len(
                ref["sectionNames"]) > 1 else t['chapter']
            if text == "" or text == []:
                continue
            if len(ref['sections']) < len(
                    ref['sectionNames']) or context == 0 and not is_range:
                sections = ref['sections'][1:]
                if len(ref["sectionNames"]) == 1 and context == 0:
                    sections = ref["sections"]
            else:
                # include surrounding text
                sections = ref['sections'][1:-1]
            # dive down into text until the requested segment is found
            for i in sections:
                text = text[int(i) - 1]
            if is_range and context == 0:
                start = ref["sections"][-1] - 1
                end = ref["toSections"][-1]
                text = text[start:end]
        except IndexError:
            # this happens when t doesn't have the text we're looking for
            continue
        found.append({
            "text": text,
            "versionTitle": t.get("versionTitle", ""),
            "versionSource": t.get("versionSource", ""),
            "status": t.get("status", "none"),
            # a license that has not been vetted is reported as unknown
            "license": t.get("license", "unknown") if t.get(
                "licenseVetted", False) else "unknown",
            "versionNotes": t.get("versionNotes", ""),
            "digitizedBySefaria": t.get("digitizedBySefaria", False),
        })

    if list_depth([record["text"] for record in found]) == 1:
        # drop versions that turned out empty, together with their metadata
        found = [record for record in found if not (record["text"] == '')]

    if len(found) == 0:
        ref['text'] = "" if context == 0 else []

    elif len(found) == 1:
        ref['text'] = found[0]["text"]
        ref['versionTitle'] = found[0]["versionTitle"]
        apply_metadata(found[0])

    else:
        versions = [record["text"] for record in found]
        versionTitles = [record["versionTitle"] for record in found]
        ref['text'], ref['sources'] = merge_translations(
            versions, versionTitles)
        if len(set(ref['sources'])) == 1:
            # if sources only lists one title, no merge actually happened
            ref['versionTitle'] = ref['sources'][0]
            apply_metadata(found[versionTitles.index(ref['sources'][0])])

            del ref['sources']

    return ref
示例#11
0
def text_from_cur(ref, textCur, context):
	"""
	Take a parsed ref and DB cursor of texts and construct a text to return out of what's available.
	Merges text fragments when necessary so that the final version has maximum text.

	:param ref: dict describing the parsed ref; reads "sections", "toSections" and
		"sectionNames" and is updated in place with "text" plus version metadata
		(or "sources" when text from several versions was merged)
	:param textCur: iterable of version documents (dicts with a "chapter" entry)
	:param context: 0 requests just the referenced text; any other value keeps the
		surrounding text
	:return: the same `ref` dict
	"""
	# does this ref refer to a range of text
	is_range = ref["sections"] != ref["toSections"]

	def apply_metadata(record):
		# copy the descriptive fields of a single version onto ref
		ref['versionSource'] = record["versionSource"]
		ref['versionStatus'] = record["status"]
		ref['license'] = record["license"]
		if record["versionNotes"]:
			ref['versionNotes'] = record["versionNotes"]
		if record["digitizedBySefaria"]:
			ref['digitizedBySefaria'] = record["digitizedBySefaria"]

	# One record per usable version. Text and metadata travel together so that
	# dropping empty versions below cannot leave titles/sources/licenses pointing
	# at the wrong text.
	found = []
	for t in textCur:
		try:
			text = t['chapter'][0] if len(ref["sectionNames"]) > 1 else t['chapter']
			if text == "" or text == []:
				continue
			if len(ref['sections']) < len(ref['sectionNames']) or context == 0 and not is_range:
				sections = ref['sections'][1:]
				if len(ref["sectionNames"]) == 1 and context == 0:
					sections = ref["sections"]
			else:
				# include surrounding text
				sections = ref['sections'][1:-1]
			# dive down into text until the requested segment is found
			for i in sections:
				text = text[int(i) - 1]
			if is_range and context == 0:
				start = ref["sections"][-1] - 1
				end = ref["toSections"][-1]
				text = text[start:end]
		except IndexError:
			# this happens when t doesn't have the text we're looking for
			continue
		found.append({
			"text": text,
			"versionTitle": t.get("versionTitle", ""),
			"versionSource": t.get("versionSource", ""),
			"status": t.get("status", "none"),
			# a license that has not been vetted is reported as unknown
			"license": t.get("license", "unknown") if t.get("licenseVetted", False) else "unknown",
			"versionNotes": t.get("versionNotes", ""),
			"digitizedBySefaria": t.get("digitizedBySefaria", False),
		})

	if list_depth([record["text"] for record in found]) == 1:
		# drop versions that turned out empty, together with their metadata
		found = [record for record in found if not (record["text"] == '')]

	if len(found) == 0:
		ref['text'] = "" if context == 0 else []

	elif len(found) == 1:
		ref['text'] = found[0]["text"]
		ref['versionTitle'] = found[0]["versionTitle"]
		apply_metadata(found[0])

	else:
		versions = [record["text"] for record in found]
		versionTitles = [record["versionTitle"] for record in found]
		ref['text'], ref['sources'] = merge_translations(versions, versionTitles)
		if len(set(ref['sources'])) == 1:
			# if sources only lists one title, no merge actually happened
			ref['versionTitle'] = ref['sources'][0]
			apply_metadata(found[versionTitles.index(ref['sources'][0])])

			del ref['sources']

	return ref