Exemplo n.º 1
0
def create_link_cluster(refs, user, link_type="", attrs=None, exception_pairs=None, exception_range = None):
    total = 0
    for i, ref in enumerate(refs):
        for j in range(i + 1, len(refs)):
            ref_strings = [refs[i].normal(), refs[j].normal()]

            # If this link matches an exception pair, skip it.
            if all([any([r.startswith(name) for r in ref_strings]) for pair in exception_pairs for name in pair]):
                continue
            # If this link matches an exception range, skip it.
            if refs[i].section_ref() == refs[j].section_ref():
                continue

            d = {
                "refs": ref_strings,
                "type": link_type
                }
            if attrs:
                d.update(attrs)
            try:
                tracker.add(user, Link, d)
                print u"Created {} - {}".format(d["refs"][0], d["refs"][1])
                total += 1
            except Exception as e:
                print u"Exception: {}".format(e)
    return total
Exemplo n.º 2
0
def add_links_from_text(ref, lang, text, text_id, user, **kwargs):
    """
    Scan a text for explicit references to other texts and automatically add new links between
    ref and the mentioned text.

    text may be a list of segments, an individual segment string, or None.

    :param ref: string ref of the text being scanned (the citing text).
    :param lang: language code used by the citation finder.
    :param text: list of segments, a single segment string, or None.
    :param text_id: oid of the source text version, stored on each link created.
    :param user: user id passed to tracker for attribution.
    :return: list of link dicts created during this scan.

    Lev - added return on 13 July 2014
    """
    if not text:
        return []
    elif isinstance(text, list):
        # Recur on each segment, extending the ref with a 1-based segment number.
        links = []
        for i in range(len(text)):
            subtext = text[i]
            single = add_links_from_text("%s:%d" % (ref, i + 1), lang, subtext,
                                         text_id, user, **kwargs)
            links += single
        return links
    elif isinstance(text, basestring):
        # Auto-links previously generated from this source text.
        existingLinks = LinkSet({
            "refs": ref,
            "auto": True,
            "generated_by": "add_links_from_text",
            "source_text_oid": text_id
        }).array(
        )  # Added the array here to force population, so that new links don't end up in this set

        found = []  # The normal refs of the links found in this text
        links = []  # New link objects created by this processes

        refs = library.get_refs_in_string(text, lang)

        for oref in refs:
            link = {
                # The citing text's ref goes in the first position.
                "refs": [ref, oref.normal()],
                "type": "",
                "auto": True,
                "generated_by": "add_links_from_text",
                "source_text_oid": text_id
            }
            found += [
                oref.normal()
            ]  # Keep this here, since tracker.add will throw an error if the link exists
            try:
                tracker.add(user, Link, link, **kwargs)
                links += [link]
            except InputError as e:
                # Duplicate or invalid link -- skip it.
                pass

        # Remove existing links that are no longer supported by the text
        for exLink in existingLinks:
            for r in exLink.refs:
                if r == ref:  # current base ref
                    continue
                # First non-base ref: delete the link if the rescan did not
                # find it again.
                if r not in found:
                    tracker.delete(user, Link, exLink._id)
                break
        return links
Exemplo n.º 3
0
def create_link_cluster(refs,
                        user,
                        link_type="",
                        attrs=None,
                        exception_pairs=None):
    total = 0
    for i, ref in enumerate(refs):
        for j in range(i + 1, len(refs)):
            ref_strings = [refs[i].normal(), refs[j].normal()]

            # If this link matches an exception pair, skip it.
            if all([
                    any([r.startswith(name) for r in ref_strings])
                    for pair in exception_pairs for name in pair
            ]):
                continue

            d = {"refs": ref_strings, "type": link_type}
            if attrs:
                d.update(attrs)
            try:
                tracker.add(user, Link, d)
                print u"Created {} - {}".format(d["refs"][0], d["refs"][1])
                total += 1
            except Exception as e:
                print u"Exception: {}".format(e)
    return total
Exemplo n.º 4
0
def add_links_from_text(ref, lang, text, text_id, user, **kwargs):
    """
    Scan a text for explicit references to other texts and automatically add new links between
    ref and the mentioned text.

    text may be a list of segments, an individual segment string, or None.

    :param ref: string ref of the citing text.
    :param lang: language code used by the citation finder.
    :param text: list of segments, a single segment string, or None.
    :param text_id: oid of the source text version, stored on each link created.
    :param user: user id passed to tracker for attribution.

    Returns a list of links added.
    """
    if not text:
        return []
    elif isinstance(text, list):
        # Recur on each segment using the proper subref of this ref.
        oref    = Ref(ref)
        subrefs = oref.subrefs(len(text))
        links   = []
        for i in range(len(text)):
            single = add_links_from_text(subrefs[i].normal(), lang, text[i], text_id, user, **kwargs)
            links += single
        return links
    elif isinstance(text, basestring):
        # Auto-links previously generated from this source text.
        existingLinks = LinkSet({
            "refs": ref,
            "auto": True,
            "generated_by": "add_links_from_text",
            "source_text_oid": text_id
        }).array()  # Added the array here to force population, so that new links don't end up in this set

        found = []  # The normal refs of the links found in this text
        links = []  # New link objects created by this processes

        refs = library.get_refs_in_string(text, lang)

        for oref in refs:
            link = {
                # Note -- ref of the citing text is in the first position
                "refs": [ref, oref.normal()],
                "type": "",
                "auto": True,
                "generated_by": "add_links_from_text",
                "source_text_oid": text_id
            }
            found += [oref.normal()]  # Keep this here, since tracker.add will throw an error if the link exists
            try:
                tracker.add(user, Link, link, **kwargs)
                links += [link]
            except InputError as e:
                # Duplicate or invalid link -- skip it.
                pass

        # Remove existing links that are no longer supported by the text
        for exLink in existingLinks:
            for r in exLink.refs:
                if r == ref:  # current base ref
                    continue
                # First non-base ref: delete the link if the rescan did not
                # find it again.
                if r not in found:
                    tracker.delete(user, Link, exLink._id)
                break

        return links
Exemplo n.º 5
0
def create_link_cluster(refs, user, link_type="", attrs=None):
    for i, ref in enumerate(refs):
        for j in range(i + 1, len(refs)):
            d = {
                "refs": [refs[i].normal(), refs[j].normal()],
                "type": link_type
                }
            if attrs:
                d.update(attrs)
            try:
                tracker.add(user, Link, d)
                print u"Created {} - {}".format(d["refs"][0], d["refs"][1])
            except Exception as e:
                print u"Exception: {}".format(e)
Exemplo n.º 6
0
 def _save_link(self, tref, base_tref, **kwargs):
     """
     Persist a link of self._link_type between `base_tref` and `tref`.

     Records the change through the tracker when a user is set on the
     instance; otherwise saves the Link directly.  DuplicateRecordError is
     swallowed so re-runs are idempotent.  Returns `tref`.
     """
     link_contents = {
         "refs": [base_tref, tref],
         "type": self._link_type,
         "anchorText": "",
         "auto": self._auto,
         "generated_by": self._generated_by_string,
     }
     try:
         if self._user:
             tracker.add(self._user, Link, link_contents, **kwargs)
         else:
             Link(link_contents).save()
     except DuplicateRecordError:
         # Link already exists -- nothing to do.
         pass
     return tref
Exemplo n.º 7
0
 def _save_link(self, tref, base_tref, **kwargs):
     """
     Save a link of self._link_type between `base_tref` and `tref`.

     Saves the Link directly when self._user is not set; otherwise goes
     through the tracker for attribution.  DuplicateRecordError is swallowed
     so re-runs are idempotent.  Returns `tref`.
     """
     nlink = {
         "refs": [base_tref, tref],
         "type": self._link_type,
         "anchorText": "",
         "auto": self._auto,
         "generated_by": self._generated_by_string
     }
     try:
         if not self._user:
             Link(nlink).save()
         else:
             tracker.add(self._user, Link, nlink, **kwargs)
     except DuplicateRecordError as e:
         # Link already exists -- nothing to do.
         pass
     return tref
Exemplo n.º 8
0
def func_to_profile():
    """
    Profiling harness: load link records from links.json and re-add each one,
    printing the elapsed wall-clock seconds for the whole pass.

    Each link's second ref is truncated to its section level (everything
    before the first ":") before being added.  Errors from individual adds
    are swallowed so one bad record does not stop the run.
    """
    with open("links.json", 'r') as fp:
        links = json.load(fp)

    before = time.time()
    for our_link in links:
        # The underlying query is roughly:
        # Link().load({"$or": [{"refs": self.refs}, {"refs": [self.refs[1], self.refs[0]]}]})
        try:
            our_link["refs"][1] = our_link["refs"][1].split(":")[0]
            add(1, Link, our_link)
        except Exception:
            # Best-effort: skip links that fail to save.
            pass
    after = time.time()
    print(after - before)
Exemplo n.º 9
0
def add_links_from_text(ref, text, text_id, user, **kwargs):
    """
    Scan a text for explicit references to other texts and automatically add new links between
    ref and the mentioned text.

    text["text"] may be a list of segments, an individual segment, or None.

    :param ref: string ref of the citing text.
    :param text: dict whose "text" key holds a list of segments, a single
        segment string, or None.
    :param text_id: oid of the source text version, stored on each link created.
    :param user: user id passed to tracker for attribution.
    :return: list of link dicts created.

    Lev - added return on 13 July 2014
    """
    if not text or "text" not in text:
        return []
    elif isinstance(text["text"], list):
        # Recur on each segment, extending the ref with a 1-based segment number.
        links = []
        for i in range(len(text["text"])):
            subtext = copy.deepcopy(text)
            subtext["text"] = text["text"][i]
            single = add_links_from_text("%s:%d" % (ref, i + 1), subtext,
                                         text_id, user, **kwargs)
            links += single
        return links
    elif isinstance(text["text"], basestring):
        links = []
        matches = get_refs_in_string(text["text"])
        for mref in matches:
            link = {
                # The citing text's ref goes in the first position.
                "refs": [ref, mref],
                "type": "",
                "auto": True,
                "generated_by": "add_links_from_text",
                "source_text_oid": text_id
            }
            try:
                tracker.add(user, model.Link, link, **kwargs)
                links += [link]
            except InputError as e:
                # Duplicate or invalid link -- skip it.
                pass
        return links
Exemplo n.º 10
0
def add_links_from_text(ref, text, text_id, user, **kwargs):
	"""
	Scan a text for explicit references to other texts and automatically add new links between
	ref and the mentioned text.

	text["text"] may be a list of segments, an individual segment, or None.

	Returns the list of link dicts created.

	Lev - added return on 13 July 2014
	"""
	if not text or "text" not in text:
		return []
	elif isinstance(text["text"], list):
		# Recur on each segment, extending the ref with a 1-based segment number.
		links = []
		for i in range(len(text["text"])):
			subtext = copy.deepcopy(text)
			subtext["text"] = text["text"][i]
			single = add_links_from_text("%s:%d" % (ref, i + 1), subtext, text_id, user, **kwargs)
			links += single
		return links
	elif isinstance(text["text"], basestring):
		links = []
		matches = get_refs_in_string(text["text"])
		for mref in matches:
			# The citing text's ref goes in the first position.
			link = {
				"refs": [ref, mref],
				"type": "",
				"auto": True,
				"generated_by": "add_links_from_text",
				"source_text_oid": text_id
			}
			try:
				tracker.add(user, model.Link, link, **kwargs)
				links += [link]
			except InputError as e:
				# Duplicate or invalid link -- skip it.
				pass
		return links
Exemplo n.º 11
0
def add_links_from_text(oref, lang, text, text_id, user, **kwargs):
    """
    Scan a text for explicit references to other texts and automatically add new links between
    ref and the mentioned text.

    text may be a list of segments, an individual segment string, or None.

    The set of no longer supported links (`existingLinks` - `found`) is deleted.
    If Varnish is used, all linked refs, old and new, are refreshed

    :param oref: Ref of the citing text being scanned.
    :param lang: language code used by the citation finder.
    :param text: list of segments, a single segment string, or None.
    :param text_id: oid of the source text version, stored on each link created.
    :param user: user id passed to tracker for attribution.

    Returns `links` - the list of links added.
    """
    if not text:
        return []
    elif isinstance(text, list):
        # Recur on each segment using the proper subref of this ref.
        subrefs = oref.subrefs(len(text))
        links   = []
        for i in range(len(text)):
            single = add_links_from_text(subrefs[i], lang, text[i], text_id, user, **kwargs)
            links += single
        return links
    elif isinstance(text, basestring):
        """
            Keeps three lists:
            * existingLinks - The links that existed before the text was rescanned
            * found - The links found in this scan of the text
            * links - The new links added in this scan of the text

            The set of no longer supported links (`existingLinks` - `found`) is deleted.
            The set of all links (`existingLinks` + `Links`) is refreshed in Varnish.
        """
        existingLinks = LinkSet({
            "refs": oref.normal(),
            "auto": True,
            "generated_by": "add_links_from_text",
            "source_text_oid": text_id
        }).array()  # Added the array here to force population, so that new links don't end up in this set

        found = []  # The normal refs of the links found in this text
        links = []  # New link objects created by this processes

        refs = library.get_refs_in_string(text, lang)

        for linked_oref in refs:
            link = {
                # Note -- ref of the citing text is in the first position
                "refs": [oref.normal(), linked_oref.normal()],
                "type": "",
                "auto": True,
                "generated_by": "add_links_from_text",
                "source_text_oid": text_id
            }
            found += [linked_oref.normal()]  # Keep this here, since tracker.add will throw an error if the link exists
            try:
                tracker.add(user, Link, link, **kwargs)
                links += [link]
                if USE_VARNISH:
                    # Refresh the cache entry for the newly linked ref.
                    invalidate_ref(linked_oref)
            except InputError as e:
                # Duplicate or invalid link -- skip it.
                pass

        # Remove existing links that are no longer supported by the text
        for exLink in existingLinks:
            for r in exLink.refs:
                if r == oref.normal():  # current base ref
                    continue
                # Refresh every previously linked ref, whether or not it is
                # about to be deleted.
                if USE_VARNISH:
                    invalidate_ref(Ref(r))
                if r not in found:
                    tracker.delete(user, Link, exLink._id)
                break

        return links
Exemplo n.º 12
0
def add_commentary_links(tref, user, **kwargs):
	"""
	Automatically add links for each comment in the commentary text denoted by 'tref'.

	E.g., for the ref 'Sforno on Kohelet 3:2', automatically set links for
	Kohelet 3:2 <-> Sforno on Kohelet 3:2:1, Kohelet 3:2 <-> Sforno on Kohelet 3:2:2, etc.
	for each segment of text (comment) that is in 'Sforno on Kohelet 3:2'.

	:param tref: string ref of the commentary text ("X on Y ...").
	:param user: user id passed to tracker for attribution.
	Additional keyword arguments are forwarded to tracker.add.
	"""
	text = get_text(tref, commentary=0, context=0, pad=False)
	tref = model.Ref(tref).normal()

	# The commented-on book is everything after " on " in the commentary title.
	book = tref[tref.find(" on ") + 4:]

	if len(text["sections"]) == len(text["sectionNames"]):
		# this is a single comment, trim the last secton number (comment) from ref
		book = book[0:book.rfind(":")]
		link = {
			"refs": [book, tref],
			"type": "commentary",
			"anchorText": "",
			"auto": True,
			"generated_by": "add_commentary_links"
		}
		try:
			tracker.add(user, model.Link, link, **kwargs)
		except DuplicateRecordError as e:
			# Link already exists -- nothing to do.
			pass

	elif len(text["sections"]) == (len(text["sectionNames"]) - 1):
		# This means that the text (and it's corresponding ref) being posted has the amount of sections like the parent text
		# (the text being commented on) so this is single group of comments on the lowest unit of the parent text.
		# and we simply iterate and create a link for each existing one to point to the same unit of parent text
		length = max(len(text["text"]), len(text["he"]))
		for i in range(length):
				link = {
					"refs": [book, tref + ":" + str(i + 1)],
					"type": "commentary",
					"anchorText": "",
					"auto": True,
					"generated_by": "add_commentary_links"
				}
				try:
					tracker.add(user, model.Link, link, **kwargs)
				except DuplicateRecordError as e:
					# Link already exists -- nothing to do.
					pass

	elif len(text["sections"]) > 0:
		# any other case where the posted ref sections do not match the length of the parent texts sections
		# this is a larger group of comments meaning it needs to be further broken down
		# in order to be able to match the commentary to the basic parent text units,
		# recur on each section
		length = max(len(text["text"]), len(text["he"]))
		for i in range(length):
			add_commentary_links("%s:%d" % (tref, i + 1), user)
	else:
		#This is a special case of the above, where the sections length is 0 and that means this is
		# a whole text that has been posted. For  this we need a better way than get_text() to get the correct length of
		# highest order section counts.
		# We use the counts document for that.
		text_counts = counts.count_texts(tref)
		length = len(text_counts["counts"])
		for i in range(length):
			add_commentary_links("%s:%d" % (tref, i+1), user)
Exemplo n.º 13
0
def add_commentary_links(tref, user, **kwargs):
    """
    Automatically add links for each comment in the commentary text denoted by 'tref'.

    E.g., for the ref 'Sforno on Kohelet 3:2', automatically set links for
    Kohelet 3:2 <-> Sforno on Kohelet 3:2:1, Kohelet 3:2 <-> Sforno on Kohelet 3:2:2, etc.
    for each segment of text (comment) that is in 'Sforno on Kohelet 3:2'.

    :param tref: string ref of the commentary text ("X on Y ...").
    :param user: user id passed to tracker for attribution.
    Additional keyword arguments are forwarded to tracker.add.

    NOTE(review): **kwargs are not forwarded on the recursive calls below --
    confirm whether that is intentional.
    """
    text = TextFamily(Ref(tref), commentary=0, context=0, pad=False).contents()
    tref = Ref(tref).normal()

    # The commented-on book is everything after " on " in the commentary title.
    book = tref[tref.find(" on ") + 4:]

    if len(text["sections"]) == len(text["sectionNames"]):
        # This is a single comment; trim the last section number (the comment
        # index) from the base ref so the link points at the commented unit.
        book = book[0:book.rfind(":")]
        link = {
            "refs": [book, tref],
            "type": "commentary",
            "anchorText": "",
            "auto": True,
            "generated_by": "add_commentary_links"
        }
        try:
            tracker.add(user, Link, link, **kwargs)
        except DuplicateRecordError:
            pass  # link already exists

    elif len(text["sections"]) == (len(text["sectionNames"]) - 1):
        # The posted ref is one level above segment depth: a single group of
        # comments on the lowest unit of the parent text.  Link each comment
        # to that same parent unit.
        length = max(len(text["text"]), len(text["he"]))
        for i in range(length):
            link = {
                "refs": [book, tref + ":" + str(i + 1)],
                "type": "commentary",
                "anchorText": "",
                "auto": True,
                "generated_by": "add_commentary_links"
            }
            try:
                tracker.add(user, Link, link, **kwargs)
            except DuplicateRecordError:
                pass  # link already exists

    elif len(text["sections"]) > 0:
        # A larger group of comments; recur on each section until we reach
        # the depth where comments can be matched to parent text units.
        length = max(len(text["text"]), len(text["he"]))
        for i in range(length):
            add_commentary_links("%s:%d" % (tref, i + 1), user)
    else:
        # A whole text was posted (no sections).  TextFamily cannot tell us
        # the top-level length here, so read it from the version state.
        sn = StateNode(tref)
        length = sn.ja('all').length()
        for i in range(length):
            add_commentary_links("%s:%d" % (tref, i + 1), user)
Exemplo n.º 14
0
        is_new_perek = not lastrow or row[1] != lastrow[1]
        is_new_mesechet = not lastrow or row[0] != lastrow[0]

        # Add link
        mishnaRef = Ref("{} {}:{}-{}".format(row[0], row[1], row[2], row[3]))
        mishnahInTalmudRef = Ref("{} {}:{}-{}:{}".format(
            row[0], row[4], row[5], row[6], row[7]))
        print mishnaRef.normal() + " ... " + mishnahInTalmudRef.normal()

        if live:
            try:
                tracker.add(
                    28, Link, {
                        "refs":
                        [mishnaRef.normal(),
                         mishnahInTalmudRef.normal()],
                        "auto": True,
                        "generated_by": "mishnah_map",
                        "type": "Mishnah in Talmud"
                    })
            except DuplicateRecordError as e:
                print e

        # Try highlighting hadran.  Note that the last hadran gets highlighted outside of the loop
        """
        13 non standard Hadrans (code is catching all of them, but they should be checked)

        Hadrans in parens, joined at end of other line:
        Sukkah 20b:29
        Sukkah 29b:5
for book in books:
    rashi_book = "Rashi on " + book
    onkelos_book = "Onkelos " + book
    i = library.get_index(rashi_book)
    assert isinstance(i, CommentaryIndex)
    all_rashis = i.all_segment_refs()

    # Loop through all of the Rashis
    for rashi_ref in all_rashis:
        rashi = strip_nikkud(TextChunk(rashi_ref, "he", "On Your Way").text)

        # If it matches the pattern
        for pat in patterns:
            if pat in rashi:
                onkelos_ref = Ref(rashi_ref.section_ref().normal().replace(
                    rashi_book, onkelos_book))
                d = {
                    "refs": [rashi_ref.normal(),
                             onkelos_ref.normal()],
                    "type": "reference",
                    "auto": True,
                    "generated_by": "Rashi - Onkelos Linker"
                }
                tracker.add(28, Link, d)
                print u"{}\t{}\t{}".format(rashi_ref.normal(), pat,
                                           rashi.strip())
                total += 1
                break

print "\nLinks: {}".format(total)
books = ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy"]
total = 0
for book in books:
    rashi_book = "Rashi on " + book
    onkelos_book = "Onkelos " + book
    i = library.get_index(rashi_book)
    assert isinstance(i, CommentaryIndex)
    all_rashis = i.all_segment_refs()

    # Loop through all of the Rashis
    for rashi_ref in all_rashis:
        rashi = strip_nikkud(TextChunk(rashi_ref, "he", "On Your Way").text)

        # If it matches the pattern
        for pat in patterns:
            if pat in rashi:
                onkelos_ref = Ref(rashi_ref.section_ref().normal().replace(rashi_book, onkelos_book))
                d = {
                    "refs": [rashi_ref.normal(), onkelos_ref.normal()],
                    "type": "reference",
                    "auto": True,
                    "generated_by": "Rashi - Onkelos Linker",
                }
                tracker.add(28, Link, d)
                print u"{}\t{}\t{}".format(rashi_ref.normal(), pat, rashi.strip())
                total += 1
                break

print "\nLinks: {}".format(total)
Exemplo n.º 17
0
def add_commentary_links(oref, user, text=None, **kwargs):
    #//TODO: commentary refactor, also many other lines can be made better
    """
    Automatically add links for each comment in the commentary text denoted by 'tref'.

    E.g., for the ref 'Sforno on Kohelet 3:2', automatically set links for
    Kohelet 3:2 <-> Sforno on Kohelet 3:2:1, Kohelet 3:2 <-> Sforno on Kohelet 3:2:2, etc.
    for each segment of text (comment) that is in 'Sforno on Kohelet 3:2'.

    :param oref: Ref of the commentary text ("X on Y ...").
    :param user: user id passed to tracker for attribution.
    :param text: optional pre-fetched text dict (with "sections",
        "sectionNames", "text", "he"); fetched from oref when omitted.
    :return: list of normal refs for which links were created; None when a
        structure node is passed and no text can be fetched.
    """

    assert oref.is_commentary()
    tref = oref.normal()
    # The commented-on ref is everything after " on " in the commentary title.
    base_tref = tref[tref.find(" on ") + 4:]
    base_oref = Ref(base_tref)
    found_links = []

    # This is a special case, where the sections length is 0 and that means this is
    # a whole text or complex text node that has been posted. So we get each leaf node
    if not oref.sections:
        vs = StateNode(tref).versionState
        if not vs.is_new_state:
            vs.refresh()  # Needed when saving multiple nodes in a complex text.  This may be moderately inefficient.
        content_nodes = oref.index_node.get_leaf_nodes()
        for r in content_nodes:
            # Recur over each top-level subref of the leaf node, passing down
            # the slice of text that belongs to it.
            cn_oref = r.ref()
            text = TextFamily(cn_oref, commentary=0, context=0, pad=False).contents()
            length = cn_oref.get_state_ja().length()
            for i, sr in enumerate(cn_oref.subrefs(length)):
                stext = {"sections": sr.sections,
                        "sectionNames": text['sectionNames'],
                        "text": text["text"][i] if i < len(text["text"]) else "",
                        "he": text["he"][i] if i < len(text["he"]) else ""
                        }
                found_links += add_commentary_links(sr, user, stext, **kwargs)

    else:
        if not text:
            try:
                text = TextFamily(oref, commentary=0, context=0, pad=False).contents()
            except AssertionError:
                logger.warning(u"Structure node passed to add_commentary_links: {}".format(oref.normal()))
                return

        if len(text["sectionNames"]) > len(text["sections"]) > 0:
            # any other case where the posted ref sections do not match the length of the parent texts sections
            # this is a larger group of comments meaning it needs to be further broken down
            # in order to be able to match the commentary to the basic parent text units,
            # recur on each section
            length = max(len(text["text"]), len(text["he"]))
            for i,r in enumerate(oref.subrefs(length)):
                stext = {"sections": r.sections,
                        "sectionNames": text['sectionNames'],
                        "text": text["text"][i] if i < len(text["text"]) else "",
                        "he": text["he"][i] if i < len(text["he"]) else ""
                        }
                found_links += add_commentary_links(r, user, stext, **kwargs)

        # this is a single comment, trim the last section number (comment) from ref
        elif len(text["sections"]) == len(text["sectionNames"]):
            if len(text['he']) or len(text['text']): #only if there is actually text
                base_tref = base_tref[0:base_tref.rfind(":")]
                link = {
                    "refs": [base_tref, tref],
                    "type": "commentary",
                    "anchorText": "",
                    "auto": True,
                    "generated_by": "add_commentary_links"
                }
                found_links += [tref]
                try:
                    tracker.add(user, Link, link, **kwargs)
                except DuplicateRecordError as e:
                    # Link already exists -- still counted in found_links.
                    pass
    return found_links
Exemplo n.º 18
0
def add_commentary_links(oref, user, **kwargs):
    """
    Automatically add links for each comment in the commentary text denoted by 'tref'.

    E.g., for the ref 'Sforno on Kohelet 3:2', automatically set links for
    Kohelet 3:2 <-> Sforno on Kohelet 3:2:1, Kohelet 3:2 <-> Sforno on Kohelet 3:2:2, etc.
    for each segment of text (comment) that is in 'Sforno on Kohelet 3:2'.

    :param oref: Ref of the commentary text ("X on Y ...").
    :param user: user id passed to tracker for attribution.
    Additional keyword arguments are forwarded to tracker.add.
    """
    try:
        text = TextFamily(oref, commentary=0, context=0, pad=False).contents()
    except AssertionError:
        logger.warning(u"Structure node passed to add_commentary_links: {}".format(oref.normal()))
        return

    assert oref.is_commentary()

    tref = oref.normal()

    # The commented-on ref is everything after " on " in the commentary title.
    base_tref = tref[tref.find(" on ") + 4:]

    if len(text["sections"]) == len(text["sectionNames"]):
        # this is a single comment, trim the last section number (comment) from ref
        base_tref = base_tref[0:base_tref.rfind(":")]
        link = {
            "refs": [base_tref, tref],
            "type": "commentary",
            "anchorText": "",
            "auto": True,
            "generated_by": "add_commentary_links"
        }
        try:
            tracker.add(user, Link, link, **kwargs)
        except DuplicateRecordError as e:
            # Link already exists -- nothing to do.
            pass

    elif len(text["sections"]) == (len(text["sectionNames"]) - 1):
        # This means that the text (and it's corresponding ref) being posted has the amount of sections like the parent text
        # (the text being commented on) so this is single group of comments on the lowest unit of the parent text.
        # and we simply iterate and create a link for each existing one to point to the same unit of parent text
        length = max(len(text["text"]), len(text["he"]))
        for i in range(length):
                link = {
                    "refs": [base_tref, tref + ":" + str(i + 1)],
                    "type": "commentary",
                    "anchorText": "",
                    "auto": True,
                    "generated_by": "add_commentary_links"
                }
                try:
                    tracker.add(user, Link, link, **kwargs)
                except DuplicateRecordError as e:
                    # Link already exists -- nothing to do.
                    pass

    elif len(text["sections"]) > 0:
        # any other case where the posted ref sections do not match the length of the parent texts sections
        # this is a larger group of comments meaning it needs to be further broken down
        # in order to be able to match the commentary to the basic parent text units,
        # recur on each section
        length = max(len(text["text"]), len(text["he"]))
        for r in oref.subrefs(length):
            add_commentary_links(r, user, **kwargs)

    else:
        #This is a special case of the above, where the sections length is 0 and that means this is
        # a whole text that has been posted. For  this we need a better way than get_text() to get the correct length of
        # highest order section counts.
        # We use the counts document for that.
        #text_counts = counts.count_texts(tref)
        #length = len(text_counts["counts"])

        sn = StateNode(tref)
        if not sn.versionState.is_new_state:
            sn.versionState.refresh()  # Needed when saving multiple nodes in a complex text.  This may be moderately inefficient.
            sn = StateNode(tref)
        length = sn.ja('all').length()
        for r in oref.subrefs(length):
            add_commentary_links(r, user, **kwargs)

        if USE_VARNISH:
            # Refresh cached pages for both the commentary and its base text.
            invalidate_ref(oref)
            invalidate_ref(Ref(base_tref))
Exemplo n.º 19
0
def import_from_csv(filename, action="status", category="all"):
    existing_titles = []
    with open(filename, 'rb') as csvfile:
        rows = csv.reader(csvfile)
        header = rows.next()
        for text in rows:
            if not len(text[2]) or not len(text[9]):
                # Require a primary titl and something set in "ready to upload" field
                continue
            new_index = {
                "title":
                text[2].strip(),
                "sectionNames": [s.strip() for s in text[8].split(",")],
                "categories": [s.strip() for s in text[7].split(", ")],
                "titleVariants":
                [text[2].strip()] + [s.strip() for s in text[6].split(", ")],
            }
            if len(text[3]):
                new_index["heTitle"] = text[3].strip()
            if len(text[4]):
                new_index["transliteratedTitle"] = text[4].strip()
                new_index["titleVariants"] += [
                    new_index["transliteratedTitle"]
                ]
                new_index["titleVariants"] = [
                    v for v in new_index["titleVariants"] if v
                ]
            if len(text[10]):
                new_index["length"] = int(text[10])
            if len(text[12]):
                # Only import the last order field for now
                new_index["order"] = [map(int, text[12].split(","))[-1]]

            existing = db.index.find_one({"titleVariants": new_index["title"]})

            if action == "status":
                # Print information about texts listed
                if not existing:
                    print "NEW - " + new_index["title"]
                if existing:
                    if new_index["title"] == existing["title"]:
                        print "EXISTING - " + new_index["title"]
                    else:
                        print "EXISTING (title change) - " + new_index["title"]
                    existing_titles.append(existing["title"])

                validation = texts.validate_index(new_index)
                if "error" in validation:
                    print "*** %s" % validation["error"]

            # Add texts if their category is specified in command line
            if action in ("post", "update") and category:
                if category == "all" or category in new_index["categories"][:2]:
                    print "Saving %s" % new_index["title"]

                    if action == "update":
                        # TOOD remove any fields that have empty values like []
                        # before updating - don't overwrite with nothing
                        new_index.update(existing)

                    tracker.add(1, sefaria.model.index.Index, new_index)

            if action == "hebrew" and existing:
                if "heTitle" not in existing:
                    print "Missing Hebrew: %s" % (existing["title"])
                    existing_titles.append(existing["title"])

    if action == "status":
        indexes = db.index.find()
        for i in indexes:
            if i["title"] not in existing_titles:
                print "NOT ON SHEET - %s" % i["title"]

    if action == "hebrew":
        indexes = db.index.find()
        for i in indexes:
            if "heTitle" not in i and i["title"] not in existing_titles:
                print "Still no Hebrew:  %s" % i["title"]

    if action in ("post", "update"):
        summaries.update_summaries()
        summaries.save_toc_to_db()
Exemplo n.º 20
0
def add_links_from_text(oref, lang, text, text_id, user, **kwargs):
    """
    Scan a text for explicit references to other texts and automatically add new links between
    ref and the mentioned text.

    text["text"] may be a list of segments, an individual segment, or None.

    The set of no longer supported links (`existingLinks` - `found`) is deleted.
    If Varnish is used, all linked refs, old and new, are refreshed

    Returns `links` - the list of links added.
    """
    if not text:
        return []

    if isinstance(text, list):
        # Recurse into each segment, pairing it with the matching subref.
        collected = []
        for subref, segment in zip(oref.subrefs(len(text)), text):
            collected += add_links_from_text(subref, lang, segment, text_id,
                                             user, **kwargs)
        return collected

    if isinstance(text, basestring):
        # Three working sets:
        #   prior   - links that existed before this rescan
        #   cited   - normal refs of citations found in this scan
        #   created - new link dicts added in this scan
        # Prior links no longer cited are deleted; all touched refs are
        # refreshed in Varnish when enabled.
        base = oref.normal()
        prior = LinkSet({
            "refs": base,
            "auto": True,
            "generated_by": "add_links_from_text",
            "source_text_oid": text_id
        }).array()  # .array() forces population so links created below don't land in this set

        cited = []
        created = []

        for target in library.get_refs_in_string(text, lang):
            # Record the citation up front: tracker.add raises when the link
            # already exists, and we still want it counted as "found".
            cited.append(target.normal())
            link = {
                # Note -- ref of the citing text is in the first position
                "refs": [base, target.normal()],
                "type": "",
                "auto": True,
                "generated_by": "add_links_from_text",
                "source_text_oid": text_id
            }
            try:
                tracker.add(user, Link, link, **kwargs)
                created.append(link)
                if USE_VARNISH:
                    invalidate_ref(target)
            except InputError:
                pass

        # Delete previously auto-generated links that this text no longer supports
        for old_link in prior:
            for linked_ref in old_link.refs:
                if linked_ref == base:  # current base ref
                    continue
                if USE_VARNISH:
                    invalidate_ref(Ref(linked_ref))
                if linked_ref not in cited:
                    tracker.delete(user, Link, old_link._id)
                break

        return created
Exemplo n.º 21
0
def import_from_csv(filename, action="status", category="all"):
	"""
	Import Index records from a spreadsheet CSV export.

	``action`` selects the behavior:
	  * "status"          - report which rows are new/existing and validate each
	  * "post" / "update" - save qualifying rows via tracker.add
	  * "hebrew"          - report existing records that lack a Hebrew title
	``category``: for "post"/"update" only - a row is saved when this is
	"all" or appears in the row's first two categories.
	"""
	existing_titles = []
	with open(filename, 'rb') as csvfile:
		rows = csv.reader(csvfile)
		# Consume the header row (Python 2 iterator API)
		header = rows.next()
		for text in rows:
			if not len(text[2]) or not len(text[9]): 
				# Require a primary titl and something set in "ready to upload" field
				continue	
			# Columns: 2=title, 3=heTitle, 4=transliteratedTitle, 6=variants,
			# 7=categories, 8=sectionNames, 10=length, 12=order
			new_index = {
				"title": text[2].strip(),
				"sectionNames": [s.strip() for s in text[8].split(",")],
				"categories": [s.strip() for s in text[7].split(", ")],
				"titleVariants": [text[2].strip()] + [s.strip() for s in  text[6].split(", ")],
			}
			if len(text[3]):
				new_index["heTitle"] = text[3].strip()
			if len(text[4]):
				new_index["transliteratedTitle"] = text[4].strip()
				new_index["titleVariants"] += [new_index["transliteratedTitle"]]
				# Drop empty variant strings produced by blank cells
				new_index["titleVariants"] = [v for v in new_index["titleVariants"] if v]
			if len(text[10]):
				new_index["length"] = int(text[10])
			if len(text[12]):
				# Only import the last order field for now
				new_index["order"] = [map(int, text[12].split(","))[-1]] 

			# Match on title variants so renamed records are still found
			existing = db.index.find_one({"titleVariants": new_index["title"]})

			if action == "status":
				# Print information about texts listed
				if not existing:
					print "NEW - " + new_index["title"]
				if existing:
					if new_index["title"] == existing["title"]:
						print "EXISTING - " + new_index["title"]
					else:
						print "EXISTING (title change) - " + new_index["title"]
					existing_titles.append(existing["title"])

				validation = texts.validate_index(new_index)
				if "error" in validation:
					print "*** %s" % validation["error"]


			# Add texts if their category is specified in command line
			if action in ("post", "update") and category:
				if category == "all" or category in new_index["categories"][:2]:
					print "Saving %s" % new_index["title"]

					if action == "update":
						# TOOD remove any fields that have empty values like []
						# before updating - don't overwrite with nothing
						# NOTE(review): if no existing record matched this row,
						# `existing` is None and update() will raise TypeError
						# - confirm "update" rows are always pre-vetted.
						new_index.update(existing)

					tracker.add(1, sefaria.model.index.Index, new_index)
			

			if action == "hebrew" and existing:
				if "heTitle" not in existing:
					print "Missing Hebrew: %s" % (existing["title"])
					existing_titles.append(existing["title"])


	if action == "status":
		# List database records that did not appear on the sheet at all
		indexes = db.index.find()
		for i in indexes:
			if i["title"] not in existing_titles:
				print "NOT ON SHEET - %s" % i["title"]

	if action == "hebrew":
		indexes = db.index.find()
		for i in indexes:
			if "heTitle" not in i and i["title"] not in existing_titles:
				print "Still no Hebrew:  %s" % i["title"]

	if action in ("post", "update"):
		# Rebuild the table of contents after any writes
		from sefaria.model import library
		library.rebuild_toc()
Exemplo n.º 22
0
def add_commentary_links(oref, user, text=None, **kwargs):
    #//TODO: commentary refactor, also many other lines can be made better
    """
    Automatically add links for each comment in the commentary text denoted by 'tref'.
    E.g., for the ref 'Sforno on Kohelet 3:2', automatically set links for
    Kohelet 3:2 <-> Sforno on Kohelet 3:2:1, Kohelet 3:2 <-> Sforno on Kohelet 3:2:2, etc.
    for each segment of text (comment) that is in 'Sforno on Kohelet 3:2'.

    Returns the list of normal refs for which links were created (always a
    list - never None - so recursive callers can safely concatenate).
    """
    assert oref.is_commentary()
    tref = oref.normal()
    base_tref = tref[tref.find(" on ") + 4:]
    base_oref = Ref(base_tref)  # validates that the base-text ref parses
    found_links = []

    if not oref.sections:
        # Special case: sections length is 0, meaning a whole text or complex
        # text node was posted. Recurse into every leaf content node.
        vs = StateNode(tref).versionState
        if not vs.is_new_state:
            # Needed when saving multiple nodes in a complex text.
            # This may be moderately inefficient.
            vs.refresh()
        for node in oref.index_node.get_leaf_nodes():
            cn_oref = node.ref()
            text = TextFamily(cn_oref, commentary=0, context=0,
                              pad=False).contents()
            length = cn_oref.get_state_ja().length()
            for i, sr in enumerate(cn_oref.subrefs(length)):
                stext = {
                    "sections": sr.sections,
                    "sectionNames": text['sectionNames'],
                    "text": text["text"][i] if i < len(text["text"]) else "",
                    "he": text["he"][i] if i < len(text["he"]) else ""
                }
                found_links += add_commentary_links(sr, user, stext, **kwargs)

    else:
        if not text:
            try:
                text = TextFamily(oref, commentary=0, context=0,
                                  pad=False).contents()
            except AssertionError:
                logger.warning(
                    u"Structure node passed to add_commentary_links: {}".
                    format(oref.normal()))
                # Bug fix: was a bare `return` (None), which broke recursive
                # callers doing `found_links += add_commentary_links(...)`.
                return found_links

        if len(text["sectionNames"]) > len(text["sections"]) > 0:
            # Posted ref sections don't match the depth of the parent text:
            # this is a larger group of comments, so break it down and recur
            # on each section to match commentary to base-text units.
            length = max(len(text["text"]), len(text["he"]))
            for i, r in enumerate(oref.subrefs(length)):
                stext = {
                    "sections": r.sections,
                    "sectionNames": text['sectionNames'],
                    "text": text["text"][i] if i < len(text["text"]) else "",
                    "he": text["he"][i] if i < len(text["he"]) else ""
                }
                found_links += add_commentary_links(r, user, stext, **kwargs)

        elif len(text["sections"]) == len(text["sectionNames"]):
            # A single comment: trim the final section number (the comment)
            # from the base ref and link comment <-> base segment.
            if len(text['he']) or len(
                    text['text']):  #only if there is actually text
                base_tref = base_tref[0:base_tref.rfind(":")]
                link = {
                    "refs": [base_tref, tref],
                    "type": "commentary",
                    "anchorText": "",
                    "auto": True,
                    "generated_by": "add_commentary_links"
                }
                found_links += [tref]
                try:
                    tracker.add(user, Link, link, **kwargs)
                except DuplicateRecordError:
                    # Link already exists - nothing to do
                    pass
    return found_links
Exemplo n.º 23
0
    next(csvfile)
    for row in csv.reader(csvfile):

        is_new_perek = not lastrow or row[1] != lastrow[1]
        is_new_mesechet = not lastrow or row[0] != lastrow[0]

        # Add link
        mishnaRef = Ref("{} {}:{}-{}".format(row[0], row[1], row[2], row[3]))
        mishnahInTalmudRef = Ref("{} {}:{}-{}:{}".format(row[0], row[4], row[5], row[6], row[7]))
        print mishnaRef.normal() + " ... " + mishnahInTalmudRef.normal()

        if live:
            try:
                tracker.add(28, Link, {
                    "refs": [mishnaRef.normal(), mishnahInTalmudRef.normal()],
                    "auto": True,
                    "generated_by": "mishnah_map",
                    "type": "Mishnah in Talmud"
                })
            except DuplicateRecordError as e:
                print e

        # Try highlighting hadran.  Note that the last hadran gets highlighted outside of the loop
        """
        13 non standard Hadrans (code is catching all of them, but they should be checked)

        Hadrans in parens, joined at end of other line:
        Sukkah 20b:29
        Sukkah 29b:5

        In parens on it's own line:
        Sukkah 42b:12