Exemplo n.º 1
0
def remove_old_counts():
    """
    Deletes documents in ``db.vstate`` whose title no longer resolves to an index.

    Every document with a truthy ``title`` is checked with ``model.get_index``;
    when that lookup raises ``BookNameError`` the title is printed and the
    document is removed by its ``_id``.  Documents without a title (category
    counts) are left untouched.
    """
    # The collection is read directly instead of through a model set: with a
    # model set, instantiation happens while iterating, so a BookNameError for
    # an invalid stored title could not be caught per document, and the bad
    # title would not be available afterwards.
    for count in db.vstate.find():
        title = count.get("title", None)
        if not title:
            # TODO: clean-up of category counts is not implemented.
            # The abandoned sketch that used to follow here looked for indexes
            # whose `categories` list matched the document's categories exactly
            # ('categories.0' equal to the first category, "$all" of the
            # categories, "$size" equal to their number) and printed
            # "Old category ..." when none was found.  It was unreachable.
            continue

        try:
            model.get_index(title)
        except BookNameError:
            print(u"Old count: %s" % title)
            db.vstate.remove({"_id": count["_id"]})
Exemplo n.º 2
0
def test_get_index():
    """get_index returns a CommentaryIndex for a commentary and an Index for a base text."""
    # Commentary title: a single title variant is expected.
    commentary = model.get_index("Rashi on Exodus")
    assert isinstance(commentary, model.CommentaryIndex)
    assert commentary.titleVariants == [u'Rashi on Exodus']

    # Plain book title.
    book = model.get_index("Exodus")
    assert isinstance(book, model.Index)
    assert book.title == u'Exodus'
Exemplo n.º 3
0
def test_get_index():
    """Look up one commentary and one base text and check the returned index objects."""
    rashi = model.get_index("Rashi on Exodus")
    # The commentary must come back as a CommentaryIndex with exactly one variant.
    assert isinstance(rashi, model.CommentaryIndex)
    assert rashi.titleVariants == [u'Rashi on Exodus']

    exodus = model.get_index("Exodus")
    # The base text must come back as an Index carrying the requested title.
    assert isinstance(exodus, model.Index)
    assert exodus.title == u'Exodus'
Exemplo n.º 4
0
def test_get_index():
    """
    get_index returns a CommentaryIndex titled with the full commentary name
    (and not with its parts), and an Index for a base text.
    """
    commentary = model.get_index("Rashi on Exodus")
    assert isinstance(commentary, model.CommentaryIndex)
    assert u"Rashi on Exodus" == commentary.title

    # Only the combined title is a variant; neither component is one alone.
    variants = commentary.titleVariants
    assert u"Rashi on Exodus" in variants
    assert u"Rashi" not in variants
    assert u"Exodus" not in variants

    book = model.get_index("Exodus")
    assert isinstance(book, model.Index)
    assert book.title == u"Exodus"
Exemplo n.º 5
0
def test_get_index():
    """Check type, title and title variants of the indexes returned by get_index."""
    rashi = model.get_index("Rashi on Exodus")
    assert isinstance(rashi, model.CommentaryIndex)
    assert u'Rashi on Exodus' == rashi.title

    # The commentator name and the book name alone must not be variants.
    rashi_variants = rashi.titleVariants
    assert u'Rashi on Exodus' in rashi_variants
    assert u'Rashi' not in rashi_variants
    assert u'Exodus' not in rashi_variants

    exodus = model.get_index("Exodus")
    assert isinstance(exodus, model.Index)
    assert exodus.title == u'Exodus'
Exemplo n.º 6
0
def export_text(text):
	"""
	Exports 'text' (a document from the texts collection, or virtual merged document)
	by preparing it as an export document and passing it to 'export_text_doc'.

	'text' is modified in place: Hebrew title, categories and section names are
	copied from its index, the content is stored under "text", and the "_id" and
	"chapter" keys are removed from documents that carry an "_id".

	Nothing is exported (a message is printed instead) when the index cannot be
	loaded or when the index is complex.
	"""
	print(text["title"])
	try:
		index = model.get_index(text["title"])
	except Exception as e:
		# Best effort: one bad title must not abort the export of other texts.
		print("Skipping %s - %s" % (text["title"], e.message))
		return

	if index.is_complex():
		# TODO handle export of complex texts
		print("Skipping Complex Text: %s - " % (text["title"]))
		return

	text["heTitle"]      = index.nodes.primary_title("he")
	text["categories"]   = index.categories
	text["sectionNames"] = index.schema["sectionNames"]
	# Content may be stored under either key; "text" wins when it is non-empty.
	text["text"]         = text.get("text", None) or text.get("chapter", "")

	if "_id" in text:
		del text["_id"]
		# "chapter" is optional (see the lookup above), so its absence must not
		# raise a KeyError here.
		text.pop("chapter", None)

	export_text_doc(text)
Exemplo n.º 7
0
def generate_refs_list(query=None):
	"""
	Generate a list of refs to all available sections.

	`query` is passed to `db.counts.find` to select the count records to use;
	when omitted, all count records are used.

	Returns a list of strings of the form "<title> <section>" (or just the title
	when the section is empty), built from the union of the Hebrew and English
	available sections of every selected count record.

	Side effect: count records whose title has no matching index are removed
	from `db.counts`.
	"""
	if query is None:
		query = {}

	trefs = []
	for c in db.counts.find(query):
		if "title" not in c:
			continue  # this is a category count

		title = c["title"]
		try:
			i = model.get_index(title)
		except Exception:
			# If there is no index record to match the count record,
			# the count should be removed.
			db.counts.remove(c)
			continue

		he = list_from_counts(c["availableTexts"]["he"])
		en = list_from_counts(c["availableTexts"]["en"])
		for n in texts.union(he, en):
			if i.categories[0] == "Talmud":
				n = section_to_daf(int(n))
			if getattr(i, "commentaryCategories", None) and i.commentaryCategories[0] == "Talmud":
				split = n.split(":")
				# Convert the whole first component to a daf; indexing the string
				# itself (n[0]) would only look at its first character.
				n = ":".join([section_to_daf(int(split[0]))] + split[1:])
			tref = "%s %s" % (title, n) if n else title
			trefs.append(tref)

	return trefs
Exemplo n.º 8
0
def text_category(text):
    """
    Returns the top level category for text.

    The first entry of the index's `categories` is passed through `mark_safe`.
    "[no cats]" is used when the index has no `categories` attribute, and
    "[text not found]" is returned when the lookup fails for any reason.
    """
    try:
        i = m.get_index(text)
        result = mark_safe(getattr(i, "categories", ["[no cats]"])[0])
    except Exception:
        # Deliberately broad: any lookup problem yields the placeholder.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        result = "[text not found]"
    return result
Exemplo n.º 9
0
def export_index(title):
	"""
	Write the index record of the text called `title` as JSON.

	The record is obtained with `contents(v2=True)` and handed to `write_doc`
	together with the path "<SEFARIA_EXPORT_PATH>/<title>_index.json".
	"""
	record = model.get_index(title).contents(v2=True)
	destination = "%s/%s_index.json" % (SEFARIA_EXPORT_PATH, title)
	write_doc(record, destination)
Exemplo n.º 10
0
def text_category(text):
	"""
	Returns the top level category for text.

	The first entry of the index's `categories` is passed through `mark_safe`.
	"[no cats]" is used when the index has no `categories` attribute, and
	"[text not found]" is returned when the lookup fails for any reason.
	"""
	try:
		i = m.get_index(text)
		result = mark_safe(getattr(i, "categories", ["[no cats]"])[0])
	except Exception:
		# Deliberately broad: any lookup problem yields the placeholder.
		# `Exception` (rather than a bare except) lets KeyboardInterrupt and
		# SystemExit propagate.
		result = "[text not found]"
	return result
Exemplo n.º 11
0
def remove_old_counts():
    """
    Deletes version-state documents whose title no longer resolves to an index.

    Only the title (plus ``_id``) of each ``db.vstate`` document is fetched.
    Documents with a falsy or missing title are skipped; for the others a
    ``BookNameError`` from ``model.get_index`` causes the title to be printed
    and the document to be removed.
    """
    # A direct DB call is used on purpose: going through the model would
    # instantiate records while iterating, where a BookNameError for an invalid
    # stored title cannot be caught and the bad title cannot be recovered.
    for entry in db.vstate.find({}, {"title": 1}):
        title = entry.get("title", None)
        if not title:
            continue

        try:
            model.get_index(title)
        except BookNameError:
            print(u"Old count: %s" % title)
            db.vstate.remove({"_id": entry["_id"]})
Exemplo n.º 12
0
def reset_counts(request, title=None):
    """
    Rebuild counts and redirect with a "Counts-Rebuilt" message.

    Without `title`, all states are refreshed and the redirect goes to the
    site root.  With `title`, only the version state of that index is
    refreshed and the redirect goes to the URL of the index's Ref.
    """
    if not title:
        model.refresh_all_states()
        return HttpResponseRedirect("/?m=Counts-Rebuilt")

    index = model.get_index(title)
    model.VersionState(index=index).refresh()
    target = "/%s?m=Counts-Rebuilt" % model.Ref(index.title).url()
    return HttpResponseRedirect(target)
Exemplo n.º 13
0
def make_text(doc):
    """
    Export doc into a simple text format.

    The result starts with a header (title, Hebrew title, version title and
    version source), optionally followed by the list of versions a merged
    document was built from.  The schema nodes of the index are then traversed:
    every node contributes its title, and leaf nodes additionally contribute
    their flattened content.
    """
    index = model.get_index(doc["title"])
    header = [doc["title"], doc.get("heTitle", ""), doc["versionTitle"], doc["versionSource"]]
    text = "\n".join(header)
    version = Version().load({'title': doc["title"], 'versionTitle': doc["versionTitle"], 'language': doc["language"]})

    merged = doc["versionTitle"] == "merged"

    if "versions" in doc:
        if merged:
            # A merged document has no version of its own; use the first source version.
            first_source = doc["versions"][0][0]
            version = Version().load({'title': doc["title"], 'versionTitle': first_source, 'language': doc["language"]})
        text += "\nThis file contains merged sections from the following text versions:"
        for source in doc["versions"]:
            text += "\n-%s\n-%s" % (source[0], source[1])

    def flatten(content, sectionNames):
        """Render nested content as text, one heading per section at every level above the last."""
        content = content or ""
        if len(sectionNames) == 1:
            lines = []
            for item in content:
                item = item if item else ""
                # Bandaid for mismatch between text structure: an element that is a
                # list instead of a string is joined as well.
                lines.append(item if isinstance(item, basestring) else "\n".join(item))
            return "\n".join(lines)

        flat = ""
        for number, section_content in enumerate(content, 1):
            if sectionNames[0] == "Daf":
                section = section_to_daf(number)
            else:
                section = str(number)
            rendered = flatten(section_content, sectionNames[1:])
            flat += "\n\n%s %s\n\n%s" % (sectionNames[0], section, rendered)

        return flat

    def make_node(node, depth, **kwargs):
        """Return the title of a node, followed by its flattened content when it is a leaf."""
        if not node.is_leaf():
            return "\n\n%s" % node.primary_title(doc["language"])

        content = "\n\n%s" % node.primary_title(doc["language"])
        content += flatten(version.content_node(node), node.sectionNames)
        return "\n\n%s" % content

    text += index.nodes.traverse_to_string(make_node)

    return text
Exemplo n.º 14
0
def remove_old_counts():
    """
    Deletes counts documents whose title no longer resolves to an index.

    Every document in ``db.counts`` with a truthy ``title`` is checked with
    ``model.get_index``; when that lookup raises ``BookNameError`` the title is
    printed and the document is removed by its ``_id``.  Documents without a
    title (category counts) are left untouched.
    """
    # The collection is read directly instead of through a model set: with a
    # model set, instantiation happens while iterating, so a BookNameError for
    # an invalid stored title could not be caught per document, and the bad
    # title would not be available afterwards.
    for count in db.counts.find():
        title = count.get("title", None)
        if not title:
            # TODO: clean-up of category counts is not implemented.
            # The abandoned sketch that used to follow here looked for indexes
            # whose `categories` list matched the document's categories exactly
            # ('categories.0' equal to the first category, "$all" of the
            # categories, "$size" equal to their number) and printed
            # "Old category ..." when none was found.  It was unreachable.
            continue

        try:
            model.get_index(title)
        except BookNameError:
            print(u"Old count: %s" % title)
            db.counts.remove({"_id": count["_id"]})
Exemplo n.º 15
0
def export_text(text):
	"""
	Merges the index record into `text` and passes it to `export_text_doc`.

	`text` is changed in place: the contents of its index are added, "_id" is
	removed and the "chapter" entry is renamed to "text".  When the index cannot
	be loaded, a message is printed and nothing is exported.
	"""
	title = text["title"]
	print(title)
	try:
		index = model.get_index(title)
	except Exception as err:
		print("Skipping %s - %s" % (title, err.message))
		return

	text.update(index.contents())
	del text["_id"]
	chapter = text.pop("chapter")
	text["text"] = chapter

	export_text_doc(text)
Exemplo n.º 16
0
def export_text(text):
    """
    Prepares a single text document for export and passes it to `export_text_doc`.

    The document is updated in place with the contents of its index, loses its
    "_id", and gets its "chapter" content moved to "text".  A text whose index
    cannot be loaded is skipped with a printed message.
    """
    book = text["title"]
    print(book)
    try:
        index = model.get_index(book)
    except Exception as problem:
        print("Skipping %s - %s" % (book, problem.message))
        return

    index_contents = index.contents()
    text.update(index_contents)
    del text["_id"]
    text["text"] = text.pop("chapter")

    export_text_doc(text)
Exemplo n.º 17
0
def export_text(text):
    """
    Exports 'text' (a document from the texts collection, or virtual merged document)
    by preparing it as an export document and passing it to 'export_text_doc'.

    'text' is modified in place: Hebrew title and categories are copied from its
    index and the content is stored under "text".  For a complex index a reduced
    schema is added under "schema" and the content is re-keyed by English node
    title; otherwise the section names are added.  The "_id" and "chapter" keys
    are removed from documents that carry an "_id".

    When the index cannot be loaded, a message is printed and nothing is exported.
    """
    print(text["title"])
    try:
        index = model.get_index(text["title"])
    except Exception as e:
        # Best effort: one bad title must not abort the export of other texts.
        print("Skipping %s - %s" % (text["title"], e.message))
        return

    text["heTitle"] = index.nodes.primary_title("he")
    text["categories"] = index.categories
    # Content may be stored under either key; "text" wins when it is non-empty.
    text["text"] = text.get("text", None) or text.get("chapter", "")

    if index.is_complex():
        def min_node_props(node, depth, **kwargs):
            # Reduce a schema node to its Hebrew title, English title and key.
            return {"heTitle": node.primary_title("he"),
                    "enTitle": node.primary_title("en"),
                    "key": node.key}

        def key2title(text_node, schema_node):
            # Re-key the content by English node title instead of node key,
            # dropping "key" from each schema entry and recursing into children.
            for temp_schema_node in schema_node:
                new_key = temp_schema_node["enTitle"]
                text_node[new_key] = text_node.pop(temp_schema_node["key"])
                del temp_schema_node["key"]
                if "nodes" in temp_schema_node:
                    key2title(text_node[new_key], temp_schema_node["nodes"])

        text["schema"] = index.nodes.traverse_to_json(min_node_props)
        key2title(text["text"], text["schema"]["nodes"])
    else:
        text["sectionNames"] = index.schema["sectionNames"]

    if "_id" in text:
        del text["_id"]
        # "chapter" is optional (see the lookup above), so its absence must not
        # raise a KeyError here.
        text.pop("chapter", None)

    export_text_doc(text)
Exemplo n.º 18
0
def update_links_count(text=None):
	"""
	Counts the links that point to a particular text, or all of them.

	With `text`, the number of links for `model.Ref(text)` is stored on the
	'linksCount' field of that text's counts document.  Without `text`, the
	function calls itself for every counts document that has a non-empty title.
	"""
	if not text:
		for c in db.counts.find({"title": {"$exists": 1}}):
			if c["title"]:
				update_links_count(text=c["title"])
		# Every title was handled by the calls above; without this return the
		# code below would run once more with an empty `text`.
		return

	print("%s" % text)
	# Presumably called only so that an invalid title raises before anything is
	# updated; the returned index itself is not used.
	model.get_index(text)

	# NOTE(review): find_one returns None when no counts document exists for
	# `text`, in which case the assignment below raises TypeError - confirm
	# whether such titles can reach this point.
	c = db.counts.find_one({"title": text})
	c["linksCount"] = model.LinkSet(model.Ref(text)).count()

	db.counts.save(c)
Exemplo n.º 19
0
def hebrew_term(s):
	"""
	Simple translations for common Hebrew words.

	`s` is looked up in three tables (categories, pseudo categories and section
	names).  When it is in none of them, it is treated as a text title and the
	Hebrew title of its index is returned.  When no index with that title
	exists (BookNameError), `s` itself is returned unchanged.
	"""
	categories = {
		"Torah":                u"תורה",
		"Tanach":               u'תנ"ך',
		"Tanakh":               u'תנ"ך',
		"Prophets":             u"נביאים",
		"Writings":             u"כתובים",
		"Commentary":           u"מפרשים",
		"Targum":               u"תרגומים",
		"Mishnah":              u"משנה",
		"Tosefta":              u"תוספתא",
		"Talmud":               u"תלמוד",
		"Bavli":                u"בבלי",
		"Yerushalmi":           u"ירושלמי",
		"Rif":                  u'רי"ף',
		"Kabbalah":             u"קבלה",
		"Halakha":              u"הלכה",
		"Halakhah":             u"הלכה",
		"Midrash":              u"מדרש",
		"Aggadic Midrash":      u"מדרש אגדה",
		"Halachic Midrash":     u"מדרש הלכה",
		"Midrash Rabbah":       u"מדרש רבה",
		"Responsa":             u'שו"ת',
		"Other":                u"אחר",
		"Siddur":               u"סידור",
		"Liturgy":              u"תפילה",
		"Piyutim":              u"פיוטים",
		"Musar":                u"ספרי מוסר",
		"Chasidut":             u"חסידות",
		"Parshanut":            u"פרשנות",
		"Philosophy":           u"מחשבת ישראל",
		"Maharal":              u'מהר"ל מפראג',
		"Apocrypha":            u"ספרים חיצונים",
		"Seder Zeraim":         u"סדר זרעים",
		"Seder Moed":           u"סדר מועד",
		"Seder Nashim":         u"סדר נשים",
		"Seder Nezikin":        u"סדר נזיקין",
		"Seder Kodashim":       u"סדר קדשים",
		"Seder Toharot":        u"סדר טהרות",
		"Seder Tahorot":        u"סדר טהרות",
		"Dictionary":           u"מילון",
		"Early Jewish Thought": u"מחשבת ישראל קדומה",
		"Minor Tractates":      u"מסכתות קטנות",
		"Rosh":                 u'רא"ש',
		"Maharsha":             u'מהרשא',
		"Rashba":               u'רשב"א',
		"Rambam":               u'רמב"ם',
		"Radbaz":               u'רדב"ז',
		"Tosafot Yom Tov":      u"תוספות יום טוב",
		"Chidushei Halachot":   u"חידושי הלכות",
		"Chidushei Agadot":     u"חידושי אגדות",
		"Tiferet Shmuel":       u"תפארת שמואל",
		"Korban Netanel":       u"קרבן נתנאל",
		"Pilpula Charifta":     u"פילפולא חריפתא",
		"Divrey Chamudot":      u"דברי חמודות",
		"Maadaney Yom Tov":     u"מעדני יום טוב",
		"Modern Works":         u"יצירות מודרניות",
	}

	pseudo_categories = {
		"Mishneh Torah":   u"משנה תורה",
		'Introduction':    u"הקדמה",
		'Sefer Madda':     u"ספר מדע",
		'Sefer Ahavah':    u"ספר אהבה",
		'Sefer Zemanim':   u"ספר זמנים",
		'Sefer Nashim':    u"ספר נשים",
		'Sefer Kedushah':  u"ספר קדושה",
		'Sefer Haflaah':   u"ספר הפלאה",
		'Sefer Zeraim':    u"ספר זרעים",
		'Sefer Avodah':    u"ספר עבודה",
		'Sefer Korbanot':  u"ספר קורבנות",
		'Sefer Taharah':   u"ספר טהרה",
		'Sefer Nezikim':   u"ספר נזיקין",
		'Sefer Kinyan':    u"ספר קניין",
		'Sefer Mishpatim': u"ספר משפטים",
		'Sefer Shoftim':   u"ספר שופטים",
		"Shulchan Arukh":  u"שולחן ערוך",
	}

	section_names = {
		"Chapter":          u"פרק",
		"Chapters":         u"פרקים",
		"Perek":            u"פרק",
		"Line":             u"שורה",
		"Daf":              u"דף",
		"Paragraph":        u"פסקה",
		"Parsha":           u"פרשה",
		"Parasha":          u"פרשה",
		"Parashah":         u"פרשה",
		"Seif":             u"סעיף",
		"Se'if":            u"סעיף",
		"Siman":            u"סימן",
		"Section":          u"חלק",
		"Verse":            u"פסוק",
		"Sentence":         u"משפט",
		"Sha'ar":           u"שער",
		"Gate":             u"שער",
		"Comment":          u"פירוש",
		"Phrase":           u"ביטוי",
		"Mishna":           u"משנה",
		"Chelek":           u"חלק",
		"Helek":            u"חלק",
		"Year":             u"שנה",
		"Masechet":         u"מסכת",
		"Massechet":        u"מסכת",
		"Letter":           u"אות",
		"Halacha":          u"הלכה",
		"Seif Katan":       u"סעיף קטן",
		"Se'if Katan":      u"סעיף קטן",
		"Volume":           u"כרך",
		"Book":             u"ספר",
		"Shar":             u"שער",
		"Seder":            u"סדר",
		"Part":             u"חלק",
		"Pasuk":            u"פסוק",
		"Sefer":            u"ספר",
		"Teshuva":          u"תשובה",
		"Teshuvot":         u"תשובות",
		"Tosefta":          u"תוספתא",
		"Halakhah":         u"הלכה",
		"Kovetz":           u"קובץ",
		"Path":             u"נתיב",
		"Parshah":          u"פרשה",
		"Midrash":          u"מדרש",
		"Mitzvah":          u"מצוה",
		"Tefillah":         u"תפילה",
		"Torah":            u"תורה",
		"Perush":           u"פירוש",
		"Peirush":          u"פירוש",
		"Aliyah":           u"עלייה",
		"Tikkun":           u"תיקון",
		"Tikkunim":         u"תיקונים",
		"Hilchot":          u"הילכות",
		"Topic":            u"נושא",
		"Contents":         u"תוכן",
	}

	# Merge with update() rather than by adding items() lists: the result is the
	# same (later tables win on duplicate keys) and it does not depend on
	# items() returning a list.
	words = {}
	for table in (categories, pseudo_categories, section_names):
		words.update(table)

	if s in words:
		return words[s]

	# If s is a text title, look for a stored Hebrew title.
	# Imported here, and only when the tables have no answer.
	from sefaria.model import get_index
	from sefaria.system.exceptions import BookNameError
	try:
		return get_index(s).get_title("he")
	except BookNameError:
		pass

	return s
Exemplo n.º 20
0
def update_text_count(book_title):
	"""
	Recount the text `book_title` (book level only) and store the result in
	its counts record.

	The existing counts document for the title is updated, or a new one is
	started when none exists.  It receives the summed counts of both
	languages, per-language available texts and counts, the percentage
	available, completeness flags and estimated completeness, and is then
	saved to `db.counts`.

	Returns the saved counts document, or the result of `count_texts` when
	that result contains an "error" key (nothing is saved in that case).
	"""
	index = model.get_index(book_title)

	record = db.counts.find_one({"title": book_title}) or {"title": book_title}

	en = count_texts(book_title, lang="en")
	if "error" in en:  # Still valid?
		return en
	he = count_texts(book_title, lang="he")
	if "error" in he:  # Still valid?
		return he

	combined = sum_count_arrays(en["counts"], he["counts"])
	record["allVersionCounts"] = combined

	# totals is a zero filled JA with the shape of all available texts.
	# Summing it with each language ensures that the per-language counts
	# have a 0 wherever they are missing a segment.
	totals = zero_jagged_array(combined)
	en_available = sum_count_arrays(en["counts"], totals)
	he_available = sum_count_arrays(he["counts"], totals)

	record["availableTexts"] = {"en": en_available, "he": he_available}
	record["availableCounts"] = {"en": en["lengths"], "he": he["lengths"]}

	length = getattr(index, "length", None)
	if length and getattr(index, "lengths", None):
		# Compare the summed lengths of the first len(index.lengths) levels.
		levels = range(len(index.lengths))
		he_total = sum(he["lengths"][level] for level in levels)
		en_total = sum(en["lengths"][level] for level in levels)
		expected = sum(index.lengths[level] for level in levels)
		if expected == 0:
			he_percent = en_percent = 0
		else:
			# A value above 100 here would point at wrong length metadata
			# on the index.
			he_percent = he_total / float(expected) * 100
			en_percent = en_total / float(expected) * 100
	elif length:
		he_percent = record["availableCounts"]["he"][0] / float(index.length) * 100
		en_percent = record["availableCounts"]["en"][0] / float(index.length) * 100
	else:
		he_percent = en_percent = 0

	record["percentAvailable"] = {"he": he_percent, "en": en_percent}
	record["textComplete"] = {"he": he_percent > 99.9, "en": en_percent > 99.9}

	# estimate how much of the text we have
	record['estimatedCompleteness'] = {
		"he": estimate_completeness('he', index, record),
		"en": estimate_completeness('en', index, record),
	}

	db.counts.save(record)
	return record
Exemplo n.º 21
0
def hebrew_term(s):
    """
    Simple translations for common Hebrew words.

    `s` is looked up in three tables (categories, pseudo categories and section
    names).  When it is in none of them, it is treated as a text title and the
    Hebrew title of its index is returned.  When no index with that title
    exists (BookNameError), `s` is tried as a commentator name: the
    "heCommentator" of the first index titled "<s> on ..." is returned.
    When nothing matches, `s` itself is returned unchanged.
    """
    categories = {
        "Torah": u"תורה",
        "Tanach": u'תנ"ך',
        "Tanakh": u'תנ"ך',
        "Prophets": u"נביאים",
        "Writings": u"כתובים",
        "Commentary": u"מפרשים",
        "Targum": u"תרגומים",
        "Mishnah": u"משנה",
        "Tosefta": u"תוספתא",
        "Talmud": u"תלמוד",
        "Bavli": u"בבלי",
        "Yerushalmi": u"ירושלמי",
        "Rif": u'רי"ף',
        "Kabbalah": u"קבלה",
        "Halakha": u"הלכה",
        "Halakhah": u"הלכה",
        "Midrash": u"מדרש",
        "Aggadic Midrash": u"מדרש אגדה",
        "Halachic Midrash": u"מדרש הלכה",
        "Midrash Rabbah": u"מדרש רבה",
        "Responsa": u'שו"ת',
        "Rashba": u'רשב"א',
        "Rambam": u'רמב"ם',
        "Other": u"אחר",
        "Siddur": u"סידור",
        "Liturgy": u"תפילה",
        "Piyutim": u"פיוטים",
        "Musar": u"ספרי מוסר",
        "Chasidut": u"חסידות",
        "Parshanut": u"פרשנות",
        "Philosophy": u"מחשבת ישראל",
        "Maharal": u'מהר"ל מפראג',
        "Apocrypha": u"ספרים חיצונים",
        "Seder Zeraim": u"סדר זרעים",
        "Seder Moed": u"סדר מועד",
        "Seder Nashim": u"סדר נשים",
        "Seder Nezikin": u"סדר נזיקין",
        "Seder Kodashim": u"סדר קדשים",
        "Seder Toharot": u"סדר טהרות",
        "Seder Tahorot": u"סדר טהרות",
        "Dictionary": u"מילון",
        "Early Jewish Thought": u"מחשבת ישראל קדומה",
        "Minor Tractates": u"מסכתות קטנות",
        # The gershayim of an acronym goes before its last letter.
        "Rosh": u'רא"ש',
        "Maharsha": u'מהרשא',
    }

    pseudo_categories = {
        "Mishneh Torah": u"משנה תורה",
        'Introduction': u"הקדמה",
        'Sefer Madda': u"ספר מדע",
        'Sefer Ahavah': u"ספר אהבה",
        'Sefer Zemanim': u"ספר זמנים",
        'Sefer Nashim': u"ספר נשים",
        'Sefer Kedushah': u"ספר קדושה",
        'Sefer Haflaah': u"ספר הפלאה",
        'Sefer Zeraim': u"ספר זרעים",
        'Sefer Avodah': u"ספר עבודה",
        'Sefer Korbanot': u"ספר קורבנות",
        'Sefer Taharah': u"ספר טהרה",
        'Sefer Nezikim': u"ספר נזיקין",
        'Sefer Kinyan': u"ספר קניין",
        'Sefer Mishpatim': u"ספר משפטים",
        'Sefer Shoftim': u"ספר שופטים",
        "Shulchan Arukh": u"שולחן ערוך",
    }

    section_names = {
        "Chapter": u"פרק",
        "Perek": u"פרק",
        "Line": u"שורה",
        "Daf": u"דף",
        "Paragraph": u"פסקה",
        "Parsha": u"פרשה",
        "Parasha": u"פרשה",
        "Parashah": u"פרשה",
        "Seif": u"סעיף",
        "Se'if": u"סעיף",
        "Siman": u"סימן",
        "Section": u"חלק",
        "Verse": u"פסוק",
        "Sentence": u"משפט",
        "Sha'ar": u"שער",
        "Gate": u"שער",
        "Comment": u"פירוש",
        "Phrase": u"ביטוי",
        "Mishna": u"משנה",
        "Chelek": u"חלק",
        "Helek": u"חלק",
        "Year": u"שנה",
        "Masechet": u"מסכת",
        "Massechet": u"מסכת",
        "Letter": u"אות",
        "Halacha": u"הלכה",
        "Seif Katan": u"סעיף קטן",
        "Se'if Katan": u"סעיף קטן",
        "Volume": u"כרך",
        "Book": u"ספר",
        "Shar": u"שער",
        "Seder": u"סדר",
        "Part": u"חלק",
        "Pasuk": u"פסוק",
        "Sefer": u"ספר",
        "Teshuva": u"תשובה",
        "Teshuvot": u"תשובות",
        "Tosefta": u"תוספתא",
        "Halakhah": u"הלכה",
        "Kovetz": u"קובץ",
        "Path": u"נתיבה",
        "Parshah": u"פרשה",
        "Midrash": u"מדרש",
        "Mitzvah": u"מצוה",
        "Tefillah": u"תפילה",
        "Torah": u"תורה",
        "Perush": u"פירוש",
        "Peirush": u"פירוש",
        "Aliyah": u"עלייה",
        "Tikkun": u"תיקון",
        "Tikkunim": u"תיקונים",
    }

    # Merge with update() rather than by adding items() lists: the result is the
    # same (later tables win on duplicate keys) and it does not depend on
    # items() returning a list.
    words = {}
    for table in (categories, pseudo_categories, section_names):
        words.update(table)

    if s in words:
        return words[s]

    # If s is a text title, look for a stored Hebrew title.
    # Imported here, and only when the tables have no answer.
    import re
    from sefaria.model import get_index, IndexSet
    from sefaria.system.exceptions import BookNameError
    try:
        return get_index(s).get_title("he")
    except BookNameError:
        # Try looking in the commentator section of a Commentary2 text.
        # s is escaped so that it is matched literally and regex
        # metacharacters in it cannot change the query.
        indexes = IndexSet({"title": {"$regex": "^" + re.escape(s) + " on "}})
        for i in indexes:
            # Only the first matching index is used.
            return i.toc_contents()["heCommentator"]

    return s