示例#1
0
def test_invalid_index_save_no_existing_base_text():
    """Saving an Index whose base_text_titles names an unknown text must fail.

    The save is expected to raise InputError with the "Base Text Titles"
    message, and no Index record with the test title may exist afterwards.
    """
    title = 'Bartenura (The Next Generation)'
    # Clear any record with this title before building the new one.
    model.IndexSet({"title": title}).delete()

    english_title = {"lang": "en", "text": title, "primary": True}
    hebrew_title = {"lang": "he", "text": "פרשן", "primary": True}
    schema = {
        "titles": [english_title, hebrew_title],
        "nodeType": "JaggedArrayNode",
        "depth": 2,
        "sectionNames": ["Section", "Line"],
        "addressTypes": ["Integer", "Integer"],
        "key": title,
    }
    index_dict = {
        "categories": ["Mishnah", "Commentary", "Bartenura", "Seder Zeraim"],
        # "Gargamel" is the base text title the save is expected to reject.
        "base_text_titles": ["Gargamel"],
        "title": title,
        "schema": schema,
    }

    idx = model.Index(index_dict)
    with pytest.raises(InputError) as e_info:
        idx.save()

    expected_message = "Base Text Titles must point to existing texts in the system."
    assert expected_message in str(e_info.value)
    assert model.IndexSet({"title": title}).count() == 0
示例#2
0
def teardown_module(module):
    """Delete the Index and Version records for a fixed list of test titles.

    :param module: the test module object passed in by the caller; unused here.
    """
    test_titles = (
        'Test Commentator Name',
        'Bartenura (The Next Generation)',
        'Test Index Name',
        "Changed Test Index",
        "Third Attempt",
        "Test Iu",
        "Test Del",
    )

    for test_title in test_titles:
        # Per title: index records first, then version records.
        for record_set in (model.IndexSet, model.VersionSet):
            record_set({"title": test_title}).delete()
示例#3
0
def test_dup_index_save():
    """Saving a second Index under an already-used title must not add a record.

    A first Index is saved and verified to exist exactly once.  A second save
    with the same title is then attempted; whether or not it raises, the
    number of Index records with that title must still be one.
    """
    title = 'Test Commentator Name'
    model.IndexSet({"title": title}).delete()
    d = {
        "categories": ["Liturgy"],
        "title": title,
        "schema": {
            "titles": [{
                "lang": "en",
                "text": title,
                "primary": True
            }, {
                "lang": "he",
                "text": "פרשן",
                "primary": True
            }],
            "nodeType": "JaggedArrayNode",
            "depth": 2,
            "sectionNames": ["Section", "Line"],
            "addressTypes": ["Integer", "Integer"],
            "key": title
        },
    }
    idx = model.Index(d)
    idx.save()
    assert model.IndexSet({"title": title}).count() == 1

    d2 = {
        "title": title,
        "heTitle": u"פרשן ב",
        "titleVariants": [title],
        "sectionNames": ["Chapter", "Paragraph"],
        "categories": ["Commentary"],
        "lengths": [50, 501]
    }
    try:
        model.Index(d2).save()
    except Exception:
        # Best effort: the duplicate save may be rejected with an error.  The
        # test only cares about the record count checked below, so the error
        # is ignored.  `Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt / SystemExit still propagate.
        pass

    assert model.IndexSet({"title": title}).count() == 1
示例#4
0
def test_invalid_index_save_no_category():
    """Saving an Index under a category path that does not exist must fail.

    The save is expected to raise InputError with the "You must create
    category" message, and no Index record with the test title may exist
    afterwards.
    """
    title = 'Bartenura (The Next Generation)'
    # Clear any record with this title before building the new one.
    model.IndexSet({"title": title}).delete()

    english_title = {"lang": "en", "text": title, "primary": True}
    hebrew_title = {"lang": "he", "text": "פרשן", "primary": True}
    schema = {
        "titles": [english_title, hebrew_title],
        "nodeType": "JaggedArrayNode",
        "depth": 2,
        "sectionNames": ["Section", "Line"],
        "addressTypes": ["Integer", "Integer"],
        "key": title,
    }
    index_dict = {
        # The final path element, "Gargamel", is the one named in the
        # expected error message.
        "categories": ["Mishnah", "Commentary", "Bartenura", "Gargamel"],
        "title": title,
        "schema": schema,
    }

    idx = model.Index(index_dict)
    with pytest.raises(InputError) as e_info:
        idx.save()

    expected_message = "You must create category Mishnah/Commentary/Bartenura/Gargamel before adding texts to it."
    assert expected_message in str(e_info.value)
    assert model.IndexSet({"title": title}).count() == 0
示例#5
0
def test_invalid_index_save_no_hebrew_collective_title():
    """Saving an Index with a collective title lacking a Hebrew Term must fail.

    The save is expected to raise InputError with the "hebrew translation
    Term" message, and no Index record with the test title may exist
    afterwards.
    """
    title = 'Bartenura (The Next Generation)'
    # Clear any record with this title before building the new one.
    model.IndexSet({"title": title}).delete()

    english_title = {"lang": "en", "text": title, "primary": True}
    hebrew_title = {"lang": "he", "text": "פרשן", "primary": True}
    schema = {
        "titles": [english_title, hebrew_title],
        "nodeType": "JaggedArrayNode",
        "depth": 2,
        "sectionNames": ["Section", "Line"],
        "addressTypes": ["Integer", "Integer"],
        "key": title,
    }
    index_dict = {
        "categories": ["Mishnah", "Rishonim on Mishnah", "Bartenura"],
        # "Gargamel" is the collective title named in the expected error.
        "collective_title": 'Gargamel',
        "title": title,
        "schema": schema,
    }

    idx = model.Index(index_dict)
    with pytest.raises(InputError) as e_info:
        idx.save()

    expected_message = "You must add a hebrew translation Term for any new Collective Title: Gargamel."
    assert expected_message in str(e_info.value)
    assert model.IndexSet({"title": title}).count() == 0
示例#6
0
def get_book_link_collection(book, cat):
    """Collect the links between one book and every text in a category.

    :param book: title of the book; refs are matched as "<book> <digit>...".
    :param cat: category name used to select the index titles on the other
        side of the links.  For "Tanach", "Torah", "Prophets" and "Writings",
        indexes also categorized as "Commentary" or "Targum" are excluded.
    :return: a list of ``{"r1": {"title", "loc"}, "r2": {"title", "loc"}}``
        dicts (spaces in titles replaced by "-"), one per usable link; or
        ``{"error": ...}`` when no index title matches the category query.
    """
    if cat in ("Tanach", "Torah", "Prophets", "Writings"):
        query = {
            "$and": [
                {"categories": cat},
                {"categories": {"$ne": "Commentary"}},
                {"categories": {"$ne": "Targum"}},
            ]
        }
    else:
        query = {"categories": cat}

    titles = model.IndexSet(query).distinct("title")
    if not titles:
        return {"error": "No results for {}".format(query)}

    # Titles are literal text, not patterns: escape them so characters such as
    # "(", ")" or "." in a title cannot alter or break the regex.
    book_re = r'^{} \d'.format(re.escape(book))
    cat_re = r'^({}) \d'.format('|'.join(re.escape(t) for t in titles))

    link_re = re.compile(r'^(?P<title>.+) (?P<loc>\d.*)$')
    ret = []

    links = model.LinkSet({
        "$and": [
            {"refs": {"$regex": book_re}},
            {"refs": {"$regex": cat_re}},
        ]
    })
    for link in links:
        l1 = link_re.match(link.refs[0])
        l2 = link_re.match(link.refs[1])
        if l1 is None or l2 is None:
            # A ref that does not look like "<title> <digit>..." cannot be
            # split into title/location; skip the link instead of failing on
            # None.group().
            continue
        ret.append({
            "r1": {
                "title": l1.group("title").replace(" ", "-"),
                "loc": l1.group("loc")
            },
            "r2": {
                "title": l2.group("title").replace(" ", "-"),
                "loc": l2.group("loc")
            }
        })
    return ret
示例#7
0
 def setup_class(cls):
     """Delete a fixed set of test indexes, rebuild the TOC and cache it on cls.

     After the call, ``cls.toc`` holds ``model.library.get_toc()`` and
     ``cls.search_toc`` holds ``model.library.get_search_filter_toc()``.
     """
     titles_to_clear = [
         "New Toc Title Test",
         "New Toc Test",
         "Another New Toc Test",
         "Harchev Davar on Joshua",
     ]
     model.IndexSet({"title": {"$in": titles_to_clear}}).delete()

     # Rebuild first so the two TOC reads below come after the deletions.
     model.library.rebuild_toc()
     cls.toc = model.library.get_toc()
     cls.search_toc = model.library.get_search_filter_toc()
示例#8
0
def export_schemas():
    """Write each Index's v2 contents as JSON under SEFARIA_EXPORT_PATH/schemas/.

    The target directory is created when missing.  Each index is written to
    "<title with spaces replaced by underscores>.json".  An InputError raised
    while serializing an index is printed and appended to
    SEFARIA_EXPORT_PATH/errors.log, and the export continues with the next
    index.
    """
    path = SEFARIA_EXPORT_PATH + "/schemas/"
    if not os.path.exists(path):
        os.makedirs(path)
    for i in model.IndexSet():
        title = i.title.replace(" ", "_")
        # NOTE: the output file is opened (and truncated) before the contents
        # are generated, so an index that raises InputError leaves an empty
        # .json file behind.
        with open(path + title + ".json", "w") as f:
            try:
                f.write(make_json(i.contents(v2=True)).encode('utf-8'))
            except InputError as e:
                # Single-argument print call: same output on Python 2 and 3.
                print("InputError: %s" % e)
                with open(SEFARIA_EXPORT_PATH + "/errors.log", "a") as error_log:
                    error_log.write("%s - InputError: %s\n" % (datetime.now(), e))
示例#9
0
def rename_category(old, new):
    """
    Rename a category across all index records.

    Every index whose categories include `old` gets each occurrence of `old`
    replaced by `new` and is saved; summaries are then updated.  Fails an
    assertion when no index uses `old`.
    """
    matching = model.IndexSet({"categories": old})

    assert matching.count(), "No categories named {}".format(old)

    for record in matching:
        renamed = []
        for name in record.categories:
            renamed.append(new if name == old else name)
        record.categories = renamed
        record.save()

    summaries.update_summaries()
示例#10
0
def update_counts(ref=None):
    """
    Update the count records of all texts, or of the text specified
    by ref (currently at book level only), by performing a count.

    When ref is given, only that text is recounted and summaries are left
    untouched.  Otherwise every index is visited: for a commentary index,
    each distinct version title starting with "<index title> on " is
    recounted; for any other index, the index title itself is recounted.
    Summaries are updated once at the end.
    """
    import re  # local import: only needed to escape titles for the regex below

    if ref:
        update_text_count(ref)
        return

    indices = model.IndexSet()

    for index in indices:
        if index.is_commentary():
            # The title is literal text; escape it so regex metacharacters in
            # a title (e.g. parentheses) cannot alter or break the pattern.
            cRef = "^{} on ".format(re.escape(index.title))
            texts = model.VersionSet({"title": {"$regex": cRef}}).distinct("title")
            for text in texts:
                update_text_count(text)
        else:
            update_text_count(index.title)

    summaries.update_summaries()
示例#11
0
def remove_old_counts():
    """
    Delete counts documents whose title no longer corresponds to a text.

    Documents without a title (category counts) are currently skipped.
    """
    # The raw collection is read directly rather than through model.CountSet.
    # Per the original author's note, instantiating a Count for a document
    # with an invalid title raises BookNameError during iteration, where it
    # can neither be caught per-document nor reveal the offending title.
    for count in db.counts.find():
        title = count.get("title", None)
        if not title:
            # TODO: incomplete for category counts.  The intended check is to
            # look for indexes whose categories exactly match the document's
            # categories and report "Old category ..." when none exist.
            continue
        try:
            model.get_index(title)
        except BookNameError:
            # Single-argument print call: same output on Python 2 and 3.
            print(u"Old count: %s" % title)
            db.counts.remove({"_id": count["_id"]})
示例#12
0
def export_schemas():
    """Write each Index's contents as JSON under SEFARIA_DATA_PATH/export/schemas/.

    The file name is the index title with spaces replaced by underscores
    (no extension is added).
    """
    for index in model.IndexSet():
        file_name = index.title.replace(" ", "_")
        target = SEFARIA_DATA_PATH + "/export/schemas/" + file_name
        with open(target, "w") as out:
            out.write(make_json(index.contents()))
 def teardown_class(cls):
     """Delete the Index and Version records for a fixed list of test titles."""
     test_titles = (
         "New Toc Title Test",
         "New Toc Test",
         "Another New Toc Test",
         "Harchev Davar on Joshua",
         "Bob is your Uncle",
     )
     for test_title in test_titles:
         # Per title: index records first, then version records.
         for record_set in (model.IndexSet, model.VersionSet):
             record_set({"title": test_title}).delete()
示例#14
0
def dep_counts(name):
    """Count the records that reference a text title in various ways.

    Three regex patterns are built from `name`:

    * ``alone``     - refs of the form "<name> <digit>..."
    * ``commentor`` - strings containing "<name> on"
    * ``commentee`` - refs of the form "<commentator> on <name> <digit>...",
      where <commentator> is any index title whose first category is
      "Commentary"

    :param name: the text title to look for.
    :return: dict mapping a descriptive label (e.g. 'version title exact
        match', 'link match alone') to the matching record count in the
        version, history, note and link sets.
    """
    commentators = model.IndexSet({
        "categories.0": "Commentary"
    }).distinct("title")

    escaped_name = re.escape(name)
    # Commentator titles are literal text just like `name`; escape them too so
    # regex metacharacters in a title cannot alter or break the alternation.
    commentator_alternation = "|".join(re.escape(c) for c in commentators)

    ref_patterns = {
        'alone': r'^{} \d'.format(escaped_name),
        'commentor': r'{} on'.format(escaped_name),
        'commentee': r'^({}) on {} \d'.format(commentator_alternation,
                                              escaped_name),
    }
    # The title queries use the same pattern as the 'commentee' ref pattern.
    commentee_title_pattern = ref_patterns['commentee']

    def count_regex(record_set, field, pattern):
        """Count the records of `record_set` whose `field` matches `pattern`."""
        return record_set({field: {"$regex": pattern}}).count()

    ret = {
        'version title exact match':
        model.VersionSet({"title": name}).count(),
        'version title match commentor':
        count_regex(model.VersionSet, "title", ref_patterns["commentor"]),
        'version title match commentee':
        count_regex(model.VersionSet, "title", commentee_title_pattern),
        'history title exact match':
        model.HistorySet({"title": name}).count(),
        'history title match commentor':
        count_regex(model.HistorySet, "title", ref_patterns["commentor"]),
        'history title match commentee':
        count_regex(model.HistorySet, "title", commentee_title_pattern),
    }

    for pname, pattern in ref_patterns.items():
        ret.update({
            'note match ' + pname:
            count_regex(model.NoteSet, "ref", pattern),
            'link match ' + pname:
            count_regex(model.LinkSet, "refs", pattern),
            'history refs match ' + pname:
            count_regex(model.HistorySet, "ref", pattern),
            'history new refs match ' + pname:
            count_regex(model.HistorySet, "new.refs", pattern),
        })

    return ret
示例#15
0
def count_category(cat, lang=None):
    """
    Count the number of sections of various types in an entire category and
    calculate percentages.  Depends on text counts already being saved in the
    counts collection.

    :param cat: a category name, or a list of category names forming a path.
    :param lang: "en" or "he" to count a single language; when omitted, both
        languages are counted, the combined result replaces the category's
        document in db.counts, and the combined result is returned.
    :return: with `lang`: ``{"availableCounts": {...}, "percentAvailable": p}``
        where p is a float or the string "unknown".  Without `lang`: a dict
        with "percentAvailable", "availableCounts" and "textComplete", each
        keyed by "he" and "en".
    """
    cat = [cat] if isinstance(cat, basestring) else cat

    if not lang:
        # If no language specified, return a dict with English and Hebrew,
        # grouping hebrew and english fields
        en = count_category(cat, "en")
        he = count_category(cat, "he")

        def is_complete(pct):
            # percentAvailable may be the string "unknown".  On Python 2 a
            # str compares greater than any number, so a bare `pct > 99.5`
            # would report an unknown percentage as complete.
            return isinstance(pct, float) and pct > 99.5

        counts = {
            "percentAvailable": {
                "he": he["percentAvailable"],
                "en": en["percentAvailable"]
            },
            "availableCounts": {
                "he": he["availableCounts"],
                "en": en["availableCounts"]
            },
            "textComplete": {
                "he": is_complete(he["percentAvailable"]),
                "en": is_complete(en["percentAvailable"]),
            },
        }

        # Save to the DB, replacing any existing document for exactly this
        # category path.
        remove_doc = {"$and": [{'categories.0': cat[0]}, {"categories": {"$all": cat}}, {"categories": {"$size": len(cat)}}]}
        db.counts.remove(remove_doc)
        counts_doc = {"categories": cat}
        counts_doc.update(counts)
        db.counts.save(counts_doc)

        return counts

    # Count this category
    counts = defaultdict(int)
    percent = 0.0
    percentCount = 0
    indxs = model.IndexSet({"$and": [{'categories.0': cat[0]}, {"categories": {"$all": cat}}]})
    for indx in indxs:
        counts["Text"] += 1
        text_count = model.Count().load({"title": indx.title})
        if not text_count or not hasattr(text_count, "availableCounts") or not hasattr(indx, "sectionNames"):
            continue

        c = text_count.availableCounts[lang]
        for i, section_name in enumerate(indx.sectionNames):
            # Only section levels that have a stored count contribute.
            if len(c) > i:
                counts[section_name] += c[i]

        if hasattr(text_count, "percentAvailable") and isinstance(percent, float):
            percentCount += 1
            percent += text_count.percentAvailable[lang] if isinstance(text_count.percentAvailable[lang], float) else 0.0
        else:
            # Once any text lacks percentAvailable, the category percentage
            # stays "unknown" for the rest of the loop.
            percent = "unknown"

    percentCount = 1 if percentCount == 0 else percentCount
    percent = percent / percentCount if isinstance(percent, float) else "unknown"

    if "Daf" in counts:
        # The summed "Daf" value is reported as "Amud", and "Daf" becomes
        # half of it.
        # NOTE(review): `/` floors on Python 2 ints but not on Python 3 - confirm.
        counts["Amud"] = counts["Daf"]
        counts["Daf"] = counts["Daf"] / 2

    return {"availableCounts": dict(counts), "percentAvailable": percent}
示例#16
0
def test_index_delete():
    """Deleting an Index must also remove the versions that belong to it.

    Covers two cases: a simple text with two versions, and a commentator
    with commentary versions on two base books.
    """
    # --- Simple text ---
    ti = "Test Del"
    for record_set in (model.IndexSet, model.VersionSet):
        record_set({"title": ti}).delete()

    i = model.Index({
        "title": ti,
        "heTitle": u"כבכב",
        "titleVariants": [ti],
        "sectionNames": ["Chapter", "Paragraph"],
        "categories": ["Musar"],
        "lengths": [50, 501]
    }).save()

    # (versionTitle, language, chapter content), saved in this order.
    version_specs = (
        ("Version 1 TEST", "he", [[u''], [u''], [u"לה לה לה לא חשוב על מה"]]),
        ("Version 2 TEST", "en", [[], ["Hello goodbye bla bla blah"], []]),
    )
    for version_title, language, chapter in version_specs:
        version = model.Version({
            "chapter": i.nodes.create_skeleton(),
            "versionTitle": version_title,
            "versionSource": "blabla",
            "language": language,
            "title": i.title
        })
        version.chapter = chapter
        version.save()

    i.delete()
    assert model.Index().load({'title': ti}) is None
    assert model.VersionSet({'title': ti}).count() == 0

    # --- Commentator ---
    from sefaria.helper.text import create_commentator_and_commentary_version

    commentator_name = "Commentator Del"
    he_commentator_name = u"פרשנדנן"
    base_books = ('Genesis', 'Pesach Haggadah')

    # Clear the commentator index and both commentary versions first.
    model.IndexSet({"title": commentator_name}).delete()
    for base in base_books:
        model.VersionSet({"title": commentator_name + " on " + base}).delete()

    for base in base_books:
        create_commentator_and_commentary_version(commentator_name, base,
                                                  'he', 'test', 'test',
                                                  he_commentator_name)

    model.Index().load({'title': commentator_name}).delete()
    assert model.Index().load({'title': commentator_name}) is None
    remaining_versions = model.VersionSet({'title': {'$regex': commentator_name}})
    assert remaining_versions.count() == 0
示例#17
0
import sefaria.model as model
from sefaria.system.database import db
from sefaria.clean import remove_old_counts

# Move the history books

model.IndexSet({"categories":"History"}).update({"categories": [u'Apocrypha']})

anaBekhoach = model.Index().load({'title': 'Ana BeKhoach'})
anaBekhoach.categories = [u'Liturgy',u'Piyutim'] #why doesn't update() work on an instance?
anaBekhoach.save()

# Every title matching the regex is first moved under Midrash Rabbah ...
model.IndexSet({"title":{"$regex": "Rabbah?"}}).update({"categories": ['Midrash', 'Aggadic Midrash', 'Midrash Rabbah']})
# ... but this one should not have been updated, so it is reset here.
model.Index().update({'title': 'Tanna Debei Eliyahu Rabbah'}, {'categories': ['Midrash', 'Aggadic Midrash']})

model.IndexSet({'title': {"$regex" : 'Ein Yaakov'}}).update({'categories': ['Midrash', 'Aggadic Midrash']})

# Individual titles that are recategorized as Aggadic Midrash, in order.
_aggadic_midrash_titles = [
    'Midrash Tanchuma',
    'Legends of the Jews',
    'Midrash Mishlei',
    'Pirkei Derabi Eliezer',
    'Midrash on Proverbs',
    "Midrash B'not Zelophehad",
    'Midrash Tehilim',
    'Pesikta de rav kahana',
    'The Fathers according to Rabbi Nathan',
    'Yalkut Shimoni',
]
for _aggadic_title in _aggadic_midrash_titles:
    # A fresh category list is built for every call.
    model.Index().update({'title': _aggadic_title}, {'categories': ['Midrash', 'Aggadic Midrash']})


model.Index().update({'title': 'Sifra'}, {'categories': ['Midrash', 'Halachic Midrash']})
示例#18
0
import json
import pytest
import sefaria.summaries as s
import sefaria.model as model
import sefaria.system.cache as scache
from sefaria.system.exceptions import BookNameError
from sefaria.utils.testing_utils import *

#create, update, delete, change categories
# test that old title goes away on index title change (regular + commentary)
# test that no commentator is added
# no wandering commentaries
""" SOME SETUP """

# Runs at import time: distinct 'title' values across all Index records
# (the empty query selects every record).
text_titles = model.IndexSet({}).distinct('title')
# Also at import time; presumably regenerates the library's table of
# contents before any test reads it - confirm against model.library.
model.library.rebuild_toc()
""" THE TESTS """


class Test_Toc(object):
    """Table-of-contents tests; rebuilds the TOC before the class runs."""

    @classmethod
    def setup_class(cls):
        """Rebuild the library TOC before any test in this class runs."""
        model.library.rebuild_toc()

    @classmethod
    def teardown_class(cls):
        """Delete the Index and Version records for the listed test titles."""
        titles = [
            "New Toc Title Test", "New Toc Test", "Another New Toc Test",
            "Harchev Davar on Joshua", "Bob is your Uncle"
        ]
        # The list was previously built and never used, so no cleanup
        # happened.
        # NOTE(review): the intended cleanup is assumed to be deleting the
        # index and version records of each title - confirm.
        for title in titles:
            model.IndexSet({"title": title}).delete()
            model.VersionSet({"title": title}).delete()
import sefaria.model as model
from sefaria.system.database import db
from sefaria.clean import remove_old_counts
from sefaria.counts import update_counts

# Remove duplicate index records whose titles carry trailing whitespace
# (e.g. 'Sefer Abudraham ').
_duplicate_titles = (
    "Sefer Abudraham ",
    "Tiferet Yisrael ",
    "Igrot Moshe ",
    "The Sabbath, Heschel  ",
    "Sifre Devarim ",
)
for _duplicate_title in _duplicate_titles:
    db.index.remove({"title": _duplicate_title})

remove_old_counts()

# Strip leading/trailing whitespace from any remaining index title.
texts = model.IndexSet({})
for t in texts:
    stripped_title = t.title.strip()
    if stripped_title != t.title:
        t.title = stripped_title
        t.save()

# Notes that have no "public" field: give owner-less ones owner 1, then mark
# the whole set as not public.
ns = model.NoteSet({"public": {"$exists": False}})
for n in ns:
    if not getattr(n, "owner", None):
        n.owner = 1

ns.update({"public": False})

# Remove the "sectionCounts" field from all counts documents
db.counts.update({}, {"$unset": {"sectionCounts": ""}}, multi=True)
示例#20
0
def export_schemas():
    """Write each Index's v2 contents as UTF-8 JSON under SEFARIA_DATA_PATH/export/schemas/.

    The file name is the index title with spaces replaced by underscores,
    plus a ".json" extension.
    """
    for index in model.IndexSet():
        file_name = index.title.replace(" ", "_")
        target = SEFARIA_DATA_PATH + "/export/schemas/" + file_name + ".json"
        with open(target, "w") as out:
            serialized = make_json(index.contents(v2=True))
            out.write(serialized.encode('utf-8'))