Exemplo n.º 1
0
        def _create_parasha_topic_story(parasha_obj, mustHave=None, **kwargs):
            """
            Build and save a "Topics in <parasha>" story for one parasha entry.

            The topic whose key is the normalized, title-cased parasha name is
            looked up, one page of `k` related topics is sliced out of it, and
            only entries whose paired value is greater than 1 are kept. The
            story is generated only when all `k` entries of the page survive
            that filter; otherwise nothing happens.

            `iteration`, `k`, `cls` and `text` come from the enclosing scope.

            :param parasha_obj: mapping with a "parasha" key; it is also passed
                whole to make_parashah_response_from_calendar_entry
            :param mustHave: forwarded to generate_story; a falsy value becomes []
            :param kwargs: forwarded unchanged to generate_story
            :return: None
            """
            from sefaria.model.topic import get_topics
            from sefaria.utils.util import titlecase
            from sefaria.utils.calendars import make_parashah_response_from_calendar_entry

            # `iteration` is 1-based; convert it to the offset of the current page
            start = (iteration - 1) * k
            all_topics = get_topics()
            parasha_name = text.Term.normalize(titlecase(parasha_obj["parasha"]))
            parasha_topic = all_topics.get(parasha_name)

            selected = []
            for related, score in parasha_topic.related_topics[start:start + k]:
                if score > 1:
                    selected.append(related)

            # Anything short of a full page means there is no story to create
            if len(selected) < k:
                return

            cal = make_parashah_response_from_calendar_entry(parasha_obj)[0]

            story_title = {
                "en": "Topics in " + cal["displayValue"]["en"],
                "he": u"נושאים ב" + cal["displayValue"]["he"],
            }
            story_lead = {
                "en": "Weekly Torah Portion",
                "he": u'פרשת השבוע',
            }
            story = cls.generate_story(topics=selected,
                                       title=story_title,
                                       lead=story_lead,
                                       mustHave=mustHave or [],
                                       **kwargs)
            story.save()
Exemplo n.º 2
0
def normalize_new_topic_title(title):
    """
    Normalize a title for a new topic.

    "#" characters are removed unless the whole title is one of the allowed
    hashtags; the result is then passed through titlecase() and every "|"
    is replaced with "-".

    :param title: raw title string
    :return: the normalized title string
    """
    # The trailing comma matters: ("#MeToo") is just the string "#MeToo", which
    # turned the check below into a substring test, so titles such as "#" or
    # "#Me" wrongly kept their "#".
    ALLOWED_HASHTAGS = ("#MeToo",)
    if title not in ALLOWED_HASHTAGS:
        title = title.replace("#", "")
    # replace | with - b/c | is a reserved char for search sheet queries when filtering on tags
    title = titlecase(title).replace('|', '-')
    return title
Exemplo n.º 3
0
def update_sheet_tags(sheet_id, tags):
	"""
	Sets the tag list for sheet_id to those listed in list 'tags'.

	Every tag is passed through titlecase() and duplicates are then removed,
	so the order of the stored tags is not preserved.

	:param sheet_id: value matched against the "id" field of the sheet document
	:param tags: iterable of tag strings
	:return: {"status": "ok"}
	"""
	# Normalize first, then dedupe: deduping the raw input would let two raw
	# tags that normalize to the same string both end up in the stored list.
	normalizedTags = list(set(titlecase(tag) for tag in tags))
	db.sheets.update({"id": sheet_id}, {"$set": {"tags": normalizedTags}})

	return {"status": "ok"}
Exemplo n.º 4
0
def update_sheet_tags(sheet_id, tags):
    """
    Sets the tag list for sheet_id to those listed in list 'tags'.

    Every tag is passed through titlecase() and duplicates are then removed,
    so the order of the stored tags is not preserved.

    :param sheet_id: value matched against the "id" field of the sheet document
    :param tags: iterable of tag strings
    :return: {"status": "ok"}
    """
    # Normalize first, then dedupe: deduping the raw input would let two raw
    # tags that normalize to the same string both end up in the stored list.
    normalizedTags = list(set(titlecase(tag) for tag in tags))
    db.sheets.update({"id": sheet_id}, {"$set": {"tags": normalizedTags}})

    return {"status": "ok"}
Exemplo n.º 5
0
def update_sheet_tags(sheet_id, tags):
    """
    Sets the tag list for sheet_id to those listed in list 'tags'.

    Every tag is passed through titlecase(), has "|" replaced with "-", and
    duplicates are then removed, so the order of the stored tags is not
    preserved.

    :param sheet_id: value matched against the "id" field of the sheet document
    :param tags: iterable of tag strings
    :return: {"status": "ok"}
    """
    # Normalize first, then dedupe: deduping the raw input would let two raw
    # tags that normalize to the same string both end up in the stored list.
    # replace | with - b/c | is a reserved char for search sheet queries when filtering on tags
    normalizedTags = list(set(titlecase(tag).replace('|', '-') for tag in tags))
    db.sheets.update({"id": sheet_id}, {"$set": {"tags": normalizedTags}})

    return {"status": "ok"}
Exemplo n.º 6
0
def update_sheet_tags(sheet_id, tags):
	"""
	Sets the tag list for sheet_id to those listed in list 'tags'.

	Every tag is passed through titlecase(), has "|" replaced with "-", and
	duplicates are then removed, so the order of the stored tags is not
	preserved.

	:param sheet_id: value matched against the "id" field of the sheet document
	:param tags: iterable of tag strings
	:return: {"status": "ok"}
	"""
	# Normalize first, then dedupe: deduping the raw input would let two raw
	# tags that normalize to the same string both end up in the stored list.
	# replace | with - b/c | is a reserved char for search sheet queries when filtering on tags
	normalizedTags = list(set(titlecase(tag).replace('|', '-') for tag in tags))
	db.sheets.update({"id": sheet_id}, {"$set": {"tags": normalizedTags}})

	return {"status": "ok"}
def normalize_tag(tag):
	"""
	Returns an array of `tags` that `tag` normalizes to, which may be empty or multiple

	- A tag found in FIXED_SUBS maps to exactly its substitute.
	- "#" directly followed by digits keeps only the digits; any other "#"
	  acts as a separator, the same as ",".
	- A tag that starts with "http" or matches the email pattern yields [].
	- The remaining pieces are stripped, empty ones are discarded, and the
	  rest are passed through titlecase() and de-duplicated (order not preserved).
	"""
	if tag in FIXED_SUBS:
		return [FIXED_SUBS[tag]]

	# Raw strings: '\d' and '\g' are invalid escapes in ordinary string literals
	tag = re.sub(r'#(\d+)', r"\g<1>", tag) # replace hash'd numbers ("#1" with just numbers)
	tag = tag.replace("#", ",") # remove # and treat as separator
	if tag.startswith("http"):  # scrub URLS
		return []
	if re.search(r'^\w+([\.-]?\w+)*@\w+([\.-]?\w+)*(\.\w{2,3})+$', tag): # scrub email addresses
		return []
	# Strip before testing for emptiness so whitespace-only pieces
	# (e.g. the middle one in "a, ,b") are dropped as well
	pieces = [piece.strip() for piece in tag.split(",")]
	tags = [titlecase(piece) for piece in pieces if piece]
	return list(set(tags))
Exemplo n.º 8
0
        en_term = min(he_terms[main_he])
        for secondary_term in [r.decode("utf-8") for r in row[1:] if r]:
            he_terms[secondary_term].add(en_term)
"""

# Load proto-terms from English variants sheet.
# For each row:
# - If any of these terms, or their Hebrew synonyms, exist - add to existing term
# - else create new proto-term
with open(en_file, 'rb') as tfile:
    next(tfile)  # skip the first line of the file (presumably a header row - confirm)
    rows = csv.reader(tfile)
    for row in rows:
        # Columns: 0 = Hebrew name (decoded from UTF-8, then passed through h.unescape),
        # 1 = count, 2 = primary English name, 3+ = further English names
        # (empty cells skipped, title-cased, de-duplicated)
        (he_name, count, en_primary,
         en_names) = (h.unescape(row[0].decode("utf-8")), int(row[1]),
                      titlecase(row[2]),
                      list(set([titlecase(x) for x in row[3:] if x])))
        # A truthy replacement_dict entry replaces the primary name - unless it
        # is a list, in which case the whole row is skipped
        if replacement_dict.get(en_primary):
            if isinstance(replacement_dict.get(en_primary), list):
                continue
            en_primary = replacement_dict.get(en_primary)
        # Every name this row goes by, including he_synonyms entries for the Hebrew name
        all_names = [he_name] + [en_primary] + en_names + he_synonyms.get(
            he_name, [])
        # Entries of name_to_term_map already registered under any of those names
        already_used = [
            name_to_term_map.get(x) for x in all_names
            if name_to_term_map.get(x)
        ]
        # Two or more distinct matches: this row's names bridge separate terms; report it
        if len(set(already_used)) >= 2:
            print("Bridged Terms: {} {}".format(
                en_primary, [t.get_primary_title("en") for t in already_used]))
Exemplo n.º 9
0
from sefaria.system.database import db

# For every sheet that has a "tags" field, compute its normalized tag list
# (split on commas, strip whitespace, title-case, de-duplicate) and print the
# sheet id with the tags before and after. Nothing is written back to the
# database in this part of the script.
sheets = db.sheets.find({"tags": {"$exists": "true"}})

for sheet in sheets:
    # Hold on to the original list object: sheet["tags"] is re-pointed below,
    # so reading it back afterwards would show the new tags, not the old ones.
    oldTags = sheet["tags"]

    # split(',') returns the tag itself when it contains no comma, so a single
    # code path handles both plain and comma-separated tags.
    normTags = set()
    for tag in oldTags:
        for piece in tag.split(','):
            normTags.add(titlecase(piece.strip()))

    sheet["tags"] = list(normTags)  # tags list should be unique

    print(sheet["id"])
    print(oldTags)
    print(sheet["tags"])
    print("-------")
Exemplo n.º 10
0
"""
        en_term = min(he_terms[main_he])
        for secondary_term in [r.decode("utf-8") for r in row[1:] if r]:
            he_terms[secondary_term].add(en_term)
"""


# Load proto-terms from English variants sheet.
# For each row:
# - If any of these terms, or their Hebrew synonyms, exist - add to existing term
# - else create new proto-term
with open(en_file, 'rb') as tfile:
    next(tfile)  # skip the first line of the file (presumably a header row - confirm)
    rows = csv.reader(tfile)
    for row in rows:
        # Columns: 0 = Hebrew name (decoded from UTF-8, then passed through h.unescape),
        # 1 = count, 2 = primary English name, 3+ = further English names
        # (empty cells skipped, title-cased, de-duplicated)
        (he_name, count, en_primary, en_names) = (h.unescape(row[0].decode("utf-8")), int(row[1]), titlecase(row[2]), list(set([titlecase(x) for x in row[3:] if x])))
        # A truthy replacement_dict entry replaces the primary name - unless it
        # is a list, in which case the whole row is skipped
        if replacement_dict.get(en_primary):
            if isinstance(replacement_dict.get(en_primary), list):
                continue
            en_primary = replacement_dict.get(en_primary)
        # Every name this row goes by, including he_synonyms entries for the Hebrew name
        all_names = [he_name] + [en_primary] + en_names + he_synonyms.get(he_name, [])
        # Entries of name_to_term_map already registered under any of those names
        already_used = [name_to_term_map.get(x) for x in all_names if name_to_term_map.get(x)]
        # Two or more distinct matches: this row's names bridge separate terms; report it
        if len(set(already_used)) >= 2:
            print u"Bridged Terms: {} {}".format(en_primary, [t.get_primary_title("en") for t in already_used])

        # Exactly one distinct match: make sure that term carries this row's Hebrew name
        elif len(set(already_used)) == 1:
            existing_term = already_used[0]
            assert isinstance(existing_term, Term)

            if not existing_term.has_title(he_name, "he"):
                existing_term.add_title(he_name, "he")
from sefaria.system.database import db

# For every sheet that has a "tags" field, compute its normalized tag list
# (split on commas, strip whitespace, title-case, de-duplicate) and print the
# sheet id with the tags before and after. Nothing is written back to the
# database in this part of the script.
sheets = db.sheets.find({"tags": { "$exists": "true" } })

for sheet in sheets:
	# Hold on to the original list object: sheet["tags"] is re-pointed below,
	# so reading it back afterwards would show the new tags, not the old ones.
	oldTags = sheet["tags"]

	# split(',') returns the tag itself when it contains no comma, so a single
	# code path handles both cases and every piece is stripped the same way.
	normTags = set()
	for tag in oldTags:
		for piece in tag.split(','):
			normTags.add(titlecase(piece.strip()))

	sheet["tags"] = list(normTags) 	# tags list should be unique

	# Single-argument print(...) behaves the same on Python 2 and Python 3
	print(sheet["id"])
	print(oldTags)
	print(sheet["tags"])
	print("-------")