def __init__(self, lexicon_name):
    """Populate the trie with nikkud-stripped headwords of one lexicon.

    Each main headword and each of its alternate headwords is inserted
    (stripped of vowel points) as a key mapping back to the canonical
    headword. Entries are iterated newest-first (``_id`` descending).
    """
    super(LexiconTrie, self).__init__(self.dict_letter_scope)
    query = {"parent_lexicon": lexicon_name}
    for lex_entry in LexiconEntrySet(query, sort=[("_id", -1)]):
        # Canonical headword first, then any alternates on the entry.
        spellings = [lex_entry.headword] + list(getattr(lex_entry, "alt_headwords", []))
        for spelling in spellings:
            self[hebrew.strip_nikkud(spelling)] = lex_entry.headword
def clean_line(line): line = strip_nikkud(line) line = re.sub(u':', '', line) reg_parentheses = re.compile(u'\((.*?)\)') reg_brackets = re.compile(u'\[(.*?)\]') in_per = reg_parentheses.search(line) in_bra = reg_brackets.search(line) reg_ayyen_tur = re.compile(u'''ו?(עיין|עי'|ע"ש) בטור''') line = re.sub(u'\[.*?אלפס.*?\]', u'', line) line = re.sub(u'טור ו?שו"ע', u'טוש"ע', line) pos = re.search(reg_ayyen_tur, line) if pos: line = line[:pos.start()] if in_per: if in_bra: clean = re.sub(reg_brackets, ur'\1', line) # brackets are always correct clean = re.sub(reg_parentheses, '', clean) else: clean = re.sub(reg_parentheses, ur'\1', line) elif in_bra: clean = re.sub(reg_brackets, ur'\1', line) # brackets are always correct else: clean = line return clean
def clean_line(line): line = strip_nikkud(line) replace_dict = {u'[.:\?]': u'', u'[”״]': u'"', u'[’׳]': u"'"} #note put \. in the file/ how can i check if it is right? line = multiple_replace(line, replace_dict, using_regex=True) # line = re.sub(u'[:\?]', '', line) # line = re.sub(u'”', u'"', line) reg_parentheses = re.compile(u'\((.*?)\)') reg_brackets = re.compile(u'\[(.*?)\]') in_per = reg_parentheses.search(line) in_bra = reg_brackets.search(line) reg_ayyen_tur = re.compile(u'''ו?(עיין|עי'|ע"ש) בטור''') reg_lo_manu = re.compile(u'''(?P<a>(\u05d0\u05da )?\u05dc\u05d0 \u05de\u05e0(.*?))(\u05e1\u05de"?\u05d2|\u05e8\u05de\u05d1"?\u05dd|\u05d8\u05d5\u05e8|\n)''') line = re.sub(u'\[.*?אלפס.*?\]', u'', line) line = re.sub(u'טור ו?שו"ע', u'טוש"ע', line) f_ayyen = re.search(reg_ayyen_tur, line) f_lo_manu = re.search(reg_lo_manu, line) if f_ayyen: line = line[:f_ayyen.start()] if f_lo_manu: line = re.sub(f_lo_manu.group('a'), u"", line) if in_per: if in_bra: clean = re.sub(reg_brackets, ur'\1', line) # brackets are always correct clean = re.sub(reg_parentheses, '', clean) else: clean = re.sub(reg_parentheses, ur'\1', line) elif in_bra: clean = re.sub(reg_brackets, ur'\1', line) # brackets are always correct else: clean = line return clean
def body(self):
    # UI regression test: navigate the table of contents to
    # Midrash -> Ein Yaakov, open masechet 2 chapter 3, and verify the
    # first section's text (with vowel points stripped) contains the
    # expected opening phrase.
    self.load_toc()
    self.click_toc_category("Midrash")
    self.click_toc_text("Ein Yaakov")
    self.click_source_title()
    self.click_masechet_and_chapter('2', '3')
    section = self.get_section_txt('1')
    # Compare against nikkud-stripped text so vowel marks don't affect the match.
    assert 'רבי זירא הוה קא משתמיט' in strip_nikkud(section)
def find_in_segment(self, st, lang='he', citing_only=False, replace=True):
    #todo: implemant replace = True
    """
    Returns a list of Ref objects derived from string

    :param string st: the input string
    :param lang: "he" note: "en" is not yet supported in ibid
    :param citing_only: boolean whether to use only records explicitly marked as being referenced in text.
    :param replace: currently unused (see todo above).
    :return: list of :class:`Ref` objects, list of locations and list of ref types (either REF or SHAM, defined in CitationFinder)
    """
    refs = []
    locations = []
    types = []
    # Failure accumulators are kept for debugging; they are not returned
    # (see the commented-out tail of the return statement).
    failed_refs = []
    # failed_non_refs = []
    failed_shams = []
    assert lang == 'he'  # todo: support english
    st = strip_nikkud(st)
    all_refs = self._citationFinder.get_potential_refs(st, lang, citing_only=citing_only)
    for item, location, type in all_refs:
        if type == CitationFinder.REF_INT:
            # Explicit citation: resolve via title + sections.
            try:
                refs += [self._tr.resolve(item.index_node.full_title(), item.sections)]
                locations += [location]
                types += [type]
                #refs += [(self._tr.resolve(item.index_node.full_title(), item.sections), 'ref')]
            except (IbidRefException, IbidKeyNotFoundException) as e:
                failed_refs += [item.normal()]
        elif type == CitationFinder.NON_REF_INT or type == CitationFinder.IGNORE_INT:
            # failed_non_refs += [item.group()]
            # Non-citation mention of a book name: tell the tracker to ignore it.
            self._tr.ignore_book_name_keys()
        elif type == CitationFinder.SHAM_INT:
            # "Sham" (ibid) citation: item is either a raw match string or a
            # (title, sections) pair, depending on how the finder produced it.
            try:
                if isinstance(item, str):
                    refs += [self._tr.resolve(None, match_str=item)]
                    locations += [location]
                    types += [type]
                    #refs += [(self._tr.resolve(None, match_str=item), 'sham')]
                else:
                    refs += [self._tr.resolve(item[0], sections=item[1])]
                    locations += [location]
                    types += [type]
                    #refs += [(self._tr.resolve(item[0], sections=item[1]), 'sham')]
            except (IbidRefException, IbidKeyNotFoundException, InputError) as e:
                failed_shams += [item]
    return refs, locations, types  # , failed_refs, failed_non_refs, failed_shams
def find_in_segment(self, st, lang='he', citing_only=False, replace=True):
    #todo: implemant replace = True
    """
    Returns a list of Ref objects derived from string

    :param string st: the input string
    :param lang: "he" note: "en" is not yet supported in ibid
    :param citing_only: boolean whether to use only records explicitly marked as being referenced in text.
    :param replace: currently unused (see todo above).
    :return: list of :class:`Ref` objects, list of locations and list of ref types (either REF or SHAM, defined in CitationFinder)
    """
    refs = []
    locations = []
    types = []
    # Failure accumulators kept for debugging; not returned (see return tail).
    failed_refs = []
    # failed_non_refs = []
    failed_shams = []
    assert lang == 'he'  # todo: support english
    st = strip_nikkud(st)
    # BUGFIX: citing_only was accepted but never forwarded, so the flag was
    # silently ignored; pass it through as the sibling implementation does.
    all_refs = self._citationFinder.get_potential_refs(st, lang, citing_only=citing_only)
    for item, location, type in all_refs:
        if type == CitationFinder.REF_INT:
            # Explicit citation: resolve via title + sections.
            try:
                refs += [self._tr.resolve(item.index_node.full_title(), item.sections)]
                locations += [location]
                types += [type]
                #refs += [(self._tr.resolve(item.index_node.full_title(), item.sections), 'ref')]
            except (IbidRefException, IbidKeyNotFoundException) as e:
                failed_refs += [item.normal()]
        elif type == CitationFinder.NON_REF_INT or type == CitationFinder.IGNORE_INT:
            # failed_non_refs += [item.group()]
            # Non-citation mention of a book name: tell the tracker to ignore it.
            self._tr.ignore_book_name_keys()
        elif type == CitationFinder.SHAM_INT:
            # "Sham" (ibid) citation: item is either a raw match string or a
            # (title, sections) pair.
            try:
                if isinstance(item, unicode):
                    refs += [self._tr.resolve(None, match_str=item)]
                    locations += [location]
                    types += [type]
                    #refs += [(self._tr.resolve(None, match_str=item), 'sham')]
                else:
                    refs += [self._tr.resolve(item[0], sections=item[1])]
                    locations += [location]
                    types += [type]
                    #refs += [(self._tr.resolve(item[0], sections=item[1]), 'sham')]
            except (IbidRefException, IbidKeyNotFoundException, InputError) as e:
                failed_shams += [item]
    return refs, locations, types  # , failed_refs, failed_non_refs, failed_shams
def body(self):
    # UI regression test covering two Midrash texts.
    # 1) Ein Yaakov 2:3 — verify the opening phrase appears after stripping nikkud.
    self.load_toc()
    self.click_toc_category("Midrash")
    self.click_toc_text("Ein Yaakov")
    self.click_source_title()
    self.click_masechet_and_chapter('2', '3')
    section = self.get_section_txt('1')
    assert 'רבי זירא הוה' in strip_nikkud(section)
    # 2) Midrash Mishlei chapter 4 — verify expected phrase (no nikkud stripping here).
    self.load_toc()
    self.click_toc_category("Midrash").click_toc_text("Midrash Mishlei")
    self.click_source_title()
    self.click_chapter('4')
    section = self.get_section_txt('1')
    assert 'מכל משמר נצור ליבך' in section
def create_section(oref, dicta_text, dicta_vtitle):
    # Align the Dicta (vowelized) text with Sefaria's existing segmentation of
    # `oref`, then save each aligned slice of the vowelized text into the
    # corresponding segment under the `dicta_vtitle` version.
    with_nikkud, without_nukkud = dicta_text.split(), strip_nikkud(dicta_text).split()
    sefaria_text = prepare_sefaria_text(oref)
    # match_text aligns the stripped Dicta words against the Sefaria words;
    # 'matches' holds (start, end) word indices per Sefaria segment.
    dh_match = match_text(without_nukkud, sefaria_text)
    matches = dh_match['matches']
    segments = oref.all_segment_refs()
    # NOTE(review): assert is stripped under `python -O`; a raised exception
    # would be safer for this alignment invariant.
    assert len(segments) == len(matches)
    for segment, match in zip(segments, matches):
        tc = segment.text('he', dicta_vtitle)
        new_segment_text = u' '.join(with_nikkud[match[0]:match[1] + 1])
        if not new_segment_text:
            # Fall back to the existing text when alignment produced nothing.
            # NOTE(review): `davidson_vtitle` is not defined in this function —
            # presumably a module-level constant; confirm it is not a typo for
            # `dicta_vtitle`.
            new_segment_text = segment.text('he', davidson_vtitle).text
        tc.text = new_segment_text
        tc.save()
def count_terms(query=None, lang=None):
    #todo: move to object model. Maybe. What's this doing?
    """
    Counts all terms in texts matching query, lang.
    Saves results to the terms collection in db.

    :param query: Mongo query dict used to select index records (defaults to
        all indexes). Previously a mutable default argument (``{}``).
    :param lang: language code; "ar" is looked up as "he".
    """
    if query is None:
        query = {}  # avoid the mutable-default-argument pitfall
    terms = {}
    bavli_names = db.index.find(query).distinct("title")
    query = {"title": {"$in": bavli_names}}
    refs = counts.generate_refs_list(query)  #library.ref_list() needs query argument
    lookup_lang = "he" if lang == "ar" else lang
    for ref in refs:
        print(ref)
        #text = texts.get_text(ref, commentary=False)
        text = TextFamily(Ref(ref), commentary=False).contents()
        for i, line in enumerate(text.get(lookup_lang, [])):
            # strip punctuation
            for c in string.punctuation:
                line = line.replace(c, "")
            for term in line.split(" "):
                line_ref = "%s:%d" % (ref, i + 1)
                term = hebrew.strip_nikkud(term)
                if term in terms:
                    terms[term]["occurrences"] += 1
                    terms[term]["refs"].add(line_ref)
                else:
                    terms[term] = {
                        "term": term,
                        "occurrences": 1,
                        "language": lang,
                        "refs": set([line_ref]),
                    }
    for term in terms:
        print(term)
        # only include up to 20 random ref samples
        sample_size = min(len(terms[term]["refs"]), 20)
        # BUGFIX: random.sample() no longer accepts a set (deprecated in 3.9,
        # removed in 3.11) — convert to a list first. sample() already
        # returns a list, so the outer list() was redundant.
        terms[term]["refs"] = sample(list(terms[term]["refs"]), sample_size)
        db.terms.save(terms[term])
def count_terms(query={}, lang=None):
    #todo: move to object model. Maybe. What's this doing?
    """
    Counts all terms in texts matching query, lang
    Saves reults to terms collection in db.
    """
    # NOTE(review): mutable default argument; harmless here only because
    # `query` is rebound (not mutated) below.
    terms = {}
    bavli_names = db.index.find(query).distinct("title")
    query = {"title": {"$in": bavli_names}}
    refs = counts.generate_refs_list(query)  #library.ref_list() needs query argument
    # Arabic terms are looked up against the Hebrew text.
    lookup_lang = "he" if lang == "ar" else lang
    for ref in refs:
        print ref
        #text = texts.get_text(ref, commentary=False)
        text = TextFamily(Ref(ref), commentary=False).contents()
        for i, line in enumerate(text.get(lookup_lang, [])):
            # strip punctuation
            for c in string.punctuation:
                line = line.replace(c, "")
            these_terms = line.split(" ")
            for term in these_terms:
                line_ref = "%s:%d" % (ref, i + 1)
                term = hebrew.strip_nikkud(term)
                if term in terms:
                    terms[term]["occurrences"] += 1
                    terms[term]["refs"].add(line_ref)
                else:
                    terms[term] = {
                        "term": term,
                        "occurrences": 1,
                        "language": lang,
                        "refs": set([line_ref])
                    }
    for term in terms:
        print term
        # only include up to 20 random ref samples
        sample_size = len(terms[term]["refs"]) if len(terms[term]["refs"]) < 20 else 20
        terms[term]["refs"] = list(sample(terms[term]["refs"], sample_size))
        db.terms.save(terms[term])
def find_all_shams_in_st(self, st, lang='he'):
    """Find parenthesized "sham" (ibid) citations in a source text.

    :param st: source text
    :param lang: language code (currently only Hebrew text is handled)
    :return: a list of tuples (Ref object that originally was a Sham, (start, end) location)
    """
    from sefaria.utils.hebrew import strip_nikkud
    st = strip_nikkud(st)
    sham_refs = []
    # finds shams in parenthesis without רבשם
    sham_re = re.compile(u'(\(|\([^)]* )שם(\)| [^(]*\))')
    for sham in sham_re.finditer(st):
        matched = sham.group()
        # todo: find statistics of the cutoff size of a ref-citation; 6 is a guess
        if len(re.split('\s+', matched)) > 6:
            continue
        try:
            sham_refs.append((self.parse_sham(matched), sham.span()))
        except (IbidKeyNotFoundException, IbidRefException):
            # Unresolvable sham: skip it. (maybe want to call ignore here?)
            pass
    return sham_refs
def run_shaminator(titles=None, with_real_refs=False, SEG_DIST = 5, create_ref_dict = True):
    # Report generator: for each title, finds explicit citations and "sham"
    # (ibid) citations, and writes one HTML table per title to
    # ibid_output/ibid_<title>.html, highlighting the real citation ('r'),
    # the sham citation ('s') and other parens ('p') in the segment text.
    base_url = u"https://www.sefaria.org/"
    title_list = []
    cats = ["Midrash", "Halakha", "Philosophy"]
    collective_titles = ["Rashi", "Kessef Mishneh"]
    for cat in cats:
        title_list += library.get_indexes_in_category(cat)
    for cTitle in collective_titles:
        title_list += library.get_indices_by_collective_title(cTitle)
    # NOTE(review): this unconditionally discards the list built above and
    # uses `titles` instead (None unless the caller passed one) — confirm
    # this override is intentional.
    title_list = titles
    for ititle, title in enumerate(title_list):
        print u"-"*50
        print title, ititle+1, '/', len(title_list)
        print u"-"*50
        html = u""" <!DOCTYPE html> <html> <head> <link rel='stylesheet' type='text/css' href='styles.css'> <meta charset='utf-8'> </head> <body> <table> <tr><td>Row Id</td><td>Book Ref</td><td>Ref Found</td><td>Sham Found</td><td>Sham Text</td></tr> """
        index = library.get_index(title)
        inst = IndexIbidFinder(index)
        if create_ref_dict:
            try:
                ref_dict = inst.find_in_index()  # ref_dict - OrderedDict. keys: segments. values: dict {'refs': [Refs obj found in this seg], 'locations': [], 'types': []}
            except AssertionError:
                print "Skipping {}".format(title)
                continue  # problem with Ein Ayah
        # Per-book memory of the last explicit citation seen, keyed by index
        # title (split Tanakh books share one key via double_tanakh_books).
        last_index_ref_seen = {}
        row_num = 1
        char_padding = 20
        double_tanakh_books = {"I Samuel": "Samuel", "II Samuel": "Samuel", "I Kings": "Kings", "II Kings": "Kings", "I Chronicles": "Chronicles", "II Chronicles": "Chronicles"}
        for k, v in ref_dict.items():
            curr_ref = Ref(k)
            for i, (r, l, t) in enumerate(izip(v['refs'], v['locations'], v['types'])):
                sham_ref_key = r.index.title if r.index.title not in double_tanakh_books else double_tanakh_books[r.index.title]
                if t == CitationFinder.SHAM_INT and last_index_ref_seen[sham_ref_key] is not None:
                    # Sham citation: recall the last explicit citation of this book.
                    last_ref_with_citation, last_location_with_citation, last_ref_seen = last_index_ref_seen[sham_ref_key]
                else:  # if t == CitationFinder.REF_INT:
                    last_index_ref_seen[sham_ref_key] = (curr_ref, l, r)
                    if not with_real_refs:
                        continue
                    # NOTE(review): `last_ref_with_citation` (and `dist` below,
                    # on the sham path) may be unbound on the first iteration —
                    # relies on a prior loop pass; confirm.
                    dist = curr_ref.distance(last_ref_with_citation)
                    last_ref_with_citation = curr_ref
                    last_location_with_citation = l
                    last_ref_seen = r
                    r = u"N/A"
                # dist = curr_ref.distance(last_ref_with_citation)
                print dist
                if dist == 0:
                    # Citation and sham live in the same segment: highlight both in one text.
                    text = strip_nikkud(curr_ref.text('he').text)
                    start_ind = 0 if last_location_with_citation[0] - char_padding < 0 else last_location_with_citation[0] - char_padding
                    end_ind = l[1] + char_padding
                    before = text[start_ind:last_location_with_citation[0]]
                    real_ref = text[last_location_with_citation[0]:last_location_with_citation[1]]
                    middle = text[last_location_with_citation[1]:l[0]] if last_location_with_citation[1] <= l[0] else u""
                    sham_ref = text[l[0]:l[1]] if t == CitationFinder.SHAM_INT else u""
                    after = text[l[1]:end_ind]
                    text = u"{}<span class='r'>{}</span>{}<span class='s'>{}</span>{}".format(before, real_ref, middle, sham_ref, after)
                else:
                    # Citation and sham are in different segments: stitch start,
                    # intervening, and end texts together.
                    start_text = strip_nikkud(last_ref_with_citation.text('he').text)
                    # start_text = strip_nikkud(start_text)[last_location_with_citation[0]:]
                    end_text = strip_nikkud(curr_ref.text('he').text)
                    # end_text = strip_nikkud(end_text)[:l[1]+1]
                    if dist > SEG_DIST:
                        continue
                    elif dist > 1 and dist <= SEG_DIST:
                        print u"{} {} {}".format(curr_ref, last_ref_with_citation.next_segment_ref(), curr_ref.prev_segment_ref())
                        mid_text = last_ref_with_citation.next_segment_ref().to(curr_ref.prev_segment_ref()).text('he').text
                        # Flatten nested lists of segment strings into one string.
                        while isinstance(mid_text, list):
                            mid_text = reduce(lambda a, b: a + b, mid_text)
                    else:
                        mid_text = u""
                    start_ind = 0 if last_location_with_citation[0] - char_padding < 0 else last_location_with_citation[0] - char_padding
                    end_ind = l[1] + char_padding
                    start_before = start_text[start_ind:last_location_with_citation[0]]
                    start_real_ref = start_text[last_location_with_citation[0]:last_location_with_citation[1]]
                    start_after = start_text[last_location_with_citation[1]:]
                    end_before = end_text[:l[0]]
                    end_sham_ref = end_text[l[0]:l[1]]
                    end_after = end_text[l[1]:end_ind]
                    text = u"{}<span class='r'>{}</span>{} {} {}<span class='s'>{}</span>{}".format(start_before, start_real_ref, start_after, mid_text, end_before, end_sham_ref, end_after)
                text = bleach.clean(text, strip=True, tags=[u'span'], attributes=[u'class'])
                # surround all non interesting parens with spans
                text = re.sub(ur"(?<!>)(\([^)]+\))(?!<)", ur"<span class='p'>\1</span>", text)
                rowclass = u"realrefrow" if t == CitationFinder.REF_INT else u"shamrefrow"
                row = u"<tr class='{}' ><td>{}</td><td><a href='{}' target='_blank'>{}</a></td><td>{}</td><td>{}</td><td class='he'>{}</td></tr>"\
                    .format(rowclass, row_num, base_url + curr_ref.url(), k, last_ref_seen, r, text)
                html += row
                row_num += 1
        html += u""" </table> </body> </html> """
        with codecs.open('ibid_output/ibid_{}.html'.format(title), 'wb', encoding='utf8') as f:
            f.write(html)
import sefaria.tracker as tracker

# Script: auto-link Rashi comments that quote Onkelos' translation to the
# corresponding Onkelos verse, across the five books of the Torah.
# A Rashi segment "matches" if it contains any of these trigger phrases
# ("as its translation", "Onkelos", "translates", etc.).
patterns = [u"כתרגומו", u"ותרגומו", u"תרגומו", u"וזהו שתרגם אונקלוס", u"אונקלוס", u"לכך מתרגם", u"מתרגם"]
books = ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy"]
total = 0
for book in books:
    rashi_book = "Rashi on " + book
    onkelos_book = "Onkelos " + book
    i = library.get_index(rashi_book)
    assert isinstance(i, CommentaryIndex)
    all_rashis = i.all_segment_refs()
    # Loop through all of the Rashis
    for rashi_ref in all_rashis:
        rashi = strip_nikkud(TextChunk(rashi_ref, "he", "On Your Way").text)
        # If it matches the pattern
        for pat in patterns:
            if pat in rashi:
                # Map the Rashi ref onto the same verse in Onkelos by
                # swapping the book title in the section-level ref.
                onkelos_ref = Ref(rashi_ref.section_ref().normal().replace(rashi_book, onkelos_book))
                d = {
                    "refs": [rashi_ref.normal(), onkelos_ref.normal()],
                    "type": "reference",
                    "auto": True,
                    "generated_by": "Rashi - Onkelos Linker",
                }
                # 28 is the acting user id for the tracker.
                tracker.add(28, Link, d)
                print u"{}\t{}\t{}".format(rashi_ref.normal(), pat, rashi.strip())
                total += 1
                # Only one link per Rashi segment, even if several patterns match.
                break
def test_strip_nikkud(self):
    """strip_nikkud removes vowel points, leaving bare consonantal text."""
    cases = [
        ('הַדְּבָרִים אֲשֶׁר', 'הדברים אשר'),
        ("הַמּוֹצִיא בְמִסְפָּר צְבָאָם לְכֻלָּם בְּשֵׁם יִקְרָא", "המוציא במספר צבאם לכלם בשם יקרא"),
    ]
    for pointed, bare in cases:
        assert h.strip_nikkud(pointed) == bare
def run_shaminator(titles=None, with_real_refs=False):
    # Report generator (earlier variant without SEG_DIST/create_ref_dict):
    # finds explicit and "sham" (ibid) citations per title and writes one
    # HTML table per title to ibid_output/ibid_<title>.html.
    base_url = u"https://www.sefaria.org/"
    title_list = []
    cats = ["Midrash", "Halakha", "Philosophy"]
    collective_titles = ["Rashi", "Kessef Mishneh"]
    for cat in cats:
        title_list += library.get_indexes_in_category(cat)
    for cTitle in collective_titles:
        title_list += library.get_indices_by_collective_title(cTitle)
    # NOTE(review): unconditionally overrides the list built above with the
    # caller-supplied `titles` — confirm intentional.
    title_list = titles
    for ititle, title in enumerate(title_list):
        print u"-"*50
        print title, ititle+1, '/', len(title_list)
        print u"-"*50
        html = u""" <!DOCTYPE html> <html> <head> <link rel='stylesheet' type='text/css' href='styles.css'> <meta charset='utf-8'> </head> <body> <table> <tr><td>Row Id</td><td>Book Ref</td><td>Ref Found</td><td>Sham Found</td><td>Sham Text</td></tr> """
        index = library.get_index(title)
        inst = IndexIbidFinder(index)
        try:
            ref_dict = inst.find_in_index()
        except AssertionError:
            print "Skipping {}".format(title)
            continue  # problem with Ein Ayah
        # Per-book memory of the last explicit citation, keyed by index title
        # (split Tanakh books share one key via double_tanakh_books).
        last_index_ref_seen = {}
        row_num = 1
        char_padding = 20
        double_tanakh_books = {"I Samuel": "Samuel", "II Samuel": "Samuel", "I Kings": "Kings", "II Kings": "Kings", "I Chronicles": "Chronicles", "II Chronicles": "Chronicles"}
        for k, v in ref_dict.items():
            curr_ref = Ref(k)
            for r, l, t in izip(v['refs'], v['locations'], v['types']):
                sham_ref_key = r.index.title if r.index.title not in double_tanakh_books else double_tanakh_books[r.index.title]
                if t == CitationFinder.SHAM_INT and last_index_ref_seen[sham_ref_key] is not None:
                    # Sham citation: recall the last explicit citation of this book.
                    last_ref_with_citation, last_location_with_citation, last_ref_seen = last_index_ref_seen[sham_ref_key]
                else:  # if t == CitationFinder.REF_INT:
                    last_index_ref_seen[sham_ref_key] = (curr_ref, l, r)
                    if not with_real_refs:
                        continue
                    last_ref_with_citation = curr_ref
                    last_location_with_citation = l
                    last_ref_seen = r
                    r = u"N/A"
                dist = curr_ref.distance(last_ref_with_citation)
                if dist == 0:
                    # Citation and sham live in the same segment.
                    text = strip_nikkud(curr_ref.text('he').text)
                    start_ind = 0 if last_location_with_citation[0] - char_padding < 0 else last_location_with_citation[0] - char_padding
                    end_ind = l[1] + char_padding
                    before = text[start_ind:last_location_with_citation[0]]
                    real_ref = text[last_location_with_citation[0]:last_location_with_citation[1]]
                    middle = text[last_location_with_citation[1]:l[0]] if last_location_with_citation[1] <= l[0] else u""
                    sham_ref = text[l[0]:l[1]] if t == CitationFinder.SHAM_INT else u""
                    after = text[l[1]:end_ind]
                    text = u"{}<span class='r'>{}</span>{}<span class='s'>{}</span>{}".format(before, real_ref, middle, sham_ref, after)
                else:
                    # Citation and sham are in different segments.
                    start_text = strip_nikkud(last_ref_with_citation.text('he').text)
                    # start_text = strip_nikkud(start_text)[last_location_with_citation[0]:]
                    end_text = strip_nikkud(curr_ref.text('he').text)
                    # end_text = strip_nikkud(end_text)[:l[1]+1]
                    if dist > 1:
                        print u"{} {} {}".format(curr_ref, last_ref_with_citation.next_segment_ref(), curr_ref.prev_segment_ref())
                        mid_text = last_ref_with_citation.next_segment_ref().to(curr_ref.prev_segment_ref()).text('he').text
                        # Flatten nested lists of segment strings into one string.
                        while isinstance(mid_text, list):
                            mid_text = reduce(lambda a, b: a + b, mid_text)
                    else:
                        mid_text = u""
                    start_ind = 0 if last_location_with_citation[0] - char_padding < 0 else last_location_with_citation[0] - char_padding
                    end_ind = l[1] + char_padding
                    start_before = start_text[start_ind:last_location_with_citation[0]]
                    start_real_ref = start_text[last_location_with_citation[0]:last_location_with_citation[1]]
                    start_after = start_text[last_location_with_citation[1]:]
                    end_before = end_text[:l[0]]
                    end_sham_ref = end_text[l[0]:l[1]]
                    end_after = end_text[l[1]:end_ind]
                    text = u"{}<span class='r'>{}</span>{} {} {}<span class='s'>{}</span>{}".format(start_before, start_real_ref, start_after, mid_text, end_before, end_sham_ref, end_after)
                text = bleach.clean(text, strip=True, tags=[u'span'], attributes=[u'class'])
                # surround all non interesting parens with spans
                text = re.sub(ur"(?<!>)(\([^)]+\))(?!<)", ur"<span class='p'>\1</span>", text)
                rowclass = u"realrefrow" if t == CitationFinder.REF_INT else u"shamrefrow"
                row = u"<tr class='{}' ><td>{}</td><td><a href='{}' target='_blank'>{}</a></td><td>{}</td><td>{}</td><td class='he'>{}</td></tr>"\
                    .format(rowclass, row_num, base_url + curr_ref.url(), k, last_ref_seen, r, text)
                html += row
                row_num += 1
        html += u""" </table> </body> </html> """
        with codecs.open('ibid_output/ibid_{}.html'.format(title), 'wb',encoding='utf8') as f:
            f.write(html)
# Script fragment: count sheet tags, machine-translate the English tags to
# Hebrew ('iw'), strip nikkud from the translations, and aggregate counts per
# Hebrew tag. Tags that translate to themselves are treated as untranslatable.
for sheet in sheet_list:
    for tag in sheet.get("tags", []):
        original_tag_counter[tag] += 1
# Tags sorted by frequency, split by script.
sorted_tags = sorted(original_tag_counter, key=original_tag_counter.get, reverse=True)
sorted_en_tags = [t for t in sorted_tags if not is_hebrew(t)]
sorted_he_tags = [t for t in sorted_tags if is_hebrew(t)]
translated_hebrew_tags = defaultdict(list)
for en_tag in sorted_en_tags:
    translation = translate_client.translate(en_tag, target_language='iw', source_language='en')
    he_tag = strip_nikkud(translation['translatedText'])
    if en_tag == he_tag:
        # Translation came back unchanged — treat as untranslatable.
        print("Couldn't translate {}".format(en_tag))
        untranslated_en_tags += [en_tag]
        continue
    print("{}:{}".format(he_tag, en_tag))
    translated_hebrew_tags[he_tag] += [en_tag]
# Total original-tag occurrences per translated Hebrew tag.
overall_counts = {
    he_tag: sum([
        original_tag_counter[en_tag]
        for en_tag in translated_hebrew_tags[he_tag]
    ])
    for he_tag in translated_hebrew_tags
}
ordered_translated_he_terms = sorted(overall_counts,
    # NOTE(review): statement truncated at the chunk boundary — the sorted()
    # call's key/reverse arguments continue beyond this excerpt.
    # NOTE(review): this fragment begins mid-list — the `patterns = [` opener
    # lies before this excerpt. Trigger phrases indicating a Rashi quotes
    # Onkelos' translation.
    u"כתרגומו", u"ותרגומו", u"תרגומו", u"וזהו שתרגם אונקלוס", u"אונקלוס", u"לכך מתרגם", u"מתרגם"
]
books = ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy"]
total = 0
for book in books:
    rashi_book = "Rashi on " + book
    onkelos_book = "Onkelos " + book
    i = library.get_index(rashi_book)
    assert isinstance(i, CommentaryIndex)
    all_rashis = i.all_segment_refs()
    # Loop through all of the Rashis
    for rashi_ref in all_rashis:
        rashi = strip_nikkud(TextChunk(rashi_ref, "he", "On Your Way").text)
        # If it matches the pattern
        for pat in patterns:
            if pat in rashi:
                # Map the Rashi ref onto the same verse in Onkelos by
                # swapping the book title in the section-level ref.
                onkelos_ref = Ref(rashi_ref.section_ref().normal().replace(rashi_book, onkelos_book))
                d = {
                    "refs": [rashi_ref.normal(), onkelos_ref.normal()],
                    "type": "reference",
                    "auto": True,
                    "generated_by": "Rashi - Onkelos Linker"
                }
                tracker.add(28, Link, d)
                print u"{}\t{}\t{}".format(rashi_ref.normal(), pat,
                    # NOTE(review): statement truncated at the chunk boundary —
                    # the format() arguments continue beyond this excerpt.
def text_strip_nikkud(self):
    # NOTE(review): almost certainly a typo for `test_strip_nikkud` — pytest
    # only collects methods named `test_*`, so this assertion never runs.
    # Confirm and rename (renaming is an interface change, so it is flagged
    # here rather than applied).
    assert h.strip_nikkud(u'הַדְּבָרִים אֲשֶׁר') == u'הדברים אשר'
def test_strip_nikkud(self):
    """strip_nikkud removes vowel points, leaving bare consonantal text."""
    for pointed, bare in [
        (u'הַדְּבָרִים אֲשֶׁר', u'הדברים אשר'),
        (u"הַמּוֹצִיא בְמִסְפָּר צְבָאָם לְכֻלָּם בְּשֵׁם יִקְרָא", u"המוציא במספר צבאם לכלם בשם יקרא"),
    ]:
        assert h.strip_nikkud(pointed) == bare