def set_index(chapter: Chapter, indexes: List[int], depth: int) -> List[int]:
    """Recursively assign global/local indexes, paths and crumbs to a chapter tree."""
    if len(indexes) < depth + 1:
        indexes.append(0)

    if has_verses(chapter):
        verse_local_index = 0
        for verse in chapter.verses:
            if verse.part_type == PartType.Hadith or verse.part_type == PartType.Verse:
                indexes[depth] = indexes[depth] + 1
                verse.index = indexes[depth]
                verse_local_index = verse_local_index + 1
                verse.local_index = verse_local_index
                verse.path = chapter.path + ":" + str(verse_local_index)
        chapter.verse_count = indexes[depth] - chapter.verse_start_index

    report_numbering = True
    sequence = None
    if has_chapters(chapter):
        chapter_local_index = 0
        for subchapter in chapter.chapters:
            indexes[depth] = indexes[depth] + 1
            subchapter.index = indexes[depth]
            chapter_local_index = chapter_local_index + 1
            subchapter.local_index = chapter_local_index
            subchapter.path = chapter.path + ":" + str(chapter_local_index)
            subchapter.verse_start_index = indexes[-1]

            # sanity-check that the chapter number in the English title follows on
            # from the previous chapter's number
            if report_numbering and subchapter.part_type == PartType.Chapter:
                chapter_number_str = CHAPTER_TITLE_PATTERN.search(subchapter.titles['en'])
                if chapter_number_str:
                    chapter_number = int(chapter_number_str.group(1))
                    if sequence and sequence + 1 != chapter_number:
                        error_msg = ('Chapter ' + str(chapter_local_index) +
                                     ' with indexes ' + str(indexes) +
                                     ' does not match title ' + str(subchapter.titles))
                        print(error_msg)
                        SEQUENCE_ERRORS.append(error_msg)
                        # raise Exception(error_msg)
                    sequence = chapter_number
            # if chapter_number != chapter_local_index:
            #     print('Chapter ' + str(chapter_local_index) + ' with indexes ' + str(indexes) + ' does not match title ' + str(subchapter.titles))
            #     report_numbering = False
            #     raise Exception('Chapter ' + str(chapter_local_index) + ' with indexes ' + str(indexes) + ' does not match title ' + str(subchapter.titles))

            subchapter.crumbs = copy.copy(chapter.crumbs)
            crumb = Crumb()
            crumb.indexed_titles = {
                Language.EN.value:
                subchapter.part_type.name + ' ' + str(subchapter.local_index)
            }
            crumb.titles = subchapter.titles
            crumb.path = subchapter.path
            subchapter.crumbs.append(crumb)

            indexes = set_index(subchapter, indexes, depth + 1)
        chapter.verse_count = indexes[-1] - chapter.verse_start_index
    return indexes

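# set_index's `indexes` argument carries one running global counter per level of the
# hierarchy; callers pass it zero-filled and sized to the depth of their tree, e.g.
# set_index(q, [0, 0], 0) in build_quran() and set_index(kafi, [0, 0, 0, 0], 0) in
# build_kafi() below.
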
def build_quran() -> Chapter:
    verses = build_verses(get_path("tanzil_net/quran_simple.txt"))

    # (tanzil.net file stem, translator, reference URL); the translation key and the
    # language code are taken from the file stem itself, e.g. "fa.ansarian" -> "fa", "ansarian"
    translations = [
        ("fa.ansarian", "Hussain Ansarian",
         "https://fa.wikipedia.org/wiki/%D8%AD%D8%B3%DB%8C%D9%86_%D8%A7%D9%86%D8%B5%D8%A7%D8%B1%DB%8C%D8%A7%D9%86"),
        ("fa.ayati", "AbdolMohammad Ayati",
         "https://fa.wikipedia.org/wiki/%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%85%D8%AD%D9%85%D8%AF_%D8%A2%DB%8C%D8%AA%DB%8C"),
        ("fa.bahrampour", "Abolfazl Bahrampour",
         "https://fa.wikipedia.org/wiki/%D8%A7%D8%A8%D9%88%D8%A7%D9%84%D9%81%D8%B6%D9%84_%D8%A8%D9%87%D8%B1%D8%A7%D9%85%E2%80%8C%D9%BE%D9%88%D8%B1"),
        ("fa.fooladvand", "Mohammad Mahdi Fooladvand",
         "https://fa.wikipedia.org/wiki/%D9%85%D8%AD%D9%85%D8%AF%D9%85%D9%87%D8%AF%DB%8C_%D9%81%D9%88%D9%84%D8%A7%D8%AF%D9%88%D9%86%D8%AF"),
        ("fa.ghomshei", "Mahdi Elahi Ghomshei",
         "https://fa.wikipedia.org/wiki/%D9%85%D9%87%D8%AF%DB%8C_%D8%A7%D9%84%D9%87%DB%8C_%D9%82%D9%85%D8%B4%D9%87%E2%80%8C%D8%A7%DB%8C"),
        ("fa.khorramdel", "Mostafa Khorramdel",
         "https://rasekhoon.net/mashahir/Show-904328.aspx"),
        ("fa.khorramshahi", "Baha'oddin Khorramshahi",
         "https://fa.wikipedia.org/wiki/%D8%A8%D9%87%D8%A7%D8%A1%D8%A7%D9%84%D8%AF%DB%8C%D9%86_%D8%AE%D8%B1%D9%85%D8%B4%D8%A7%D9%87%DB%8C"),
        ("fa.makarem", "Naser Makarem Shirazi",
         "https://en.wikipedia.org/wiki/Naser_Makarem_Shirazi"),
        ("fa.moezzi", "Mohammad Kazem Moezzi", ""),
        ("fa.mojtabavi", "Sayyed Jalaloddin Mojtabavi",
         "http://rasekhoon.net/mashahir/Show-118481.aspx"),
        ("fa.sadeqi", "Mohammad Sadeqi Tehrani",
         "https://fa.wikipedia.org/wiki/%D9%85%D8%AD%D9%85%D8%AF_%D8%B5%D8%A7%D8%AF%D9%82%DB%8C_%D8%AA%D9%87%D8%B1%D8%A7%D9%86%DB%8C"),
        ("en.ahmedali", "Ahmed Ali",
         "https://en.wikipedia.org/wiki/Ahmed_Ali_(writer)"),
        ("en.ahmedraza", "Ahmed Raza Khan",
         "https://en.wikipedia.org/wiki/Ahmed_Raza_Khan_Barelvi"),
        ("en.arberry", "A. J. Arberry",
         "https://en.wikipedia.org/wiki/Arthur_John_Arberry"),
        ("en.daryabadi", "Abdul Majid Daryabadi",
         "https://en.wikipedia.org/wiki/Abdul_Majid_Daryabadi"),
        ("en.hilali", "Muhammad Taqi-ud-Din al-Hilali and Muhammad Muhsin Khan",
         "https://en.wikipedia.org/wiki/Noble_Quran_(Hilali-Khan)"),
        ("en.itani", "Talal Itani", ""),
        ("en.maududi", "Abul Ala Maududi",
         "https://en.wikipedia.org/wiki/Abul_A%27la_Maududi"),
        ("en.mubarakpuri", "Safi-ur-Rahman al-Mubarakpuri",
         "https://en.wikipedia.org/wiki/Safiur_Rahman_Mubarakpuri"),
        ("en.pickthall", "Mohammed Marmaduke William Pickthall",
         "https://en.wikipedia.org/wiki/Marmaduke_Pickthall"),
        ("en.qarai", "Ali Quli Qarai", ""),
        ("en.qaribullah", "Hasan al-Fatih Qaribullah and Ahmad Darwish", ""),
        ("en.sahih", "Saheeh International", "http://www.saheehinternational.com/"),
        ("en.sarwar", "Muhammad Sarwar",
         "https://en.wikipedia.org/wiki/Shaykh_Muhammad_Sarwar"),
        ("en.shakir", "Mohammad Habib Shakir",
         "https://en.wikipedia.org/wiki/Muhammad_Habib_Shakir"),
        ("en.transliteration", "English Transliteration", ""),
        ("en.wahiduddin", "Wahiduddin Khan",
         "https://en.wikipedia.org/wiki/Wahiduddin_Khan"),
        ("en.yusufali", "Abdullah Yusuf Ali",
         "https://en.wikipedia.org/wiki/Abdullah_Yusuf_Ali"),
    ]
    for stem, translator, url in translations:
        lang, key = stem.split(".")
        insert_quran_translation(
            verses, get_path("tanzil_net/translations/" + stem + ".txt"),
            key, lang, translator, url)

    chapters = build_chapters(get_path("tanzil_net/quran-data.xml"), verses)

    q = Chapter()
    q.index = BOOK_INDEX
    q.path = BOOK_PATH
    q.verse_start_index = 0
    q.part_type = PartType.Book
    q.titles = {
        Language.EN.value: "The Holy Quran",
        Language.AR.value: "القرآن الكريم"
    }
    q.descriptions = {
        Language.EN.value: "Was revealed to the prophet SAW"
    }
    q.chapters = chapters

    crumb = Crumb()
    crumb.titles = q.titles
    crumb.indexed_titles = q.titles
    crumb.path = q.path
    q.crumbs = [crumb]

    set_index(q, [0, 0], 0)
    return q

def build_chapters(file: str, verses: List[Verse]) -> List[Chapter]:
    chapters: List[Chapter] = []
    quran = xml.etree.ElementTree.parse(file).getroot()
    suras = quran.find('suras')
    for s in suras.findall('sura'):
        meta = s.attrib
        index = int(meta['index'])
        ayas = int(meta['ayas'])
        start = int(meta['start'])
        name = meta['name']
        tname = meta['tname']
        ename = meta['ename']
        reveal_type = meta['type']
        order = int(meta['order'])
        rukus = int(meta['rukus'])
        titles = {
            Language.AR.value: name,
            Language.EN.value: ename,
            Language.ENT.value: tname
        }

        sura = Chapter()
        # sura.index = index
        # sura.path = BOOK_PATH + ":" + str(index)
        sura.part_type = PartType.Chapter
        sura.titles = titles
        # sura.verse_count = ayas
        # sura.verse_start_index = start
        sura.reveal_type = reveal_type
        sura.order = order
        sura.rukus = rukus
        sura.verses = verses[start:ayas + start]
        # set verse path
        # for verse in sura.verses:
        #     verse.path = sura.path + ":" + str(verse.index)
        chapters.append(sura)

    sajdas = get_sajda_data(quran)
    for k, v in sajdas.items():
        (sura_index, aya_index) = k
        sajda_chapter = chapters[sura_index - 1]
        sajda_chapter.sajda_type = v
        sajda_chapter.verses[aya_index - 1].sajda_type = v

    # add_group_data(quran, ayaindex, 'juzs', 'juz')
    # add_group_data(quran, ayaindex, 'hizbs', 'quarter')
    # add_group_data(quran, ayaindex, 'manzils', 'manzil')
    # add_group_data(quran, ayaindex, 'rukus', 'ruku')
    # add_group_data(quran, ayaindex, 'pages', 'page')
    return chapters

def build_volume(file,
                 title_en: str,
                 title_ar: str,
                 description: str,
                 last_volume: bool = False) -> Chapter:
    volume = Chapter()
    volume.titles = {Language.EN.value: title_en, Language.AR.value: title_ar}
    volume.descriptions = {Language.EN.value: description}
    if last_volume:
        volume.chapters = build_hubeali_book_8(file)
    else:
        volume.chapters = build_hubeali_books(file)
    volume.part_type = PartType.Volume
    return volume

def build_alhassanain_baabs(file) -> List[Chapter]:
    baabs: List[Chapter] = []
    logger.info("Adding Al-Kafi file %s", file)

    with open(file, 'r', encoding='utf8') as qfile:
        inner_html = qfile.read()
    sections = inner_html.split("<br clear=all>")
    for section in sections:
        section_soup = BeautifulSoup(section, 'html.parser')
        headings = section_soup.select(".Heading1Center")
        if not headings:
            continue

        # process the "the book of" (baab) section
        baab_titles = extract_headings(headings)
        en_title = baab_titles[Language.EN.value]

        baab = None
        for existing_baab in baabs:
            if existing_baab.titles[Language.EN.value] == en_title:
                baab = existing_baab
        if not baab:
            baab = Chapter()
            baab.part_type = PartType.Book
            baab.titles = baab_titles
            baab.chapters = []
            baabs.append(baab)

        # process chapters: the Heading2Center entries come in Arabic/English
        # pairs, so walk through them two at a time
        chapters = section_soup.select(".Heading2Center")
        chapters_len = len(chapters)
        for subchapter_index in range(math.ceil(chapters_len / 2)):
            subchapter_heading_index = subchapter_index * 2
            remaining_chapters = chapters[subchapter_heading_index:]
            if len(remaining_chapters) > 1:
                remaining_chapters = remaining_chapters[:2]
            chapter_titles = extract_headings(remaining_chapters)

            chapter = Chapter()
            chapter.part_type = PartType.Chapter
            chapter.titles = chapter_titles
            chapter.verses = []
            baab.chapters.append(chapter)

            last_element = remaining_chapters[-1]
            last_element = last_element.next_sibling
            verse: Verse = None
            while (last_element is not None
                   and (isinstance(last_element, NavigableString) or
                        (isinstance(last_element, Tag)
                         and 'Heading2Center' not in last_element['class']))):
                is_tag = isinstance(last_element, Tag)
                if is_tag and 'libAr' in last_element['class']:
                    # push the last verse if it's not the start of the chapter
                    if verse is not None:
                        chapter.verses.append(verse)

                    verse = Verse()
                    verse.part_type = PartType.Hadith
                    translation = Translation()
                    translation.name = "hubeali"
                    translation.lang = Language.EN.value
                    translation.text = None
                    verse.translations = [translation]
                    verse.text = last_element.get_text(strip=True)
                if is_tag and 'libNormal' in last_element['class']:
                    if verse.translations[0].text:
                        verse.translations[0].text = (
                            verse.translations[0].text + "\n" +
                            last_element.get_text(strip=True))
                    else:
                        verse.translations[0].text = last_element.get_text(strip=True)
                last_element = last_element.next_sibling

            if verse is not None:
                chapter.verses.append(verse)

    return baabs

def build_kafi() -> Chapter:
    kafi = Chapter()
    kafi.index = BOOK_INDEX
    kafi.path = BOOK_PATH
    kafi.verse_start_index = 0
    kafi.titles = {Language.EN.value: "Al-Kafi", Language.AR.value: "الكافي"}
    kafi.descriptions = {
        Language.EN.value:
        "Of the majestic narrator and the scholar, the jurist, the Sheykh "
        "Muhammad Bin Yaqoub Al-Kulayni, well known as ‘The trustworthy of "
        "Al-Islam Al-Kulayni’, who died in the year 329 H"
    }

    kafi.chapters = []
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-1\\"), "Volume One",
                     "الجزء الأول", "First volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-2\\"), "Volume Two",
                     "الجزء الثاني", "Second volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-3\\"), "Volume Three",
                     "الجزء الثالث", "Third volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-4\\"), "Volume Four",
                     "الجزء الرابع", "Fourth volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-5\\"), "Volume Five",
                     "الجزء الخامس", "Fifth volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-6\\"), "Volume Six",
                     "الجزء السادس", "Sixth volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-7\\"), "Volume Seven",
                     "الجزء السابع", "Seventh volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-8\\"), "Volume Eight",
                     "الجزء الثامن", "Eighth volume of Al-Kafi", True))

    # kafi.chapters.append(build_volume(
    #     get_path("alhassanain_org\\hubeali_com_usul_kafi_v_01_ed_html\\usul_kafi_v_01_ed.htm"),
    #     "Volume 1", "جلد اول", "First volume of Al-Kafi"))
    # kafi.chapters.append(build_volume(
    #     get_path("alhassanain_org\\hubeali_com_usul_kafi_v_02_ed_html\\usul_kafi_v_02_ed.htm"),
    #     "Volume 2", "جلد 2", "Second volume of Al-Kafi"))
    # kafi.chapters.append(build_volume(
    #     get_path("alhassanain_org\\hubeali_com_usul_kafi_v_03_ed_html\\usul_kafi_v_03_ed.htm"),
    #     "Volume 3", "جلد 3", "Third volume of Al-Kafi"))

    # post_processor(kafi)

    crumb = Crumb()
    crumb.titles = kafi.titles
    crumb.indexed_titles = kafi.titles
    crumb.path = kafi.path
    kafi.crumbs = [crumb]

    set_index(kafi, [0, 0, 0, 0], 0)
    return kafi

def build_hubeali_book_8(dirname) -> List[Chapter]:
    logger.info("Adding Al-Kafi dir %s", dirname)
    cfiles = glob.glob(dirname + "c*.xhtml")

    book = Chapter()
    book.part_type = PartType.Book
    book.titles = {}
    # Arabic title comes from previous file
    book.titles[Language.AR.value] = "كتاب الرَّوْضَةِ"
    book.titles[Language.EN.value] = "The Book - Garden (of Flowers)"
    book.chapters = []

    is_the_end = False
    previous_hadith_num = 14449
    chapter = None
    chapter_title_ar = None
    hadith_ar = []
    hadith_en = []
    for cfile in cfiles:
        if is_the_end:
            break
        logger.info("Processing file %s", cfile)
        with open(cfile, 'r', encoding='utf8') as qfile:
            file_html = qfile.read()
            file_html = file_correction(cfile, file_html)
            soup = BeautifulSoup(file_html, 'html.parser')

            heading = soup.body.h1
            if we_dont_care(heading):
                continue
            if table_of_contents(heading):
                hadith_ar.append(get_contents(soup.body.contents[-2]))
                continue

            heading_en = get_contents(heading.a)
            is_hadith_title = V8_HADITH_TITLE_PATTERN.match(heading_en)
            # sometimes the anchor is terminated early
            if not heading_en or is_hadith_title:
                heading_en = get_contents(heading)

            if chapter_title_ar or not chapter:
                chapter = Chapter()
                chapter.part_type = PartType.Chapter
                chapter.titles = {}
                if chapter_title_ar:
                    chapter.titles[Language.AR.value] = chapter_title_ar
                else:
                    chapter.titles[Language.AR.value] = "بِسْمِ اللَّهِ الرَّحْمَنِ الرَّحِيمِ"
                if heading_en:
                    chapter.titles[Language.EN.value] = heading_en
                else:
                    chapter.titles[Language.EN.value] = (
                        "In the name of Allah, the Beneficent, the Merciful")
                chapter_title_ar = None
                chapter.verses = []
                book.chapters.append(chapter)
            elif is_hadith_title:
                hadith_en.append(heading_en)

            last_element = soup.find('p', 'first-in-chapter')
            while last_element:
                if is_newline(last_element):
                    last_element = last_element.next_sibling
                    continue

                is_tag = isinstance(last_element, Tag)
                is_paragraph = is_tag and last_element.name == 'p'
                is_not_section_break_paragraph = (
                    is_paragraph and not is_section_break_tag(last_element))
                is_arabic = is_arabic_tag(last_element)

                element_content = get_contents(last_element)
                element_content = element_content.replace(
                    'style="font-style: italic; font-weight: bold"', 'class="ibTxt"')
                element_content = element_content.replace(
                    'style="font-weight: bold"', 'class="bTxt"')
                element_content = element_content.replace(
                    'style="font-style: italic"', 'class="iTxt"')

                is_new_hadith = V8_HADITH_BEGINNING_PATTERN.match(
                    last_element.get_text(strip=True))
                is_the_end = element_content.startswith("تَمَّ كِتَابُ الرَّوْضَةِ مِنَ")

                # commit the hadith that has been building up until now when we
                # encounter the beginning of a new hadith (or the end of the book)
                if (is_new_hadith or is_the_end) and hadith_ar and hadith_en:
                    add_hadith(chapter, hadith_ar, hadith_en)
                    hadith_ar = []
                    hadith_en = []

                if is_new_hadith:
                    hadith_num = int(is_new_hadith.group(1))
                    if previous_hadith_num + 1 != hadith_num:
                        print("Skipped one hadith " + str(previous_hadith_num) +
                              " to " + str(hadith_num) + " title: " + element_content)
                    previous_hadith_num = hadith_num

                if is_chapter_title(last_element):
                    if hadith_ar and hadith_en:
                        add_hadith(chapter, hadith_ar, hadith_en)
                        hadith_ar = []
                        hadith_en = []
                    chapter_title_ar = element_content
                elif is_arabic:
                    hadith_ar.append(element_content)
                elif is_not_section_break_paragraph:
                    hadith_en.append(element_content)

                if is_the_end:
                    add_hadith(chapter, hadith_ar, hadith_en, PartType.Heading)

                last_element = last_element.next_sibling

    return [book]

def build_hubeali_books(dirname) -> List[Chapter]:
    books: List[Chapter] = []
    logger.info("Adding Al-Kafi dir %s", dirname)
    cfiles = glob.glob(dirname + "c*.xhtml")

    book = None
    chapter = None
    book_title_ar = None
    chapter_title_ar = None
    hadith_ar = []
    hadith_en = []
    for cfile in cfiles:
        logger.info("Processing file %s", cfile)
        with open(cfile, 'r', encoding='utf8') as qfile:
            file_html = qfile.read()
            file_html = file_correction(cfile, file_html)
            soup = BeautifulSoup(file_html, 'html.parser')

            heading = soup.body.h1
            if we_dont_care(heading):
                continue
            if table_of_contents(heading):
                book_title_ar = get_contents(soup.body.contents[-2])
                continue

            heading_en = get_contents(heading.a)
            # sometimes the anchor is terminated early
            if not heading_en:
                heading_en = get_contents(heading)

            if book_title_ar:
                book = Chapter()
                book.part_type = PartType.Book
                book.titles = {}
                # Arabic title comes from the previous file
                book.titles[Language.AR.value] = book_title_ar
                book.titles[Language.EN.value] = heading_en
                book_title_ar = None
                book.chapters = []
                books.append(book)
            elif (chapter_title_ar or not chapter) and heading_en.startswith('Chapter'):
                chapter = Chapter()
                chapter.part_type = PartType.Chapter
                chapter.titles = {}
                chapter.titles[Language.AR.value] = chapter_title_ar
                chapter.titles[Language.EN.value] = heading_en
                chapter_title_ar = None
                chapter.verses = []
                book.chapters.append(chapter)
            elif chapter_title_ar:
                add_hadith(chapter, [chapter_title_ar], [heading_en], PartType.Heading)
                chapter_title_ar = None

            last_element = soup.find('p', 'first-in-chapter')
            while last_element:
                if is_newline(last_element):
                    last_element = last_element.next_sibling
                    continue

                is_tag = isinstance(last_element, Tag)
                is_paragraph = is_tag and last_element.name == 'p'
                is_not_section_break_paragraph = (
                    is_paragraph and not is_section_break_tag(last_element))
                is_arabic = is_arabic_tag(last_element)

                element_content = get_contents(last_element)
                element_content = element_content.replace(
                    'style="font-style: italic; font-weight: bold"', 'class="ibTxt"')
                element_content = element_content.replace(
                    'style="font-weight: bold"', 'class="bTxt"')
                element_content = element_content.replace(
                    'style="font-style: italic"', 'class="iTxt"')

                is_end_of_hadith = END_OF_HADITH_PATTERN.search(element_content)

                if is_book_title(last_element):
                    if hadith_ar and hadith_en:
                        add_hadith(chapter, hadith_ar, hadith_en, PartType.Heading)
                        hadith_ar = []
                        hadith_en = []
                    book_title_ar = element_content
                    chapter = None
                elif is_chapter_title(last_element):
                    if hadith_ar and hadith_en:
                        if chapter:
                            add_hadith(chapter, hadith_ar, hadith_en)
                        else:
                            book.descriptions = {}
                            book.descriptions[Language.AR.value] = join_texts(hadith_ar)
                            book.descriptions[Language.EN.value] = join_texts(hadith_en)
                        hadith_ar = []
                        hadith_en = []
                    chapter_title_ar = element_content
                elif is_arabic:
                    hadith_ar.append(element_content)
                # elif is_book_ending(last_element):
                #     add_hadith(chapter, hadith_ar, [element_content], PartType.Heading)
                #     hadith_ar = []
                #     hadith_en = []
                elif is_not_section_break_paragraph:
                    hadith_en.append(element_content)

                if is_end_of_hadith:
                    add_hadith(chapter, hadith_ar, hadith_en)
                    hadith_ar = []
                    hadith_en = []

                last_element = last_element.next_sibling

    return books

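
# A minimal usage sketch (an illustration, not part of the original build pipeline):
# build both corpora and print a one-line summary per top-level part. It assumes the
# tanzil_net and hubeali_com source files are available wherever get_path() resolves.
if __name__ == "__main__":
    for book in (build_quran(), build_kafi()):
        print(book.titles[Language.EN.value] + ": " + str(len(book.chapters)) +
              " top-level parts, " + str(book.verse_count) + " verses/ahadith")
        for part in book.chapters:
            print("  " + part.path + " " + part.titles.get(Language.EN.value, ""))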