def set_index(chapter: Chapter, indexes: List[int], depth: int) -> List[int]:
    """Recursively assign global/local indexes, paths and crumbs to a chapter tree."""
    if len(indexes) < depth + 1:
        indexes.append(0)

    if has_verses(chapter):
        verse_local_index = 0
        for verse in chapter.verses:
            if verse.part_type == PartType.Hadith or verse.part_type == PartType.Verse:
                indexes[depth] = indexes[depth] + 1
                verse.index = indexes[depth]
                verse_local_index = verse_local_index + 1
                verse.local_index = verse_local_index
                verse.path = chapter.path + ":" + str(verse_local_index)
        chapter.verse_count = indexes[depth] - chapter.verse_start_index

    report_numbering = True
    sequence = None
    if has_chapters(chapter):
        chapter_local_index = 0
        for subchapter in chapter.chapters:
            indexes[depth] = indexes[depth] + 1
            subchapter.index = indexes[depth]
            chapter_local_index = chapter_local_index + 1
            subchapter.local_index = chapter_local_index
            subchapter.path = chapter.path + ":" + str(chapter_local_index)
            subchapter.verse_start_index = indexes[-1]

            # sanity-check that the chapter number in the English title follows on
            # from the previous chapter's number
            if report_numbering and subchapter.part_type == PartType.Chapter:
                chapter_number_str = CHAPTER_TITLE_PATTERN.search(subchapter.titles['en'])
                if chapter_number_str:
                    chapter_number = int(chapter_number_str.group(1))
                    if sequence and sequence + 1 != chapter_number:
                        error_msg = ('Chapter ' + str(chapter_local_index) +
                                     ' with indexes ' + str(indexes) +
                                     ' does not match title ' + str(subchapter.titles))
                        print(error_msg)
                        SEQUENCE_ERRORS.append(error_msg)
                        # raise Exception(error_msg)
                    sequence = chapter_number
            # if chapter_number != chapter_local_index:
            #     print('Chapter ' + str(chapter_local_index) + ' with indexes ' + str(indexes) + ' does not match title ' + str(subchapter.titles))
            #     report_numbering = False
            #     raise Exception('Chapter ' + str(chapter_local_index) + ' with indexes ' + str(indexes) + ' does not match title ' + str(subchapter.titles))

            subchapter.crumbs = copy.copy(chapter.crumbs)
            crumb = Crumb()
            crumb.indexed_titles = {
                Language.EN.value:
                subchapter.part_type.name + ' ' + str(subchapter.local_index)
            }
            crumb.titles = subchapter.titles
            crumb.path = subchapter.path
            subchapter.crumbs.append(crumb)

            indexes = set_index(subchapter, indexes, depth + 1)
        chapter.verse_count = indexes[-1] - chapter.verse_start_index
    return indexes

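# set_index's `indexes` argument carries one running global counter per level of the
# hierarchy; callers pass it zero-filled and sized to the depth of their tree, e.g.
# set_index(q, [0, 0], 0) in build_quran() and set_index(kafi, [0, 0, 0, 0], 0) in
# build_kafi() below.
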
def build_quran() -> Chapter:
    verses = build_verses(get_path("tanzil_net/quran_simple.txt"))

    # (tanzil.net file stem, translator, reference URL); the translation key and the
    # language code are taken from the file stem itself, e.g. "fa.ansarian" -> "fa", "ansarian"
    translations = [
        ("fa.ansarian", "Hussain Ansarian",
         "https://fa.wikipedia.org/wiki/%D8%AD%D8%B3%DB%8C%D9%86_%D8%A7%D9%86%D8%B5%D8%A7%D8%B1%DB%8C%D8%A7%D9%86"),
        ("fa.ayati", "AbdolMohammad Ayati",
         "https://fa.wikipedia.org/wiki/%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%85%D8%AD%D9%85%D8%AF_%D8%A2%DB%8C%D8%AA%DB%8C"),
        ("fa.bahrampour", "Abolfazl Bahrampour",
         "https://fa.wikipedia.org/wiki/%D8%A7%D8%A8%D9%88%D8%A7%D9%84%D9%81%D8%B6%D9%84_%D8%A8%D9%87%D8%B1%D8%A7%D9%85%E2%80%8C%D9%BE%D9%88%D8%B1"),
        ("fa.fooladvand", "Mohammad Mahdi Fooladvand",
         "https://fa.wikipedia.org/wiki/%D9%85%D8%AD%D9%85%D8%AF%D9%85%D9%87%D8%AF%DB%8C_%D9%81%D9%88%D9%84%D8%A7%D8%AF%D9%88%D9%86%D8%AF"),
        ("fa.ghomshei", "Mahdi Elahi Ghomshei",
         "https://fa.wikipedia.org/wiki/%D9%85%D9%87%D8%AF%DB%8C_%D8%A7%D9%84%D9%87%DB%8C_%D9%82%D9%85%D8%B4%D9%87%E2%80%8C%D8%A7%DB%8C"),
        ("fa.khorramdel", "Mostafa Khorramdel",
         "https://rasekhoon.net/mashahir/Show-904328.aspx"),
        ("fa.khorramshahi", "Baha'oddin Khorramshahi",
         "https://fa.wikipedia.org/wiki/%D8%A8%D9%87%D8%A7%D8%A1%D8%A7%D9%84%D8%AF%DB%8C%D9%86_%D8%AE%D8%B1%D9%85%D8%B4%D8%A7%D9%87%DB%8C"),
        ("fa.makarem", "Naser Makarem Shirazi",
         "https://en.wikipedia.org/wiki/Naser_Makarem_Shirazi"),
        ("fa.moezzi", "Mohammad Kazem Moezzi", ""),
        ("fa.mojtabavi", "Sayyed Jalaloddin Mojtabavi",
         "http://rasekhoon.net/mashahir/Show-118481.aspx"),
        ("fa.sadeqi", "Mohammad Sadeqi Tehrani",
         "https://fa.wikipedia.org/wiki/%D9%85%D8%AD%D9%85%D8%AF_%D8%B5%D8%A7%D8%AF%D9%82%DB%8C_%D8%AA%D9%87%D8%B1%D8%A7%D9%86%DB%8C"),
        ("en.ahmedali", "Ahmed Ali",
         "https://en.wikipedia.org/wiki/Ahmed_Ali_(writer)"),
        ("en.ahmedraza", "Ahmed Raza Khan",
         "https://en.wikipedia.org/wiki/Ahmed_Raza_Khan_Barelvi"),
        ("en.arberry", "A. J. Arberry",
         "https://en.wikipedia.org/wiki/Arthur_John_Arberry"),
        ("en.daryabadi", "Abdul Majid Daryabadi",
         "https://en.wikipedia.org/wiki/Abdul_Majid_Daryabadi"),
        ("en.hilali", "Muhammad Taqi-ud-Din al-Hilali and Muhammad Muhsin Khan",
         "https://en.wikipedia.org/wiki/Noble_Quran_(Hilali-Khan)"),
        ("en.itani", "Talal Itani", ""),
        ("en.maududi", "Abul Ala Maududi",
         "https://en.wikipedia.org/wiki/Abul_A%27la_Maududi"),
        ("en.mubarakpuri", "Safi-ur-Rahman al-Mubarakpuri",
         "https://en.wikipedia.org/wiki/Safiur_Rahman_Mubarakpuri"),
        ("en.pickthall", "Mohammed Marmaduke William Pickthall",
         "https://en.wikipedia.org/wiki/Marmaduke_Pickthall"),
        ("en.qarai", "Ali Quli Qarai", ""),
        ("en.qaribullah", "Hasan al-Fatih Qaribullah and Ahmad Darwish", ""),
        ("en.sahih", "Saheeh International", "http://www.saheehinternational.com/"),
        ("en.sarwar", "Muhammad Sarwar",
         "https://en.wikipedia.org/wiki/Shaykh_Muhammad_Sarwar"),
        ("en.shakir", "Mohammad Habib Shakir",
         "https://en.wikipedia.org/wiki/Muhammad_Habib_Shakir"),
        ("en.transliteration", "English Transliteration", ""),
        ("en.wahiduddin", "Wahiduddin Khan",
         "https://en.wikipedia.org/wiki/Wahiduddin_Khan"),
        ("en.yusufali", "Abdullah Yusuf Ali",
         "https://en.wikipedia.org/wiki/Abdullah_Yusuf_Ali"),
    ]
    for stem, translator, url in translations:
        lang, key = stem.split(".")
        insert_quran_translation(
            verses, get_path("tanzil_net/translations/" + stem + ".txt"),
            key, lang, translator, url)

    chapters = build_chapters(get_path("tanzil_net/quran-data.xml"), verses)

    q = Chapter()
    q.index = BOOK_INDEX
    q.path = BOOK_PATH
    q.verse_start_index = 0
    q.part_type = PartType.Book
    q.titles = {
        Language.EN.value: "The Holy Quran",
        Language.AR.value: "القرآن الكريم"
    }
    q.descriptions = {
        Language.EN.value: "Was revealed to the prophet SAW"
    }
    q.chapters = chapters

    crumb = Crumb()
    crumb.titles = q.titles
    crumb.indexed_titles = q.titles
    crumb.path = q.path
    q.crumbs = [crumb]

    set_index(q, [0, 0], 0)
    return q

def build_chapters(file: str, verses: List[Verse]) -> List[Chapter]:
    chapters: List[Chapter] = []
    quran = xml.etree.ElementTree.parse(file).getroot()
    suras = quran.find('suras')
    for s in suras.findall('sura'):
        meta = s.attrib
        index = int(meta['index'])
        ayas = int(meta['ayas'])
        start = int(meta['start'])
        name = meta['name']
        tname = meta['tname']
        ename = meta['ename']
        reveal_type = meta['type']
        order = int(meta['order'])
        rukus = int(meta['rukus'])
        titles = {
            Language.AR.value: name,
            Language.EN.value: ename,
            Language.ENT.value: tname
        }

        sura = Chapter()
        # sura.index = index
        # sura.path = BOOK_PATH + ":" + str(index)
        sura.part_type = PartType.Chapter
        sura.titles = titles
        # sura.verse_count = ayas
        # sura.verse_start_index = start
        sura.reveal_type = reveal_type
        sura.order = order
        sura.rukus = rukus
        sura.verses = verses[start:ayas + start]
        # set verse path
        # for verse in sura.verses:
        #     verse.path = sura.path + ":" + str(verse.index)
        chapters.append(sura)

    sajdas = get_sajda_data(quran)
    for k, v in sajdas.items():
        (sura_index, aya_index) = k
        sajda_chapter = chapters[sura_index - 1]
        sajda_chapter.sajda_type = v
        sajda_chapter.verses[aya_index - 1].sajda_type = v

    # add_group_data(quran, ayaindex, 'juzs', 'juz')
    # add_group_data(quran, ayaindex, 'hizbs', 'quarter')
    # add_group_data(quran, ayaindex, 'manzils', 'manzil')
    # add_group_data(quran, ayaindex, 'rukus', 'ruku')
    # add_group_data(quran, ayaindex, 'pages', 'page')
    return chapters

def build_volume(file,
                 title_en: str,
                 title_ar: str,
                 description: str,
                 last_volume: bool = False) -> Chapter:
    volume = Chapter()
    volume.titles = {Language.EN.value: title_en, Language.AR.value: title_ar}
    volume.descriptions = {Language.EN.value: description}
    if last_volume:
        volume.chapters = build_hubeali_book_8(file)
    else:
        volume.chapters = build_hubeali_books(file)
    volume.part_type = PartType.Volume
    return volume

def build_alhassanain_baabs(file) -> List[Chapter]:
    baabs: List[Chapter] = []
    logger.info("Adding Al-Kafi file %s", file)

    with open(file, 'r', encoding='utf8') as qfile:
        inner_html = qfile.read()
    sections = inner_html.split("<br clear=all>")
    for section in sections:
        section_soup = BeautifulSoup(section, 'html.parser')
        headings = section_soup.select(".Heading1Center")
        if not headings:
            continue

        # process the "the book of" (baab) section
        baab_titles = extract_headings(headings)
        en_title = baab_titles[Language.EN.value]

        baab = None
        for existing_baab in baabs:
            if existing_baab.titles[Language.EN.value] == en_title:
                baab = existing_baab
        if not baab:
            baab = Chapter()
            baab.part_type = PartType.Book
            baab.titles = baab_titles
            baab.chapters = []
            baabs.append(baab)

        # process chapters: the Heading2Center entries come in Arabic/English
        # pairs, so walk through them two at a time
        chapters = section_soup.select(".Heading2Center")
        chapters_len = len(chapters)
        for subchapter_index in range(math.ceil(chapters_len / 2)):
            subchapter_heading_index = subchapter_index * 2
            remaining_chapters = chapters[subchapter_heading_index:]
            if len(remaining_chapters) > 1:
                remaining_chapters = remaining_chapters[:2]
            chapter_titles = extract_headings(remaining_chapters)

            chapter = Chapter()
            chapter.part_type = PartType.Chapter
            chapter.titles = chapter_titles
            chapter.verses = []
            baab.chapters.append(chapter)

            last_element = remaining_chapters[-1]
            last_element = last_element.next_sibling
            verse: Verse = None
            while (last_element is not None
                   and (isinstance(last_element, NavigableString) or
                        (isinstance(last_element, Tag)
                         and 'Heading2Center' not in last_element['class']))):
                is_tag = isinstance(last_element, Tag)
                if is_tag and 'libAr' in last_element['class']:
                    # push the last verse if it's not the start of the chapter
                    if verse is not None:
                        chapter.verses.append(verse)

                    verse = Verse()
                    verse.part_type = PartType.Hadith
                    translation = Translation()
                    translation.name = "hubeali"
                    translation.lang = Language.EN.value
                    translation.text = None
                    verse.translations = [translation]
                    verse.text = last_element.get_text(strip=True)
                if is_tag and 'libNormal' in last_element['class']:
                    if verse.translations[0].text:
                        verse.translations[0].text = (
                            verse.translations[0].text + "\n" +
                            last_element.get_text(strip=True))
                    else:
                        verse.translations[0].text = last_element.get_text(strip=True)
                last_element = last_element.next_sibling

            if verse is not None:
                chapter.verses.append(verse)

    return baabs

def build_kafi() -> Chapter:
    kafi = Chapter()
    kafi.index = BOOK_INDEX
    kafi.path = BOOK_PATH
    kafi.verse_start_index = 0
    kafi.titles = {Language.EN.value: "Al-Kafi", Language.AR.value: "الكافي"}
    kafi.descriptions = {
        Language.EN.value:
        "Of the majestic narrator and the scholar, the jurist, the Sheykh "
        "Muhammad Bin Yaqoub Al-Kulayni, well known as ‘The trustworthy of "
        "Al-Islam Al-Kulayni’, who died in the year 329 H"
    }

    kafi.chapters = []
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-1\\"), "Volume One",
                     "الجزء الأول", "First volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-2\\"), "Volume Two",
                     "الجزء الثاني", "Second volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-3\\"), "Volume Three",
                     "الجزء الثالث", "Third volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-4\\"), "Volume Four",
                     "الجزء الرابع", "Fourth volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-5\\"), "Volume Five",
                     "الجزء الخامس", "Fifth volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-6\\"), "Volume Six",
                     "الجزء السادس", "Sixth volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-7\\"), "Volume Seven",
                     "الجزء السابع", "Seventh volume of Al-Kafi"))
    kafi.chapters.append(
        build_volume(get_path("hubeali_com\\Al-Kafi-Volume-8\\"), "Volume Eight",
                     "الجزء الثامن", "Eighth volume of Al-Kafi", True))

    # kafi.chapters.append(build_volume(
    #     get_path("alhassanain_org\\hubeali_com_usul_kafi_v_01_ed_html\\usul_kafi_v_01_ed.htm"),
    #     "Volume 1", "جلد اول", "First volume of Al-Kafi"))
    # kafi.chapters.append(build_volume(
    #     get_path("alhassanain_org\\hubeali_com_usul_kafi_v_02_ed_html\\usul_kafi_v_02_ed.htm"),
    #     "Volume 2", "جلد 2", "Second volume of Al-Kafi"))
    # kafi.chapters.append(build_volume(
    #     get_path("alhassanain_org\\hubeali_com_usul_kafi_v_03_ed_html\\usul_kafi_v_03_ed.htm"),
    #     "Volume 3", "جلد 3", "Third volume of Al-Kafi"))

    # post_processor(kafi)

    crumb = Crumb()
    crumb.titles = kafi.titles
    crumb.indexed_titles = kafi.titles
    crumb.path = kafi.path
    kafi.crumbs = [crumb]

    set_index(kafi, [0, 0, 0, 0], 0)
    return kafi

def build_hubeali_book_8(dirname) -> List[Chapter]:
    logger.info("Adding Al-Kafi dir %s", dirname)
    cfiles = glob.glob(dirname + "c*.xhtml")

    book = Chapter()
    book.part_type = PartType.Book
    book.titles = {}
    # Arabic title comes from previous file
    book.titles[Language.AR.value] = "كتاب الرَّوْضَةِ"
    book.titles[Language.EN.value] = "The Book - Garden (of Flowers)"
    book.chapters = []

    is_the_end = False
    previous_hadith_num = 14449
    chapter = None
    chapter_title_ar = None
    hadith_ar = []
    hadith_en = []
    for cfile in cfiles:
        if is_the_end:
            break
        logger.info("Processing file %s", cfile)
        with open(cfile, 'r', encoding='utf8') as qfile:
            file_html = qfile.read()
            file_html = file_correction(cfile, file_html)
            soup = BeautifulSoup(file_html, 'html.parser')

            heading = soup.body.h1
            if we_dont_care(heading):
                continue
            if table_of_contents(heading):
                hadith_ar.append(get_contents(soup.body.contents[-2]))
                continue

            heading_en = get_contents(heading.a)
            is_hadith_title = V8_HADITH_TITLE_PATTERN.match(heading_en)
            # sometimes the anchor is terminated early
            if not heading_en or is_hadith_title:
                heading_en = get_contents(heading)

            if chapter_title_ar or not chapter:
                chapter = Chapter()
                chapter.part_type = PartType.Chapter
                chapter.titles = {}
                if chapter_title_ar:
                    chapter.titles[Language.AR.value] = chapter_title_ar
                else:
                    chapter.titles[Language.AR.value] = "بِسْمِ اللَّهِ الرَّحْمَنِ الرَّحِيمِ"
                if heading_en:
                    chapter.titles[Language.EN.value] = heading_en
                else:
                    chapter.titles[Language.EN.value] = (
                        "In the name of Allah, the Beneficent, the Merciful")
                chapter_title_ar = None
                chapter.verses = []
                book.chapters.append(chapter)
            elif is_hadith_title:
                hadith_en.append(heading_en)

            last_element = soup.find('p', 'first-in-chapter')
            while last_element:
                if is_newline(last_element):
                    last_element = last_element.next_sibling
                    continue

                is_tag = isinstance(last_element, Tag)
                is_paragraph = is_tag and last_element.name == 'p'
                is_not_section_break_paragraph = (
                    is_paragraph and not is_section_break_tag(last_element))
                is_arabic = is_arabic_tag(last_element)

                element_content = get_contents(last_element)
                element_content = element_content.replace(
                    'style="font-style: italic; font-weight: bold"', 'class="ibTxt"')
                element_content = element_content.replace(
                    'style="font-weight: bold"', 'class="bTxt"')
                element_content = element_content.replace(
                    'style="font-style: italic"', 'class="iTxt"')

                is_new_hadith = V8_HADITH_BEGINNING_PATTERN.match(
                    last_element.get_text(strip=True))
                is_the_end = element_content.startswith("تَمَّ كِتَابُ الرَّوْضَةِ مِنَ")

                # commit the hadith that has been building up until now when we
                # encounter the beginning of a new hadith (or the end of the book)
                if (is_new_hadith or is_the_end) and hadith_ar and hadith_en:
                    add_hadith(chapter, hadith_ar, hadith_en)
                    hadith_ar = []
                    hadith_en = []

                if is_new_hadith:
                    hadith_num = int(is_new_hadith.group(1))
                    if previous_hadith_num + 1 != hadith_num:
                        print("Skipped one hadith " + str(previous_hadith_num) +
                              " to " + str(hadith_num) + " title: " + element_content)
                    previous_hadith_num = hadith_num

                if is_chapter_title(last_element):
                    if hadith_ar and hadith_en:
                        add_hadith(chapter, hadith_ar, hadith_en)
                        hadith_ar = []
                        hadith_en = []
                    chapter_title_ar = element_content
                elif is_arabic:
                    hadith_ar.append(element_content)
                elif is_not_section_break_paragraph:
                    hadith_en.append(element_content)

                if is_the_end:
                    add_hadith(chapter, hadith_ar, hadith_en, PartType.Heading)

                last_element = last_element.next_sibling

    return [book]

def build_hubeali_books(dirname) -> List[Chapter]:
    books: List[Chapter] = []
    logger.info("Adding Al-Kafi dir %s", dirname)
    cfiles = glob.glob(dirname + "c*.xhtml")

    book = None
    chapter = None
    book_title_ar = None
    chapter_title_ar = None
    hadith_ar = []
    hadith_en = []
    for cfile in cfiles:
        logger.info("Processing file %s", cfile)
        with open(cfile, 'r', encoding='utf8') as qfile:
            file_html = qfile.read()
            file_html = file_correction(cfile, file_html)
            soup = BeautifulSoup(file_html, 'html.parser')

            heading = soup.body.h1
            if we_dont_care(heading):
                continue
            if table_of_contents(heading):
                book_title_ar = get_contents(soup.body.contents[-2])
                continue

            heading_en = get_contents(heading.a)
            # sometimes the anchor is terminated early
            if not heading_en:
                heading_en = get_contents(heading)

            if book_title_ar:
                book = Chapter()
                book.part_type = PartType.Book
                book.titles = {}
                # Arabic title comes from the previous file
                book.titles[Language.AR.value] = book_title_ar
                book.titles[Language.EN.value] = heading_en
                book_title_ar = None
                book.chapters = []
                books.append(book)
            elif (chapter_title_ar or not chapter) and heading_en.startswith('Chapter'):
                chapter = Chapter()
                chapter.part_type = PartType.Chapter
                chapter.titles = {}
                chapter.titles[Language.AR.value] = chapter_title_ar
                chapter.titles[Language.EN.value] = heading_en
                chapter_title_ar = None
                chapter.verses = []
                book.chapters.append(chapter)
            elif chapter_title_ar:
                add_hadith(chapter, [chapter_title_ar], [heading_en], PartType.Heading)
                chapter_title_ar = None

            last_element = soup.find('p', 'first-in-chapter')
            while last_element:
                if is_newline(last_element):
                    last_element = last_element.next_sibling
                    continue

                is_tag = isinstance(last_element, Tag)
                is_paragraph = is_tag and last_element.name == 'p'
                is_not_section_break_paragraph = (
                    is_paragraph and not is_section_break_tag(last_element))
                is_arabic = is_arabic_tag(last_element)

                element_content = get_contents(last_element)
                element_content = element_content.replace(
                    'style="font-style: italic; font-weight: bold"', 'class="ibTxt"')
                element_content = element_content.replace(
                    'style="font-weight: bold"', 'class="bTxt"')
                element_content = element_content.replace(
                    'style="font-style: italic"', 'class="iTxt"')

                is_end_of_hadith = END_OF_HADITH_PATTERN.search(element_content)

                if is_book_title(last_element):
                    if hadith_ar and hadith_en:
                        add_hadith(chapter, hadith_ar, hadith_en, PartType.Heading)
                        hadith_ar = []
                        hadith_en = []
                    book_title_ar = element_content
                    chapter = None
                elif is_chapter_title(last_element):
                    if hadith_ar and hadith_en:
                        if chapter:
                            add_hadith(chapter, hadith_ar, hadith_en)
                        else:
                            book.descriptions = {}
                            book.descriptions[Language.AR.value] = join_texts(hadith_ar)
                            book.descriptions[Language.EN.value] = join_texts(hadith_en)
                        hadith_ar = []
                        hadith_en = []
                    chapter_title_ar = element_content
                elif is_arabic:
                    hadith_ar.append(element_content)
                # elif is_book_ending(last_element):
                #     add_hadith(chapter, hadith_ar, [element_content], PartType.Heading)
                #     hadith_ar = []
                #     hadith_en = []
                elif is_not_section_break_paragraph:
                    hadith_en.append(element_content)

                if is_end_of_hadith:
                    add_hadith(chapter, hadith_ar, hadith_en)
                    hadith_ar = []
                    hadith_en = []

                last_element = last_element.next_sibling

    return books

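
# A minimal usage sketch (an illustration, not part of the original build pipeline):
# build both corpora and print a one-line summary per top-level part. It assumes the
# tanzil_net and hubeali_com source files are available wherever get_path() resolves.
if __name__ == "__main__":
    for book in (build_quran(), build_kafi()):
        print(book.titles[Language.EN.value] + ": " + str(len(book.chapters)) +
              " top-level parts, " + str(book.verse_count) + " verses/ahadith")
        for part in book.chapters:
            print("  " + part.path + " " + part.titles.get(Language.EN.value, ""))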