示例#1
0
def insert_quran_translation(verses, verse_translations, file, key, lang,
                             author, bio):
    """Load a line-per-verse translation file and attach it to ``verses``.

    A new ``Translation`` whose id is ``"<lang>.<key>"`` is appended to
    ``verse_translations``. Every line of ``file`` that is non-blank after
    stripping and does not start with ``#`` is then stored, in file order,
    as the single-item list ``verses[i].translations[<id>]``, where ``i``
    counts only the lines that were kept.

    Args:
        verses: Indexable sequence of verse objects, each exposing a mutable
            ``translations`` mapping.
        verse_translations: List that receives the new ``Translation``.
        file: Path of the UTF-8 encoded translation file.
        key: Second component of the translation id.
        lang: First component of the translation id; also stored as the
            translation's ``lang``.
        author: Stored as the translation's ``name``.
        bio: Not used by this function.

    Raises:
        IndexError: When ``verses`` is a list and the file contains more
            usable lines than there are verses.
    """
    logger.info("Adding Quran translation file %s", file)

    # Named translation_id rather than ``id`` so the builtin is not shadowed.
    translation_id = lang + "." + key
    qt = Translation()
    qt.lang = lang
    qt.name = author
    qt.id = translation_id
    verse_translations.append(qt)

    with open(file, 'r', encoding='utf8') as qfile:
        # Iterate the file lazily instead of materialising every line first.
        stripped_lines = (line.strip() for line in qfile)
        texts = (text for text in stripped_lines
                 if text and not text.startswith('#'))
        for index, text in enumerate(texts):
            verses[index].translations[translation_id] = [text]
示例#2
0
def build_alhassanain_baabs(file) -> List[Chapter]:
    """Parse an HTML file into a list of book-level chapters ("baabs").

    The file content is split on the literal ``<br clear=all>`` and each
    piece is parsed on its own:

    * ``.Heading1Center`` elements are passed to ``extract_headings`` to get
      the book titles. Sections without such headings are skipped. A section
      whose English title equals that of an already collected book is merged
      into that book instead of creating a new one.
    * ``.Heading2Center`` elements are consumed two at a time; each pair (or
      a trailing single heading) provides the titles of one chapter.
    * The siblings following a chapter's last heading are scanned until the
      next tag carrying ``Heading2Center``. A tag with class ``libAr`` starts
      a new hadith and supplies its text; a tag with class ``libNormal`` adds
      one translation paragraph to the current hadith.

    Args:
        file: Path of the UTF-8 encoded HTML file.

    Returns:
        The books in order of first appearance, each holding its chapters.
    """
    baabs: List[Chapter] = []
    logger.info("Adding Al-Kafi file %s", file)

    translation = Translation()
    translation.name = "HubeAli.com"
    translation.lang = Language.EN.value
    translation.id = HUBEALI_TRANSLATION_ID

    # Only the read needs the file handle; parsing happens after it is closed.
    with open(file, 'r', encoding='utf8') as qfile:
        inner_html = qfile.read()

    for section in inner_html.split("<br clear=all>"):
        section_soup = BeautifulSoup(section, 'html.parser')

        headings = section_soup.select(".Heading1Center")
        if not headings:
            continue

        # process "the book of" chapter
        baab_titles = extract_headings(headings)
        en_title = baab_titles[Language.EN.value]

        # A book is only appended when no match exists, so at most one
        # collected book can share this English title.
        baab = next(
            (existing_baab for existing_baab in baabs
             if existing_baab.titles[Language.EN.value] == en_title),
            None)

        if baab is None:
            baab = Chapter()
            baab.part_type = PartType.Book
            baab.titles = baab_titles
            baab.chapters = []

            baabs.append(baab)

        # process chapters: headings are taken in consecutive pairs, the last
        # group holding a single heading when the count is odd
        chapter_headings = section_soup.select(".Heading2Center")
        for pair_start in range(0, len(chapter_headings), 2):
            heading_pair = chapter_headings[pair_start:pair_start + 2]

            chapter = Chapter()
            chapter.part_type = PartType.Chapter
            chapter.titles = extract_headings(heading_pair)
            chapter.verse_translations = [translation]
            chapter.verses = []

            baab.chapters.append(chapter)

            verse = None
            element = heading_pair[-1].next_sibling
            while element is not None:
                is_text = isinstance(element, NavigableString)
                is_a_tag = is_tag(element)
                # Tags without a class attribute are treated as having no
                # classes; indexing element['class'] would raise KeyError.
                classes = (element.get('class') or []) if is_a_tag else []

                # Stop at the next chapter heading (or at anything that is
                # neither text nor a tag).
                if not is_text and (not is_a_tag
                                    or 'Heading2Center' in classes):
                    break

                if 'libAr' in classes:
                    # push the last verse if its not the start of chapter
                    if verse is not None:
                        chapter.verses.append(verse)

                    verse = Verse()
                    verse.part_type = PartType.Hadith
                    verse.translations = {}
                    verse.translations[HUBEALI_TRANSLATION_ID] = []

                    verse.text = [element.get_text(strip=True)]

                if 'libNormal' in classes:
                    if verse is None:
                        # No hadith has been opened yet, so there is nothing
                        # to attach this paragraph to.
                        logger.warning(
                            "Skipping translation text found before the "
                            "first hadith of a chapter in %s", file)
                    else:
                        verse.translations[HUBEALI_TRANSLATION_ID].append(
                            element.get_text(strip=True))

                element = element.next_sibling

            # flush the hadith that was still open when the scan ended
            if verse is not None:
                chapter.verses.append(verse)

    return baabs
示例#3
0
	
	return baabs

# Identifier assigned to the shared HubeAli translation object below.
HUBEALI_TRANSLATION_ID = "en.hubeali"
# Matches text that begins with "AL-KAFI VOLUME".
VOLUME_HEADING_PATTERN = re.compile("^AL-KAFI VOLUME")
# Matches text that begins with "TABLE OF CONTENTS".
TABLE_OF_CONTENTS_PATTERN = re.compile("^TABLE OF CONTENTS")
# Matches strings that are empty or consist solely of whitespace.
WHITESPACE_PATTERN = re.compile(r"^\s*$")
# Matches text that begins with "H " followed by at least one digit.
V8_HADITH_TITLE_PATTERN = re.compile(r"^H \d+")
# Matches a leading number that starts with "1" and has at least two digits,
# optionally preceded by "-" and/or a space and optionally followed by "-";
# the number itself is captured as group 1.
V8_HADITH_BEGINNING_PATTERN = re.compile(r"^-? ?(1\d+)-?")
# Matches a trailing "<sup>[N]</sup>" marker (N being digits), allowing
# whitespace after it, at the end of the string.
END_OF_HADITH_PATTERN = re.compile(r"<sup>\[\d+\]</sup>\s*$")
# Same trailing marker, but including the '<a id="...">' anchor (self-closed
# or followed by "</a>") that directly precedes it.
END_OF_HADITH_CLEANUP_PATTERN = re.compile(r'<a id="[^"]+"/?>(</a>)?<sup>\[\d+\]</sup>\s*$')

# Module-level Translation instance for the English HubeAli.com translation.
# NOTE(review): the variable is spelled "hubbeali" while the id constant is
# "HUBEALI" — confirm before renaming, other code may reference this name.
hubbeali_translation = Translation()
hubbeali_translation.name = "HubeAli.com"
hubbeali_translation.lang = Language.EN.value
hubbeali_translation.id = HUBEALI_TRANSLATION_ID

def we_dont_care(heading):
	"""Tell whether ``heading`` should be ignored.

	Returns True when ``heading`` is None, or when its stripped, upper-cased
	text matches ``VOLUME_HEADING_PATTERN``; returns False otherwise.
	"""
	if heading is None:
		return True

	upper_text = heading.get_text(strip=True).upper()
	return VOLUME_HEADING_PATTERN.match(upper_text) is not None

def table_of_contents(heading):
	"""Match ``heading`` against ``TABLE_OF_CONTENTS_PATTERN``.

	The heading's text is stripped and upper-cased before matching. The
	result of ``match`` is returned as-is: a match object on success,
	None otherwise.
	"""
	return TABLE_OF_CONTENTS_PATTERN.match(
		heading.get_text(strip=True).upper())