def insert_quran_translation(verses, verse_translations, file, key, lang, author, bio):
    """Attach one translation file to ``verses`` and register its metadata.

    A ``Translation`` record with id ``"<lang>.<key>"`` is appended to
    ``verse_translations``.  The file is then read line by line: blank lines
    and lines starting with ``#`` are skipped, and the n-th remaining line is
    stored as a single-element list under that id in
    ``verses[n].translations``.

    Args:
        verses: Indexable collection of verse objects, each exposing a
            ``translations`` mapping.  It is indexed positionally, so it must
            hold at least as many entries as the file has text lines.
        verse_translations: List that receives the new ``Translation`` record.
        file: Path of the UTF-8 encoded translation file.
        key: Short identifier of the translation; second half of the id.
        lang: Language code; first half of the id and stored on the record.
        author: Stored as the ``name`` of the translation record.
        bio: Accepted for interface compatibility; not used by this function.
    """
    logger.info("Adding Quran translation file %s", file)

    # Named ``translation_id`` rather than ``id`` to avoid shadowing the builtin.
    translation_id = lang + "." + key

    qt = Translation()
    qt.lang = lang
    qt.name = author
    qt.id = translation_id
    verse_translations.append(qt)

    index = 0
    with open(file, 'r', encoding='utf8') as qfile:
        # Iterate the handle directly; there is no need to load every line
        # into memory first.
        for line in qfile:
            text = line.strip()
            if text and not text.startswith('#'):
                verses[index].translations[translation_id] = [text]
                index += 1
def _element_classes(element):
    """Return the CSS classes of a tag, or an empty list when it has none."""
    # ``element['class']`` raises KeyError for tags without a class attribute;
    # ``.get`` lets class-less tags be treated as ordinary content.
    return element.get('class') or []


def build_alhassanain_baabs(file) -> List[Chapter]:
    """Parse an HTML file into a list of book-level chapters ("baabs").

    The file content is split on ``<br clear=all>``.  A section without any
    ``.Heading1Center`` element is ignored.  Otherwise its level-1 headings
    name a book; sections whose English title matches an already created book
    are merged into it.  Within a section the ``.Heading2Center`` elements are
    consumed two at a time (a trailing odd heading forms a group of its own)
    and each group becomes one chapter.  The siblings following the last
    heading of a group are walked until the next ``Heading2Center`` tag:
    every ``libAr`` tag starts a new hadith and supplies its text, and every
    ``libNormal`` tag adds a translation paragraph to the current hadith.

    Args:
        file: Path of the UTF-8 encoded HTML file.

    Returns:
        The books in order of first appearance, each holding its chapters.
    """
    baabs: List[Chapter] = []
    logger.info("Adding Al-Kafi file %s", file)

    translation = Translation()
    translation.name = "HubeAli.com"
    translation.lang = Language.EN.value
    translation.id = HUBEALI_TRANSLATION_ID

    with open(file, 'r', encoding='utf8') as qfile:
        inner_html = qfile.read()

    for section in inner_html.split("<br clear=all>"):
        section_soup = BeautifulSoup(section, 'html.parser')
        headings = section_soup.select(".Heading1Center")
        if not headings:
            continue

        # process "the book of" chapter
        baab_titles = extract_headings(headings)
        en_title = baab_titles[Language.EN.value]

        # Reuse the book when an earlier section already created it.
        baab = None
        for existing_baab in baabs:
            if existing_baab.titles[Language.EN.value] == en_title:
                baab = existing_baab

        if baab is None:
            baab = Chapter()
            baab.part_type = PartType.Book
            baab.titles = baab_titles
            baab.chapters = []
            baabs.append(baab)

        # process chapters
        chapter_headings = section_soup.select(".Heading2Center")
        group_count = math.ceil(len(chapter_headings) / 2)
        for group_index in range(group_count):
            start = group_index * 2
            # Up to two headings per chapter; the final group may hold one.
            heading_group = chapter_headings[start:start + 2]

            chapter = Chapter()
            chapter.part_type = PartType.Chapter
            chapter.titles = extract_headings(heading_group)
            chapter.verse_translations = [translation]
            chapter.verses = []
            baab.chapters.append(chapter)

            # Hadith currently being assembled; None until the first libAr tag.
            verse = None
            node = heading_group[-1].next_sibling
            while (node is not None
                   and (isinstance(node, NavigableString)
                        or (is_tag(node)
                            and 'Heading2Center' not in _element_classes(node)))):
                classes = _element_classes(node) if is_tag(node) else []

                if 'libAr' in classes:
                    # push the last verse if its not the start of chapter
                    if verse is not None:
                        chapter.verses.append(verse)

                    verse = Verse()
                    verse.part_type = PartType.Hadith
                    verse.translations = {}
                    verse.translations[HUBEALI_TRANSLATION_ID] = []
                    verse.text = [node.get_text(strip=True)]

                if 'libNormal' in classes:
                    if verse is None:
                        # Translation text ahead of the first hadith has no
                        # verse to attach to; report it instead of crashing.
                        logger.warning(
                            "Skipping text found before the first hadith of a chapter in %s",
                            file)
                    else:
                        verse.translations[HUBEALI_TRANSLATION_ID].append(
                            node.get_text(strip=True))

                node = node.next_sibling

            # Flush the hadith that was still open when the chapter ended.
            if verse is not None:
                chapter.verses.append(verse)

    return baabs
# Identifier under which the HubeAli English translation is stored.
HUBEALI_TRANSLATION_ID = "en.hubeali"

# Heading text that starts with "AL-KAFI VOLUME".
VOLUME_HEADING_PATTERN = re.compile("^AL-KAFI VOLUME")
# Heading text that starts with "TABLE OF CONTENTS".
TABLE_OF_CONTENTS_PATTERN = re.compile("^TABLE OF CONTENTS")
# Text that is empty or consists only of whitespace.
WHITESPACE_PATTERN = re.compile(r"^\s*$")
# Text that starts with "H " followed by at least one digit.
V8_HADITH_TITLE_PATTERN = re.compile(r"^H \d+")
# Text that starts with an optional "-", an optional space and a number whose
# first digit is 1 (captured), optionally followed by "-".
V8_HADITH_BEGINNING_PATTERN = re.compile(r"^-? ?(1\d+)-?")
# Text that ends with a "<sup>[n]</sup>" marker, trailing whitespace allowed.
END_OF_HADITH_PATTERN = re.compile(r"<sup>\[\d+\]</sup>\s*$")
# The same trailing marker together with the "<a id=...>" anchor preceding it.
END_OF_HADITH_CLEANUP_PATTERN = re.compile(r'<a id="[^"]+"/?>(</a>)?<sup>\[\d+\]</sup>\s*$')

# Module-level translation record for the HubeAli English translation.
hubbeali_translation = Translation()
hubbeali_translation.name = "HubeAli.com"
hubbeali_translation.lang = Language.EN.value
hubbeali_translation.id = HUBEALI_TRANSLATION_ID


def we_dont_care(heading):
    """Return True when ``heading`` is None or is a volume title heading.

    The heading text is stripped and upper-cased before it is tested against
    ``VOLUME_HEADING_PATTERN``.
    """
    if heading is None:
        return True

    htext = heading.get_text(strip=True).upper()
    return bool(VOLUME_HEADING_PATTERN.match(htext))


def table_of_contents(heading):
    """Return a match object when ``heading`` begins a table of contents.

    The heading text is stripped and upper-cased before it is tested against
    ``TABLE_OF_CONTENTS_PATTERN``.  The result is None when the text does not
    match or when ``heading`` itself is None, so it can be used directly in a
    boolean context.
    """
    # Accept a missing heading, as we_dont_care does, instead of raising.
    if heading is None:
        return None

    htext = heading.get_text(strip=True).upper()
    return TABLE_OF_CONTENTS_PATTERN.match(htext)