示例#1
0
def test_known_native_names():
    """Look up several well-known languages by their exact native name."""
    # Each row: (native name queried, not-found message, code-mismatch message,
    #            expected code, expected name).
    # NOTE: Only full-name matching is currently supported, so a local
    #       language version would have to be looked up by "name, country".
    # NOTE: Names holding several languages separated by semicolons, e.g.
    #       "Scottish Gaelic; Gaelic", are likewise only matched in full.
    expectations = [
        ('English', 'English not found', 'Wrong code',
         "en", "English"),
        ('isiZulu', 'Zulu not found', 'Wrong internal repr. code',
         "zul", "Zulu"),
        ('Português', 'Portuguese not found', 'Wrong internal repr. code',
         "pt", "Portuguese"),
        ('Gàidhlig', 'Scottish Gaelic; Gaelic not found', 'Wrong internal repr. code',
         "gd", "Scottish Gaelic; Gaelic"),
    ]
    for native_name, missing_msg, code_msg, code, name in expectations:
        found = languages.getlang_by_native_name(native_name)
        assert found is not None, missing_msg
        assert found.code == code, code_msg
        assert found.name == name, 'Wrong name'
        assert found.native_name == native_name, 'Wrong native_name'
def test_known_native_names():
    """Exact native-name lookups must return the matching language object."""

    def check(query, label, code, name, code_msg='Wrong internal repr. code'):
        # The queried native name is also the native_name expected back.
        entry = languages.getlang_by_native_name(query)
        assert entry is not None, label + ' not found'
        assert entry.code == code, code_msg
        assert entry.name == name, 'Wrong name'
        assert entry.native_name == query, 'Wrong native_name'

    check('English', 'English', "en", "English", code_msg='Wrong code')
    check('isiZulu', 'Zulu', "zul", "Zulu")

    # NOTE: Only full-name matching is currently supported, so a local
    #       language version would have to be looked up by "name, country".
    check('Português', 'Portuguese', "pt", "Portuguese")

    # NOTE: Only full-match lookups are supported where multiple languages
    #       are separated by semicolons, e.g. "Scottish Gaelic; Gaelic".
    check('Gàidhlig', 'Scottish Gaelic; Gaelic', "gd", "Scottish Gaelic; Gaelic")
def test_list_like_language_native_names():
    """Each entry of a comma-separated native_name finds the same language."""
    for alias in ('Iñupiaq', 'Iñupiatun'):
        match = languages.getlang_by_native_name(alias)
        assert match is not None, 'Inupiaq not found'
        assert match.code == "ik", 'Wrong internal repr. code'
        assert match.name == "Inupiaq", 'Wrong name'
        assert match.native_name == "Iñupiaq, Iñupiatun", 'Wrong native_name'
示例#4
0
def test_list_like_language_native_names():
    """Both names listed in the Inupiaq native_name must resolve to Inupiaq."""

    def expect_inupiaq(query):
        # Every assertion is identical no matter which alias was queried.
        resolved = languages.getlang_by_native_name(query)
        assert resolved is not None, 'Inupiaq not found'
        assert resolved.code == "ik", 'Wrong internal repr. code'
        assert resolved.name == "Inupiaq", 'Wrong name'
        assert resolved.native_name == "Iñupiaq, Iñupiatun", 'Wrong native_name'

    expect_inupiaq('Iñupiaq')
    expect_inupiaq('Iñupiatun')
def test_list_like_language_native_names():
    """Querying either listed native name returns the Inupiaq language."""
    # (attribute, expected value, failure message), checked in this order.
    expected_fields = (
        ("code", "ik", "Wrong internal repr. code"),
        ("name", "Inupiaq", "Wrong name"),
        ("native_name", "Iñupiaq, Iñupiatun", "Wrong native_name"),
    )
    for query in ("Iñupiaq", "Iñupiatun"):
        hit = languages.getlang_by_native_name(query)
        assert hit is not None, "Inupiaq not found"
        for attr, value, message in expected_fields:
            assert getattr(hit, attr) == value, message
def test_african_languages(african_languages_list):
    """Every native name in ``african_languages_list`` must be resolvable."""
    # Gather all unresolved names so a failure reports them together.
    missing_names = [
        native_name
        for native_name in african_languages_list
        if languages.getlang_by_native_name(native_name) is None
    ]
    assert missing_names == [], 'Languages with native_names missing: ' + str(missing_names)
示例#7
0
def test_african_languages(african_languages_list):
    """Fail, listing the offenders, if any given native name has no match."""

    def is_unknown(native_name):
        return languages.getlang_by_native_name(native_name) is None

    missing_names = list(filter(is_unknown, african_languages_list))
    assert missing_names == [], 'Languages with native_names missing: ' + str(missing_names)
 def __get_language_code(self, language_str):
     """
     Return the language code for ``language_str``.

     The string is looked up first as a language name and then as a native
     name. When neither lookup yields a language, a message is printed and
     ``NalibaliChef.ENGLISH_LANGUAGE_CODE`` is returned instead.
     """
     match = getlang_by_name(language_str) or getlang_by_native_name(
         language_str)
     if not match:
         print('Unknown language:', language_str)
         return NalibaliChef.ENGLISH_LANGUAGE_CODE
     return match.code
示例#9
0
    def construct_channel(self, *args, **kwargs):
        """
        Build the channel with one topic and one HTML5 app per language.

        The keys of ``self.data`` are processed in sorted order. For each one
        the page at ``lang_data['url']`` is fetched through ``self.client``,
        its ``div#translations`` element is removed when present, the page is
        written as ``index.html`` into the page's zip directory, and that
        directory is zipped and moved into ``self.ZIP_DIR``.

        Returns the channel obtained from ``self.get_channel(*args, **kwargs)``
        after a topic node has been added to it for every language.
        """
        # Create ChannelNode from data in self.channel_info
        channel = self.get_channel(*args, **kwargs)

        for lang_name in sorted(self.data.keys()):
            page = self.data[lang_name]
            LOGGER.info("Creating app for language: {}".format(lang_name))
            lang = languages.getlang_by_native_name(lang_name)

            page_url = page['url']
            zip_dir = self.client.create_zip_dir_for_page(page_url)
            soup = self.client.get_page_soup(page_url)

            # Remove the translation list if found
            translations = soup.find('div', {'id': 'translations'})
            if translations:
                translations.extract()

            # Grab the localized title
            title = soup.find('span', {'id': 'share_title'}).text

            # Use the first resource whose path mentions dp3t.png as thumbnail
            thumbnail = next(
                (os.path.join(zip_dir, resource)
                 for resource in page['resources']
                 if 'dp3t.png' in resource),
                None)

            # Save the modified index.html page
            index_path = os.path.join(zip_dir, 'index.html')
            with open(index_path, 'wb') as f:
                f.write(soup.prettify(encoding='utf-8'))

            # create_predictable_zip ensures that the ZIP file does not change each time it's created. This
            # ensures that the zip doesn't get re-uploaded just because zip metadata changed.
            zip_file = zip.create_predictable_zip(zip_dir)
            if lang:
                zip_name = lang.primary_code
            else:
                # No language object was found; name the zip after lang_name.
                zip_name = lang_name
            zip_filename = os.path.join(self.ZIP_DIR, "{}.zip".format(zip_name))
            os.makedirs(os.path.dirname(zip_filename), exist_ok=True)
            os.rename(zip_file, zip_filename)

            topic = nodes.TopicNode(source_id=lang_name, title=lang_name)
            app_source_id = "covid19-sim-{}".format(lang_name)
            app_files = [files.HTMLZipFile(zip_filename)]
            app_license = licenses.PublicDomainLicense(
                "Marcel Salathé & Nicky Case")
            zip_node = nodes.HTML5AppNode(
                source_id=app_source_id,
                title=title,
                files=app_files,
                license=app_license,
                language=lang,
                thumbnail=thumbnail)
            topic.add_child(zip_node)
            channel.add_child(topic)

        return channel
示例#10
0
def test_language_names_with_modifier_in_bracket():
    """Native names carrying a bracketed modifier match on the bare name."""
    # try to match based on language name (stuff before subcode in brackets)
    korean_native_name = "한국어 (韓國語), 조선말 (朝鮮語)"
    # Each row: (query, English name, expected code, expected native_name)
    cases = (
        ('日本語', "Japanese", "ja", "日本語 (にほんご/にっぽんご)"),
        ('한국어', "Korean", "ko", korean_native_name),
        ('조선말', "Korean", "ko", korean_native_name),
    )
    for query, english_name, code, native_name in cases:
        found = languages.getlang_by_native_name(query)
        assert found is not None, english_name + ' not found'
        assert found.code == code, 'Wrong internal repr. code'
        assert found.name == english_name, 'Wrong name'
        assert found.native_name == native_name, 'Wrong native_name'
示例#11
0
def test_language_names_with_modifier_in_bracket():
    """Chinese, Japanese and Korean resolve from any of their listed names."""
    # try to match based on language name (stuff before subcode in brackets)
    chinese = ("zh", "Chinese", "中文 (Zhōngwén), 汉语, 漢語")
    japanese = ("ja", "Japanese", "日本語 (にほんご/にっぽんご)")
    korean = ("ko", "Korean", "한국어 (韓國語), 조선말 (朝鮮語)")

    def verify(query, not_found_msg, expected):
        code, name, native_name = expected
        result = languages.getlang_by_native_name(query)
        assert result is not None, not_found_msg
        assert result.code == code, 'Wrong internal repr. code'
        assert result.name == name, 'Wrong name'
        assert result.native_name == native_name, 'Wrong native_name'

    verify('中文', 'Chinese 1 not found', chinese)
    verify('汉语', 'Chinese 2 not found', chinese)
    verify('漢語', 'Chinese 3 not found', chinese)
    verify('日本語', 'Japanese not found', japanese)
    verify('한국어', 'Korean not found', korean)
    verify('조선말', 'Korean not found', korean)
示例#12
0
def test_unknown_name():
    """A native name with no matching language must yield None."""
    result = languages.getlang_by_native_name('UnknoenNativeLanguage')
    failure_msg = 'query for natove_name UnknoenNativeLanguage returned non-None'
    assert result is None, failure_msg
示例#13
0
def test_unknown_name():
    """Looking up a made-up native name is expected to return None."""
    assert languages.getlang_by_native_name('UnknoenNativeLanguage') is None, \
        'query for natove_name UnknoenNativeLanguage returned non-None'
示例#14
0
def test_unknown_native_language():
    """The native-name lookup returns None when nothing matches the query."""
    unknown_query = "UnknoenNativeLanguage"
    outcome = languages.getlang_by_native_name(unknown_query)
    assert outcome is None, "query for natove_name UnknoenNativeLanguage returned non-None"
def build_lang_lookup_table(FEED_ROOT_URL):
    """
    Build a lookup table of the per-language root URLs found in a feed.

    Parses the feed at FEED_ROOT_URL and collects every feed-level link whose
    `opds:facetgroup` is `Languages`. Each such link is resolved to a language
    object and recorded under that object's `code`.

    Resolution order for each link:
      1. `getlang_by_name(title)`
      2. `getlang_by_native_name(title)`
      3. `getlang_by_alpha2(code)`, where `code` is the pycountry `alpha_2`
         code for the language code found in the link's href, or the
         `alpha_3` code when the pycountry entry has no `alpha_2`.

    Returns:
        dict mapping lang_code to a dict with the keys `alpha_3`,
        `lang_title`, `href`, `name` and `native_name`.

    Raises:
        ValueError: if no language code can be found in a link's href.
        AssertionError: if one or more languages could not be resolved; the
            message lists their alpha_3 codes.
    """
    OPDS_LANG_ROOTS = {}

    # Check for languages we don't yet support in Kolibri.
    langs_not_found = []

    feed = feedparser.parse(FEED_ROOT_URL)
    lang_links = [
        link for link in feed.feed.links
        if 'opds:facetgroup' in link and link['opds:facetgroup'] == 'Languages'
    ]

    # Build lookup table    lang_code --> dict with info about content in that language
    # where lang_code is the Learning Equality internal language codes defined in le_utils
    # Assume the chef script will be run on the command line using   lang=lang_code
    # E.g. lang_code for Zulu is `zul`, for Amharic it's `am`, and for Nepali it's `ne-NP`
    for link in lang_links:
        href = link['href']
        m = _LANG_CODE_RE.search(href)
        if not m:
            raise ValueError('Cannot find language code in href: ' + str(href))
        gdl_lang_code = m.groupdict()['gdl_lang_code']

        # Substitute alternative titles for two feed titles before the lookup.
        lang_title = link['title']
        if lang_title == "isiNdebele seSewula":
            lang_title = "isiNdebele"
        elif lang_title == 'বাঙালি':
            lang_title = 'বাংলা'

        print('Processig lang_title', lang_title)

        # Steps 1 and 2: match the title as a name, then as a native name.
        lang_obj = getlang_by_name(lang_title) or getlang_by_native_name(lang_title)

        if not lang_obj:
            # Step 3: fall back to a code-based lookup, preferring the alpha_2
            # code and using alpha_3 when the entry has no alpha_2.
            # NOTE(review): pycountry's lookup presumably raises LookupError
            # for an unknown code; that error is not handled here -- confirm.
            pyc_lang = pycountry.languages.lookup(gdl_lang_code)
            code = getattr(pyc_lang, 'alpha_2', pyc_lang.alpha_3)

            # getlang_by_alpha2 is a misnomer, codes can be alpha2, alpha3, or lang+locale.
            lang_obj = getlang_by_alpha2(code)
            if not lang_obj:
                langs_not_found.append((pyc_lang, lang_title))
                print('ERROR could not find Kolibri lang info for ',
                      pyc_lang)
                continue

        lang_code = lang_obj.code
        OPDS_LANG_ROOTS[lang_code] = dict(
            alpha_3=gdl_lang_code,
            lang_title=lang_title,
            href=href,
            name=lang_obj.name,
            native_name=lang_obj.native_name,
        )

    # For now, make missing languages a hard error so we can evaluate new language support case-by-case.
    if langs_not_found:
        lang_codes = [pyc_lang.alpha_3 for pyc_lang, _ in langs_not_found]
        message = "The following languages are not yet supported in Kolibri: {}".format(
            ",".join(lang_codes))
        # Raised explicitly instead of through an `assert` statement so the
        # check still runs when Python is started with -O.
        raise AssertionError(message)

    return OPDS_LANG_ROOTS