示例#1
0
def test_to_iso_639_3(lang_code):
    """Check that ``lang_code`` converts to the expected ISO 639-3 code.

    The expected value is looked up in ``main.settings_global['lang_codes']``,
    whose keys are treated here as ISO 639-3 codes and whose values are
    treated as the corresponding ISO 639-1 codes.

    Args:
        lang_code: The code passed to ``wl_conversion.to_iso_639_3``; it must
            appear as a value in ``main.settings_global['lang_codes']``,
            otherwise the lookup of the expected value raises ``KeyError``.
    """
    iso_639_3 = wl_conversion.to_iso_639_3(main, lang_code)

    # Invert the table so the expected result can be looked up by the input
    # code. The comprehension uses its own names so it shadows neither the
    # parameter nor the conversion result above.
    expected_by_iso_639_1 = {
        code_639_1: code_639_3
        for code_639_3, code_639_1 in main.settings_global['lang_codes'].items()
    }

    assert iso_639_3 == expected_by_iso_639_1[lang_code]
示例#2
0
def detect_lang(main, file):
    """Detect the language of a text file.

    Reads the file (all of it, or only the first ``number_lines`` lines,
    depending on the auto-detection settings), classifies the text with
    ``langid`` and converts the resulting code with
    ``wl_conversion.to_iso_639_3``.

    Args:
        main: Object exposing ``settings_custom`` with the
            ``['auto_detection']['detection_settings']`` and
            ``['auto_detection']['default_settings']`` entries read below.
        file: Mapping with the keys ``'path'`` and ``'encoding'`` used to
            open the file.

    Returns:
        A ``(lang, success)`` tuple. On success ``lang`` is the value
        returned by ``wl_conversion.to_iso_639_3`` and ``success`` is
        ``True``. If anything goes wrong while reading or detecting,
        ``lang`` is the configured ``default_lang`` and ``success`` is
        ``False``.
    """
    try:
        detection_settings = main.settings_custom['auto_detection'][
            'detection_settings']

        with open(file['path'], 'r', encoding=file['encoding']) as f:
            if detection_settings['number_lines_no_limit']:
                text = f.read()
            else:
                # Look the limit up once instead of on every line.
                number_lines = detection_settings['number_lines']
                lines = []

                for i, line in enumerate(f):
                    if i >= number_lines:
                        break

                    lines.append(line)

                text = ''.join(lines)

        lang_code_639_1 = langid.classify(text)[0]

        # Chinese (Simplified) & Chinese (Traditional)
        if lang_code_639_1 == 'zh':
            # Default to Simplified Chinese unless langdetect proposes a
            # specific variant; the most probable variant wins.
            lang_code_639_1 = 'zh_cn'

            for candidate in sorted(langdetect.detect_langs(text),
                                    key=lambda item: -item.prob):
                if candidate.lang in ['zh-cn', 'zh-tw']:
                    lang_code_639_1 = candidate.lang.replace('-', '_')

                    break
        # Norwegian Bokmål
        elif lang_code_639_1 == 'no':
            lang_code_639_1 = 'nb'
        # Serbian (Cyrillic)
        elif lang_code_639_1 == 'sr':
            lang_code_639_1 = 'sr_cyrl'

        lang = wl_conversion.to_iso_639_3(main, lang_code_639_1)

        success = True
    # Detection is deliberately best-effort: any ordinary error (unreadable
    # file, wrong encoding, detector failure) falls back to the default
    # language. Only Exception is caught, so KeyboardInterrupt and SystemExit
    # still propagate instead of being reported as a failed detection.
    except Exception:
        lang = main.settings_custom['auto_detection']['default_settings'][
            'default_lang']

        success = False

    return lang, success
def check_missing_extra_langs(langs_supported, langs_global, msg):
    """Report language codes present in one collection but not the other.

    Each code in ``langs_supported`` is converted with
    ``wl_conversion.to_iso_639_3`` and must be found in ``langs_global``;
    each code in ``langs_global`` is converted with
    ``wl_conversion.to_iso_639_1`` and must be found in ``langs_supported``.
    Every mismatch is printed, and the module-level flags ``lang_missing`` /
    ``lang_extra`` are set to ``True`` when a mismatch of that kind occurs.

    Args:
        langs_supported: Iterable of codes checked against ``langs_global``.
        langs_global: Iterable of codes checked against ``langs_supported``.
        msg: Text inserted into the printed messages to say what is being
            checked.
    """
    global lang_missing
    global lang_extra

    # Pass 1: supported codes that have no counterpart in the global list.
    for lang_code in langs_supported:
        lang_code_639_3 = wl_conversion.to_iso_639_3(main, lang_code)

        if lang_code_639_3 in langs_global:
            continue

        print(
            f'''Missing language code "{lang_code_639_3}/{lang_code}" found for {msg}!'''
        )
        lang_missing = True

    # Pass 2: global codes that have no counterpart in the supported list.
    for lang_code in langs_global:
        lang_code_639_1 = wl_conversion.to_iso_639_1(main, lang_code)

        if lang_code_639_1 in langs_supported:
            continue

        print(
            f'''Extra language code "{lang_code}/{lang_code_639_1}" found for {msg}!'''
        )
        lang_extra = True
示例#4
0
def detect_lang_text(main, text):
    """Detect the language of ``text``.

    The code reported by ``langid.classify`` is adjusted to a specific
    variant where needed and then converted with
    ``wl_conversion.to_iso_639_3``.

    Args:
        main: Passed through to ``wl_conversion.to_iso_639_3``.
        text: The text to classify.

    Returns:
        The value returned by ``wl_conversion.to_iso_639_3``, or ``'other'``
        when that value is ``None``.
    """
    # Detected codes that are replaced by one fixed variant.
    fixed_variants = {
        'en': 'en_us',    # English
        'de': 'de_de',    # German
        'no': 'nb',       # Norwegian Bokmål
        'pt': 'pt_pt',    # Portuguese
        'sr': 'sr_cyrl',  # Serbian (Cyrillic)
    }

    detected = langid.classify(text)[0]

    if detected == 'zh':
        # Chinese (Simplified) & Chinese (Traditional): take the most
        # probable variant proposed by langdetect, defaulting to Simplified.
        ranked = sorted(langdetect.detect_langs(text),
                        key=lambda item: -item.prob)
        variant = next(
            (guess.lang for guess in ranked
             if guess.lang in ['zh-cn', 'zh-tw']),
            None,
        )

        if variant is None:
            detected = 'zh_cn'
        else:
            detected = variant.replace('-', '_')
    else:
        detected = fixed_variants.get(detected, detected)

    converted = wl_conversion.to_iso_639_3(main, detected)

    # Other Languages
    return 'other' if converted is None else converted
示例#5
0
def test_to_iso_639_3():
    """Check every entry of ``TO_ISO_639_3`` against the converter.

    Each key is passed to ``wl_conversion.to_iso_639_3`` and the result must
    equal the value stored under that key.
    """
    for lang_code, expected in TO_ISO_639_3.items():
        converted = wl_conversion.to_iso_639_3(main, lang_code)

        assert converted == expected