Пример #1
0
def to_posix_string(lang_code):
    """Convert a language/locale code to a ``lang`` or ``lang_COUNTRY`` string.

    The code is first passed through ``use_underscore``. The first
    underscore-separated component is treated as the language and the second
    (if any) as the country; any further components are ignored.

    Returns:
        ``None`` if the normalized code is empty; otherwise the lower-cased
        language, joined with the upper-cased country by ``_`` when a country
        component is present.

    Raises:
        ValueError: if the language component is accepted neither as given
            nor in its capitalized form.
    """
    # Normalize fra-ca to fr_CA
    lang_code = use_underscore(lang_code)
    if not lang_code:
        return None
    if '_' in lang_code:
        # ISO format, must convert to POSIX format
        lang, country = lang_code.split('_')[:2]
    else:
        lang, country = lang_code, None
    if is_valid639_1(lang):
        posix_lang = lang
    elif is_valid639_2(lang):
        # Keep the original code when no ISO 639-1 equivalent is returned.
        temp = to_iso639_1(lang)
        posix_lang = temp if temp else lang
    else:
        # Retry with the capitalized form (e.g. "french" -> "French").
        full_name = lang.lower().capitalize()
        if is_valid639_2(full_name):
            posix_lang = to_iso639_1(full_name)
        else:
            # The format operator used to live inside the string literal, so
            # the offending code never appeared in the message.
            raise ValueError(
                "The input %s in not a valid code to convert to posix format" %
                (lang_code,))
    if country:
        return '_'.join([posix_lang.lower(), country.upper()])
    else:
        return posix_lang.lower()
Пример #2
0
def is_valid_lang(lang):
    """Return whether *lang* is accepted as a 2- or 3-character language code.

    Two-character values are checked with ``iso639.is_valid639_1`` and
    three-character values with ``iso639.is_valid639_2``; any other length is
    rejected without consulting ``iso639``.

    Returns:
        bool: always ``True`` or ``False`` (an invalid 2- or 3-character code
        used to fall through and yield ``None``).
    """
    if len(lang) == 2:
        return bool(iso639.is_valid639_1(lang))
    if len(lang) == 3:
        return bool(iso639.is_valid639_2(lang))
    return False
Пример #3
0
def to_3_letter_lang(lang):
    """Return the 3-character form of *lang*, or ``False`` if it is rejected.

    A 2-character value accepted by ``iso639.is_valid639_1`` is converted with
    ``iso639.to_iso639_2``; a 3-character value accepted by
    ``iso639.is_valid639_2`` is returned unchanged. Everything else yields
    ``False``.
    """
    length = len(lang)
    if length == 2 and iso639.is_valid639_1(lang):
        return iso639.to_iso639_2(lang)
    if length == 3 and iso639.is_valid639_2(lang):
        return lang
    return False
Пример #4
0
def to_posix_string(locale_code):
    """Normalize a locale code into ``lang[_Script][_COUNTRY]`` form.

    The input is passed through ``use_underscore`` and split on ``_``. The
    first part is replaced by its normalized language code, a fourth part
    (dialect) is dropped, a 2-character last part is upper-cased and a
    4-character second part is capitalized.

    Returns:
        ``None`` for a falsy input, otherwise the normalized parts joined
        with ``_``.

    Raises:
        ValueError: if the language part is not accepted, if there are more
            than four parts, or if the last part is neither 2 nor 4
            characters long.
    """
    if not locale_code:
        return None
    # Normalize fra-ca to fr_CA
    normalized = use_underscore(locale_code)
    parts = normalized.split("_")

    # Normalize the language (first component).
    language = parts[0]
    if is_valid639_1(language):
        parts[0] = language
    elif is_valid639_2(language):
        # Fall back to the given code when no ISO 639-1 code is returned.
        two_letter = to_iso639_1(language)
        parts[0] = two_letter or language
    else:
        # Retry with the capitalized form of the first component.
        titled = language.lower().capitalize()
        if not is_valid639_2(titled):
            raise ValueError(
                "The input %s in not a valid code to convert to posix format" %
                (normalized, ))
        parts[0] = to_iso639_1(titled)

    count = len(parts)
    if count > 4:
        raise ValueError("This locale has too many parts: " + normalized)
    if count == 4:
        # Drop dialect. Sorry.
        parts.pop()

    if len(parts) > 1:
        # Normalize country (2 characters); a 4-character tail is a script.
        tail = parts[-1]
        if len(tail) == 2:
            parts[-1] = tail.upper()
        elif len(tail) != 4:
            raise ValueError("The last part is not a script or country: " +
                             normalized)
        # Normalize script
        if len(parts[1]) == 4:
            parts[1] = parts[1].capitalize()
    return "_".join(parts)
Пример #5
0
def to_posix_string(locale_code):
    """Return *locale_code* normalized to ``lang[_Script][_COUNTRY]``.

    After ``use_underscore`` the code is split on ``_``; the first piece is
    mapped to its normalized language code, a fourth piece is discarded, a
    2-character final piece is upper-cased and a 4-character second piece is
    capitalized.

    Returns:
        ``None`` when *locale_code* is falsy, else the ``_``-joined pieces.

    Raises:
        ValueError: for an unaccepted language piece, more than four pieces,
            or a final piece whose length is neither 2 nor 4.
    """
    if not locale_code:
        return None
    # Normalize fra-ca to fr_CA
    locale_code = use_underscore(locale_code)
    pieces = locale_code.split("_")

    # Normalize first component
    head = pieces[0]
    if is_valid639_1(head):
        normalized_head = head
    elif is_valid639_2(head):
        # Keep the given code if no ISO 639-1 code comes back.
        normalized_head = to_iso639_1(head) or head
    else:
        # Second attempt using the capitalized spelling of the first piece.
        candidate = head.lower().capitalize()
        if is_valid639_2(candidate):
            normalized_head = to_iso639_1(candidate)
        else:
            raise ValueError(
                "The input %s in not a valid code to convert to posix format" %
                (locale_code,))
    pieces[0] = normalized_head

    if len(pieces) > 4:
        raise ValueError("This locale has too many parts: "+locale_code)
    if len(pieces) == 4:
        # Drop dialect. Sorry.
        del pieces[-1]

    if len(pieces) <= 1:
        return "_".join(pieces)

    # Normalize Country
    last_len = len(pieces[-1])
    if last_len == 2:
        pieces[-1] = pieces[-1].upper()
    elif last_len != 4:
        raise ValueError(
            "The last part is not a script or country: "+locale_code)
    # Normalize script
    if len(pieces[1]) == 4:
        pieces[1] = pieces[1].capitalize()
    return "_".join(pieces)
Пример #6
0
def to_posix_format(lang_code):
    """Convert a language/locale code to a ``lang`` or ``lang_COUNTRY`` string.

    The code is first passed through ``use_underscore``. The first
    underscore-separated component is treated as the language and the second
    (if any) as the country; further components are ignored.

    Returns:
        ``None`` if the normalized code is empty or the language component is
        not accepted; otherwise the lower-cased language, joined with the
        upper-cased country by ``_`` when a country component is present.
    """
    # Normalize fra-ca to fr_CA
    lang_code = use_underscore(lang_code)
    if not lang_code:
        return None
    if '_' in lang_code:
        # ISO format, must convert to POSIX format
        lang, country = lang_code.split('_')[:2]
    else:
        lang, country = lang_code, None
    if is_valid639_1(lang):
        posix_lang = lang
    elif is_valid639_2(lang):
        # to_iso639_1 may yield nothing for a code without a 2-letter
        # equivalent; keep the original code then instead of failing on
        # .lower() or emitting an empty language.
        temp = to_iso639_1(lang)
        posix_lang = temp if temp else lang
    else:
        # Retry with the capitalized form (e.g. "french" -> "French").
        full_name = lang.lower().capitalize()
        if is_valid639_2(full_name):
            posix_lang = to_iso639_1(full_name)
        else:
            return None
    if country:
        return '_'.join([posix_lang.lower(), country.upper()])
    else:
        return posix_lang.lower()
Пример #7
0
def to_posix_format(lang_code):
    """Return *lang_code* as ``lang`` or ``lang_COUNTRY``, or ``None``.

    ``use_underscore`` is applied first. Only the first two
    underscore-separated components are used: the language and, optionally,
    the country.

    Returns:
        ``None`` when the normalized code is empty or its language component
        is not accepted. Otherwise the language in lower case, followed by
        ``_`` and the country in upper case when a country is present.
    """
    # Normalize fra-ca to fr_CA
    lang_code = use_underscore(lang_code)
    if not lang_code:
        return None
    if '_' in lang_code:
        # ISO format, must convert to POSIX format
        lang, country = lang_code.split('_')[:2]
    else:
        lang, country = lang_code, None
    if is_valid639_1(lang):
        posix_lang = lang
    elif is_valid639_2(lang):
        # Fall back to the given code when no ISO 639-1 code is returned, so
        # the .lower() below never sees a missing value.
        converted = to_iso639_1(lang)
        posix_lang = converted if converted else lang
    else:
        # Second attempt using the capitalized spelling.
        full_name = lang.lower().capitalize()
        if is_valid639_2(full_name):
            posix_lang = to_iso639_1(full_name)
        else:
            return None
    if country:
        return '_'.join([posix_lang.lower(), country.upper()])
    else:
        return posix_lang.lower()
Пример #8
0
def get_filename_language(full_path):
    """Extract the language tag and flags from a subtitle file name.

    The base name is split on ``.`` and read from the end, skipping the final
    component (the extension): an optional ``forced`` marker, then an
    optional numeric component, then the language code. For example
    ``name.<lang>.<number>.forced.<ext>``.

    Args:
        full_path: path to the file; only its base name is inspected.

    Returns:
        tuple: ``(sub_lang, forced, numbered)`` where ``sub_lang`` is the
        lower-cased 2- or 3-character code accepted by ``iso639`` or the
        string ``"Unknown"``, and the two flags are booleans.
    """
    parts = os.path.basename(full_path).split(".")

    def part_from_end(offset):
        # Names with too few dot-separated components used to raise
        # IndexError; a missing component now reads as "" and ends up
        # reported as "Unknown".
        if len(parts) < offset:
            return ""
        return parts[-offset].lower()

    forced = False
    numbered = False
    sub_lang = part_from_end(2)

    if sub_lang == "forced":
        forced = True
        sub_lang = part_from_end(3)
        if sub_lang.isnumeric():
            numbered = True
            sub_lang = part_from_end(4)
    elif sub_lang.isnumeric():
        numbered = True
        sub_lang = part_from_end(3)

    if len(sub_lang) in (2, 3):
        if not iso639.is_valid639_1(sub_lang) and not iso639.is_valid639_2(sub_lang):
            sub_lang = "Unknown"
    else:
        sub_lang = "Unknown"

    return (sub_lang, forced, numbered)
Пример #9
0
 def clean_language(self):
     """Validate the ``language`` field and return it stripped.

     Raises ``forms.ValidationError`` when ``is_valid639_2`` rejects the
     stripped value.
     """
     code = self.cleaned_data['language'].strip()
     if is_valid639_2(code):
         return code
     raise forms.ValidationError(
         _("Language must be valid a ISO-639-2 code"))
Пример #10
0
def is_valid_iso639_code(value):
    """Return a truthy result if either iso639 validator accepts *value*.

    ``is_valid639_1`` is tried first; ``is_valid639_2`` is only consulted
    when the first check is falsy.
    """
    first_check = is_valid639_1(value)
    if first_check:
        return first_check
    return is_valid639_2(value)
Пример #11
0
def valid_lang(lang):
    """Return the result of ``is_valid639_2`` for *lang*."""
    verdict = is_valid639_2(lang)
    return verdict
def findSubtitlesNoneIso639(scanfolder, isoMode, disablelangdetect):
    """
    Detect subtitles that do not comply with ISO-639.

    Walks ``scanfolder`` recursively and, for every file whose extension is
    one of ``.srt``, ``.sub`` or ``.ass``, compares the kind of language code
    found in the file name with the requested ``isoMode``. Mismatches are
    reported through ``printNotificationWarning`` / ``printNotificationInfo``
    and a summary line is printed at the end.

    Args:
        scanfolder: root directory passed to ``os.walk``.
        isoMode: the expected ISO 639 part as a string, ``"1"`` or ``"2"``
            (it is compared with those literals and concatenated into the
            report text).
        disablelangdetect: when truthy, skip content-based language detection
            for incorrectly named files.

    TODO: Add more subtitle extensions (and read/parse them correctly for
          language detection)
    TODO: Separate language detection better in different functions
    TODO: Add percentage of certainty and possible other languages when
          low certainty
    TODO: Handle unicode better to detect languages like German and Dutch
          better
    TODO: Use table
    """
    subtitleExts = ['.srt', '.sub', '.ass']
    total = 0
    incorrect = 0
    detectedlang = 0
    for subdir, dirnames, filenames in os.walk(scanfolder):
        for filename in filenames:
            incorrectSubtitle = False
            extension = os.path.splitext(filename)[1].lower()
            # subdirName = os.path.basename(os.path.normpath(subdir))
            if extension in subtitleExts:
                total = total + 1
                langcodeFromFilename = getIsoLanguageCodeFromFilename(filename)
                detectedLanguage = ""
                # Stays False when the code matches neither validator.
                detectedIsoMode = False
                if is_valid639_1(langcodeFromFilename):
                    detectedIsoMode = "1"
                    detectedLanguage = iso639_to_name(langcodeFromFilename)
                if is_valid639_2(langcodeFromFilename):
                    detectedIsoMode = "2"
                    detectedLanguage = iso639_to_name(langcodeFromFilename)
                # Compare by value: identity ("is not") on strings only works
                # when both happen to be the same object.
                if detectedIsoMode != isoMode:
                    isoShouldBe = ""
                    if isoMode == "1" and detectedIsoMode == "2":
                        isoShouldBe = to_iso639_1(langcodeFromFilename)
                    if isoMode == "2" and detectedIsoMode == "1":
                        isoShouldBe = to_iso639_2(langcodeFromFilename)
                    filepath = subdir + os.sep + filename
                    incorrectSubtitle = True
                    incorrect = incorrect + 1
                    warning = "Incorrectly named subtitle found at "
                    warning += bold(filepath)
                    printNotificationWarning(warning)
                    if detectedIsoMode is not False:
                        info = "\t\tLang code " + bold(langcodeFromFilename)
                        info += " (ISO 639-" + str(detectedIsoMode) + ") "
                        info += "detected. The ISO 639-" + isoMode + " code"
                        info += " for " + detectedLanguage + " is "
                        info += bold(isoShouldBe) + "."
                        printNotificationInfo(info)
                if incorrectSubtitle and not disablelangdetect:
                    filepath = subdir + os.sep + filename
                    # Best effort: any failure to read or detect is reported
                    # as a failed detection rather than aborting the scan.
                    try:
                        with io.open(filepath, "r", encoding="utf-8") as mfile:
                            my_unicode_string = mfile.read()
                        possibleLanguage = "\tDetected language is likely to "
                        possibleLanguage += "be \"" + detect(my_unicode_string)
                        possibleLanguage += "\"\n"
                        detectedlang = detectedlang + 1
                    except Exception:
                        possibleLanguage = "\tLanguage detection failed\n"
                    # NOTE(review): possibleLanguage and detectedlang are
                    # never reported in the code visible here - confirm
                    # whether the message was meant to be printed.
    info = "Found subtitle files " + bold(str(total)) + " of which "
    info += bold(str(incorrect)) + " are incorrectly named!"
    printNotificationInfo(info)