def __init__(self):
    """Collect every accepted language code and build the synonym table.

    ``self.codes`` ends up holding ``LANGUAGES``, the codes of the
    ``alpha3b``, ``alpha2`` and ``name`` babelfish converters, and every
    synonym listed in ``SYN`` (added exactly as written).
    ``self.guessit_exceptions`` maps each lower-cased synonym to an
    ``(alpha3, country, None)`` tuple.
    """
    self.codes = set()
    self.guessit_exceptions = {}

    self.alpha3b = babelfish.get_language_converter('alpha3b')
    self.alpha2 = babelfish.get_language_converter('alpha2')
    self.name = babelfish.get_language_converter('name')

    self.codes |= LANGUAGES | self.alpha3b.codes | self.alpha2.codes | self.name.codes

    for language_key, synonyms in SYN.items():
        alpha3, country = language_key
        # Every synonym of one SYN entry resolves to the same target tuple.
        target = (alpha3, country, None)
        for synonym in synonyms:
            self.guessit_exceptions[synonym.lower()] = target
            self.codes.add(synonym)
def test_converter_opensubtitles(self):
    """Check the ``opensubtitles`` converter on fixed cases and on the bundled list.

    The first part asserts a handful of known conversions in both
    directions, the errors raised for unknown codes, and the number of
    codes exposed by the converter. The second part round-trips every
    sufficiently available entry of ``data/opensubtitles_languages.txt``.
    """
    self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
    self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
    self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
    self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
    self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
    # Montenegrin is not recognized as an ISO language (yet?) but for now it is
    # unofficially accepted as Serbian from Montenegro
    self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
    self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
    with self.assertRaises(LanguageReverseError):
        Language.fromopensubtitles('zzz')
    with self.assertRaises(LanguageConvertError):
        Language('aaa').opensubtitles
    self.assertEqual(len(get_language_converter('opensubtitles').codes), 606)

    # test with all the languages from the opensubtitles api
    # downloaded from: http://www.opensubtitles.org/addons/export_languages.php
    f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
    # try/finally so the stream is released even when an assertion below fails.
    try:
        # The first line is read and discarded (presumably a header row;
        # confirm against the data file).
        f.readline()
        for line in f:
            idlang, alpha2, _, upload_enabled, web_enabled = line.decode('utf-8').strip().split('\t')
            if not int(upload_enabled) and not int(web_enabled):
                # do not test languages that are too esoteric / not widely available
                continue
            self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
            if alpha2:
                self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
    finally:
        f.close()
def test_converter_name(self):
    """Check the ``name`` converter in both directions and pin its code count."""
    english = Language('eng')
    self.assertEqual(Language('eng').name, 'English')

    # Both reverse entry points must resolve the name to the same language.
    self.assertEqual(Language.fromname('English'), english)
    self.assertEqual(Language.fromcode('English', 'name'), english)

    # An unknown name cannot be reversed.
    self.assertRaises(LanguageReverseError, Language.fromname, 'Zzzzzzzzz')

    name_codes = get_language_converter('name').codes
    self.assertEqual(len(name_codes), 7874)
def test_converter_alpha3t(self):
    """Check the ``alpha3t`` converter in both directions and pin its code count."""
    french = Language('fra')
    self.assertEqual(Language('fra').alpha3t, 'fra')

    # Both reverse entry points must resolve the code to the same language.
    self.assertEqual(Language.fromalpha3t('fra'), french)
    self.assertEqual(Language.fromcode('fra', 'alpha3t'), french)

    # Unknown code on the way in, unconvertible language on the way out.
    self.assertRaises(LanguageReverseError, Language.fromalpha3t, 'zzz')
    self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)

    alpha3t_codes = get_language_converter('alpha3t').codes
    self.assertEqual(len(alpha3t_codes), 418)
def test_converter_alpha2(self):
    """Check the ``alpha2`` converter in both directions and pin its code count."""
    english = Language('eng')
    self.assertEqual(Language('eng').alpha2, 'en')

    # Both reverse entry points must resolve the code to the same language.
    self.assertEqual(Language.fromalpha2('en'), english)
    self.assertEqual(Language.fromcode('en', 'alpha2'), english)

    # Unknown code on the way in, unconvertible language on the way out.
    self.assertRaises(LanguageReverseError, Language.fromalpha2, 'zz')
    self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)

    alpha2_codes = get_language_converter('alpha2').codes
    self.assertEqual(len(alpha2_codes), 184)
def __init__(self):
    """Set up the Addic7ed name tables on top of the babelfish ``name`` converter.

    ``from_addic7ed`` maps an Addic7ed display name to a language tuple
    (presumably ``(alpha3[, country[, script]])``; confirm against the
    convert/reverse methods). ``to_addic7ed`` maps a language tuple back to
    a display name and lists only one name per tuple. ``codes`` is the
    ``name`` converter's codes plus every Addic7ed-specific name.
    """
    self.name_converter = get_language_converter('name')

    self.from_addic7ed = {
        'Català': ('cat',),
        'Chinese (Simplified)': ('zho',),
        'Chinese (Traditional)': ('zho',),
        'Euskera': ('eus',),
        'Galego': ('glg',),
        'Greek': ('ell',),
        'Malay': ('msa',),
        'Portuguese (Brazilian)': ('por', 'BR'),
        'Serbian (Cyrillic)': ('srp', None, 'Cyrl'),
        'Serbian (Latin)': ('srp',),
        'Spanish (Latin America)': ('spa',),
        'Spanish (Spain)': ('spa',),
    }

    self.to_addic7ed = {
        ('cat',): 'Català',
        ('zho',): 'Chinese (Simplified)',
        ('eus',): 'Euskera',
        ('glg',): 'Galego',
        ('ell',): 'Greek',
        ('msa',): 'Malay',
        ('por', 'BR'): 'Portuguese (Brazilian)',
        ('srp', None, 'Cyrl'): 'Serbian (Cyrillic)',
    }

    # Iterating a dict yields its keys, i.e. the Addic7ed display names.
    addic7ed_names = set(self.from_addic7ed)
    self.codes = self.name_converter.codes | addic7ed_names
def scan_subtitle_languages(path):
    """Search for subtitles with alpha2 extension from a video `path` and return their language

    A directory entry counts as a subtitle when its name starts with the
    video's name (without extension) and ends with one of
    ``SUBTITLE_EXTENSIONS``. If the part before the subtitle extension ends
    with ``.<alpha2 code>``, that language is reported; otherwise the
    undetermined language ``und`` is reported.

    :param string path: path to the video; a bare filename is looked up in
        the current directory
    :return: found subtitle languages
    :rtype: set

    """
    language_extensions = tuple('.' + c for c in babelfish.get_language_converter('alpha2').codes)
    dirpath, filename = os.path.split(path)
    # os.path.split() gives an empty directory for a bare filename and
    # os.listdir('') raises, so fall back to the current directory.
    dirpath = dirpath or '.'
    # Loop-invariant: the video name without its extension.
    video_root = os.path.splitext(filename)[0]
    subtitles = set()
    for p in os.listdir(dirpath):
        # Skip bytes entries and anything that is not a subtitle of this video.
        if isinstance(p, bytes) or not p.startswith(video_root) or not p.endswith(SUBTITLE_EXTENSIONS):
            continue
        subtitle_root = os.path.splitext(p)[0]
        if subtitle_root.endswith(language_extensions):
            # The last two characters are the alpha2 code matched just above.
            subtitles.add(babelfish.Language.fromalpha2(subtitle_root[-2:]))
        else:
            subtitles.add(babelfish.Language('und'))
    logger.debug('Found subtitles %r', subtitles)
    return subtitles
def test_converter_type(self):
    """Check the ``type`` converter's code set and two representative languages."""
    expected_codes = {'A', 'C', 'E', 'H', 'L', 'S'}
    type_converter = get_language_converter('type')
    self.assertEqual(type_converter.codes, expected_codes)

    english_type = Language('eng').type
    self.assertEqual(english_type, 'living')

    undetermined_type = Language('und').type
    self.assertEqual(undetermined_type, 'special')
def test_converter_scope(self):
    """Check the ``scope`` converter's code set and two representative languages."""
    expected_codes = {'I', 'S', 'M'}
    scope_converter = get_language_converter('scope')
    self.assertEqual(scope_converter.codes, expected_codes)

    english_scope = Language('eng').scope
    self.assertEqual(english_scope, 'individual')

    undetermined_scope = Language('und').scope
    self.assertEqual(undetermined_scope, 'special')
def __init__(self):
    """Set up the TVsubtitles code tables on top of the babelfish ``alpha2`` converter.

    ``from_tvsubtitles`` maps a TVsubtitles-specific code to a language
    tuple, ``to_tvsubtitles`` is its exact inverse, and ``codes`` is the
    ``alpha2`` converter's codes plus the TVsubtitles-specific ones.
    """
    self.alpha2_converter = get_language_converter('alpha2')
    self.from_tvsubtitles = {
        'br': ('por', 'BR'),
        'ua': ('ukr',),
        'gr': ('ell',),
        'cn': ('zho',),
        'jp': ('jpn',),
        'cz': ('ces',),
    }
    # Must iterate .items(): iterating the dict itself yields only the
    # two-character keys, which would be unpacked into single characters
    # and produce a bogus reverse mapping.
    self.to_tvsubtitles = {v: k for k, v in self.from_tvsubtitles.items()}
    self.codes = self.alpha2_converter.codes | set(self.from_tvsubtitles.keys())