Пример #1
0
 def _get_lang(lang: str):
     if len(lang) == 2:
         return languages.get(alpha_2=lang)
     elif len(lang) == 3:
         return languages.get(alpha_3=lang)
     else:
         return None
Пример #2
0
def language(field):
    """Report values that are not valid ISO 639-1 or ISO 639-3 language codes.

    ``field`` may hold several values separated by ``"||"``. Each value is
    looked up as alpha-2 when it has two characters and as alpha-3 when it
    has three; values of any other length are reported as invalid outright.
    Problems are printed; nothing is returned.
    """

    # Missing values carry nothing to validate.
    if pd.isna(field):
        return

    # TODO: "Other" values still need dedicated handling here.

    for value in field.split("||"):
        size = len(value)

        if size == 2:
            if languages.get(alpha_2=value):
                continue
            print(f"{Fore.RED}Invalid ISO 639-1 language: {Fore.RESET}{value}")
        elif size == 3:
            if languages.get(alpha_3=value):
                continue
            print(f"{Fore.RED}Invalid ISO 639-3 language: {Fore.RESET}{value}")
        else:
            print(f"{Fore.RED}Invalid language: {Fore.RESET}{value}")

    return
Пример #3
0
 def get(name=None, part1=None, part3=None):
     """Fetch a language by the first criterion that was supplied.

     Precedence is ``part3`` (alpha-3 code), then ``part1`` (alpha-2 code),
     then ``name``. The record found is passed through ``_copy_fields``.
     Returns ``None`` when no criterion is given.
     """
     criteria = (("alpha_3", part3), ("alpha_2", part1), ("name", name))
     for field, wanted in criteria:
         if wanted is not None:
             return _copy_fields(pyc_languages.get(**{field: wanted}))
     return None
def get_language_from_wiktionary_code(code):
    """Resolve a Wiktionary language code to a language object.

    The code is tried as ISO 639-1, then ISO 639-3, then ISO 639-2/T via
    ``languages.get``. If none of those yields a result, everything from the
    first ``-`` onwards is dropped and the remainder is looked up through
    ``iso.get(part5=...)``; a hit there gets an ``iso639_3_code`` attribute
    copied from its ``part3``.

    Returns the language object, or the last falsy lookup result (``None``
    if every lookup raised ``KeyError``) when nothing matches.
    """
    lang = None
    for field in ('iso639_1_code', 'iso639_3_code', 'iso639_2T_code'):
        try:
            lang = languages.get(**{field: code})
        except KeyError:
            continue
        if lang:
            return lang

    # Last resort: strip any "-suffix" and consult the ``iso`` lookup.
    base_code = code.split('-')[0]
    try:
        lang = iso.get(part5=base_code)
        setattr(lang, 'iso639_3_code', lang.part3)
    except KeyError:
        pass
    return lang
Пример #5
0
 def get(cls, language):
     """Build a ``Language`` from a language code.

     With ``PYCOUNTRY`` set, the code is resolved through
     ``languages.lookup``. Otherwise two-character codes are looked up as
     ``alpha2`` and three-character codes as ``part2b``, ``part2t`` and
     ``part3`` in that order; any other length is rejected.

     Raises:
         LookupError: if the code cannot be resolved.
     """
     try:
         if PYCOUNTRY:
             match = languages.lookup(language)
             bibliographic = getattr(match, "bibliographic", None)
             return Language(match.alpha_2, match.alpha_3, match.name,
                             bibliographic)

         code_length = len(language)
         if code_length not in (2, 3):
             raise KeyError(language)

         record = None
         if code_length == 2:
             record = languages.get(alpha2=language)
         else:
             for code_type in ('part2b', 'part2t', 'part3'):
                 try:
                     record = languages.get(**{code_type: language})
                 except KeyError:
                     continue
                 break
             if not record:
                 raise KeyError(language)
         return Language(record.alpha2, record.part3, record.name,
                         record.part2b or record.part2t)
     except (LookupError, KeyError):
         raise LookupError("Invalid language code: {0}".format(language))
Пример #6
0
    def populate_db(self, values, subjects, filename):
        """Insert one book with its author, language and subjects.

        Args:
            values: mapping of metadata fields. The keys ``name``,
                ``date_of_birth``, ``date_of_death`` and ``wiki_link`` go to
                the author document, ``language`` to the language document
                and ``title`` to the book document; other keys are ignored.
            subjects: iterable of subject descriptions.
            filename: source file name; the first run of digits in it
                becomes the book ``number``.

        Documents that already exist (``DuplicateKeyError`` on insert) are
        looked up again and their ``_id`` reused. A duplicate book is
        silently skipped. When ``values`` has no ``language`` entry the book
        is stored with ``language`` set to ``None``.

        Raises:
            AttributeError: if ``filename`` contains no digits, or if a
                duplicate is reported but the follow-up ``find_one`` finds
                nothing.
        """
        print('file', filename)
        author_related = ("name", "date_of_birth", "date_of_death",
                          "wiki_link")
        author = {}
        book = {}
        subject_ids = []
        # Stays None when ``values`` carries no "language" key. Without this
        # default the book insert below failed with UnboundLocalError.
        lang_id = None

        for key, value in values.items():
            if key in author_related:
                author[key] = value
            if key == "language":
                # Resolve two-letter codes once; anything that cannot be
                # resolved is stored verbatim in all three fields.
                known = lan.get(alpha_2=value) if len(value) == 2 else None
                if known is not None:
                    data = {
                        "iso_639_1": value,
                        "iso_639_2": known.alpha_3,
                        "name": known.name,
                    }
                else:
                    data = {
                        "iso_639_1": value,
                        "iso_639_2": value,
                        "name": value,
                    }
                try:
                    lang_id = self.db.language.insert_one(data).inserted_id
                except DuplicateKeyError:
                    lang_id = self.db.language.find_one({
                        "iso_639_1": value
                    }).get("_id")
            elif key == "title":
                book[key] = value

        try:
            author_id = self.db.author.insert_one(author).inserted_id
        except DuplicateKeyError:
            author_id = self.db.author.find_one({
                'name': author.get('name', None)
            }).get("_id")

        for subject in subjects:
            try:
                subj_id = self.db.subject.insert_one({
                    'description': subject
                }).inserted_id
            except DuplicateKeyError:
                subj_id = self.db.subject.find_one({
                    'description': subject
                }).get('_id')
            subject_ids.append(subj_id)

        try:
            book['author'] = author_id
            book['language'] = lang_id
            book['subjects'] = subject_ids
            book['number'] = int(re.search(r'\d+', filename).group())
            self.db.book.insert_one(book)
        except DuplicateKeyError:
            # The book is already stored; nothing more to do.
            pass
Пример #7
0
 def get(name=None, part1=None, part3=None):
     """Return a copied language record for the first criterion given.

     ``part3`` (alpha-3) wins over ``part1`` (alpha-2), which wins over
     ``name``. Without any criterion the result is ``None``.
     """
     if part3 is not None:
         query = {"alpha_3": part3}
     elif part1 is not None:
         query = {"alpha_2": part1}
     elif name is not None:
         query = {"name": name}
     else:
         return None
     return _copy_fields(pyc_languages.get(**query))
Пример #8
0
	def __init__(self,title,length,relativeDays,lang,mp,pattern,has_gamma=True,start=0,archLink=None,archLink_fmt=None,archTime_fmt=None):
		""" Promotion pattern in a given Wiki

		title: Name of the promotion pattern (type: str)
		length: Length of display during promotion (type: int)
		relativeDays: stored unchanged on the instance (meaning not visible here - TODO confirm)
		lang: Language code, checked with languages.get(iso639_1_code=...)
		mp: Name of language main page in pv database (e.g. Main_Page for en)
		pattern: Regular expression for promotion pattern (type: regex); only handed on to ScrapePattern
		has_gamma: True if there is a sharp decay in page view pattern (e.g. by Today's Featured article)
		start: The start time of the promotion pattern (e.g. 25 for On this Day)
		archLink: Link to site with archive of promotion pattern (type: string)
		archLink_fmt, archTime_fmt: handed on to ScrapePattern with archLink
			(presumably strftime formats for the link and its time stamps - confirm)

		Raises KeyError when the language lookup fails.
		self.scrapePattern is only set when archLink is truthy.
		"""
		self.title, self.relativeDays, self.length = title, relativeDays, length
		self.mp_title = mp
		self.has_gamma, self.start = has_gamma, start

		# The lookup result itself is not needed; only a KeyError matters.
		try:
			languages.get(iso639_1_code=lang)
		except KeyError:
			raise KeyError('Language "'+lang+'" does not exist')
		else:
			self.lang = lang

		if not archLink:
			return
		self.scrapePattern = ScrapePattern(lang,pattern,archLink,archLink_fmt,archTime_fmt)
Пример #9
0
	def run(self,artDict):
		"""Process every hour between self.start and self.end.

		All keys of artDict are first checked with
		languages.get(iso639_1_code=...); a failing lookup raises KeyError.
		Then, starting at self.start truncated to the full hour, each hour
		before self.end is passed to processFile and its result to makeOutput.
		"""
		for code in artDict:
			try:
				languages.get(iso639_1_code=code)
			except KeyError:
				raise KeyError('Language "'+code+'" does not exist')

		begin = self.start
		hour_cursor = datetime(begin.year,begin.month,begin.day,begin.hour,0,0)
		while hour_cursor < self.end:
			makeOutput(hour_cursor,processFile(hour_cursor,artDict))
			hour_cursor += timedelta(hours=1)
def get_language(multilanguage, doc, text, default_lingo, supported_lingos):
    """Return the detected language(s) and the text belonging to each.

    Args:
        multilanguage: ``"n"`` to treat the input as one language; any other
            value enables per-sentence detection.
        doc: analysed document; ``doc._.language['language']`` (and the same
            attribute on every item of ``doc.sents``) holds the detected
            code or ``"UNKNOWN"``.
        text: the raw input text, returned unchanged in single-language mode.
        default_lingo: language name used when detection fails or yields an
            unsupported language (single-language mode only).
        supported_lingos: collection of accepted language names.

    Returns:
        ``(language_name, text)`` in single-language mode, otherwise
        ``(names, texts)``: two parallel lists with one entry per detected
        supported language, the text being the sentences of that language
        joined by spaces. Languages with less than 2 % of the collected text
        are dropped unless their text is longer than 500 characters.

    Codes that the language database does not know are handled like
    undetected languages instead of raising ``AttributeError``.
    """
    if multilanguage == "n":
        langcode = doc._.language['language']
        if langcode == "UNKNOWN":
            print(
                "Language detection not successfull, using default language.")
            return default_lingo, text
        match = languages.get(alpha_2=langcode)
        if match is None or match.name not in supported_lingos:
            print(
                "Language detection probably not successfull, using default language.")
            return default_lingo, text
        return match.name, text

    langlist = []  # unique detected languages, in order of first appearance
    textlist = []  # collected text per language, parallel to langlist
    position = {}  # language name -> index into both lists
    for sent in doc.sents:
        langcode = sent._.language['language']
        if langcode == "UNKNOWN":
            continue
        match = languages.get(alpha_2=langcode)
        if match is None or match.name not in supported_lingos:
            continue
        langname = match.name
        if langname in position:
            textlist[position[langname]] += " " + sent.text
        else:
            position[langname] = len(langlist)
            langlist.append(langname)
            textlist.append(sent.text)

    # The total does not change while filtering, so compute it once.
    total_chars = len(''.join(textlist))
    kept_langs = []
    kept_texts = []
    for langname, chunk in zip(langlist, textlist):
        # With no text at all every share counts as 0 (avoids dividing by 0).
        share = int(100 * len(chunk) / total_chars) if total_chars else 0
        if share < 2 and not len(chunk) > 500:
            continue
        kept_langs.append(langname)
        kept_texts.append(chunk)
    return kept_langs, kept_texts
Пример #11
0
    def __init__(self, key, data):
        """Load a data set and pre-compute its tokenized variants.

        Args:
            key: stored as ``self.key``; also the dict key used to pull the
                text out of each item when ``values`` holds dicts.
            data: either a dict with ``values`` and ``language`` entries
                (and optionally ``filename``), or a path to a UTF-8 JSON
                file containing such a dict.

        ``language`` may be a language name (longer than three characters
        or starting with an upper-case letter), a three-letter code or a
        shorter code looked up as alpha-2. An empty ``values`` list results
        in empty token lists instead of an ``IndexError``.
        """
        self.key = key
        if not isinstance(data, dict):
            self.filename = data
            # TODO allow other data formats?
            with open(data, 'r', encoding='UTF-8') as fh:
                data = json.load(fh)
        else:
            self.filename = data.get('filename')
        logger.info(
            f'Loading {self.__class__.__name__.lower()} for {str(self.filename)}'
        )
        self.all_data = data['values']

        lang_spec = data['language']
        # Slicing instead of indexing keeps an empty string from raising.
        if len(lang_spec) > 3 or lang_spec[:1].isupper():
            self.language = languages.get(name=lang_spec)
        elif len(lang_spec) == 3:
            self.language = languages.get(alpha_3=lang_spec)
        else:
            self.language = languages.get(alpha_2=lang_spec)

        # allow bare lists of strings as well as lists of dicts
        if self.all_data and isinstance(self.all_data[0], str):
            self.data = list(self.all_data)
        else:
            self.data = [item[key] for item in self.all_data]

        # detect if we're using multiple texts per instance; with no data at
        # all there is no first item to inspect, so treat it as single-ref
        self.multi_ref = bool(self.data) and isinstance(self.data[0], list)
        # tokenize & keep a list and a whitespace version
        tokenize_func = default_tokenize_func(self.language)
        if self.multi_ref:
            self._tokenized = [[tokenize_func(i) for i in inst]
                               for inst in self.data]
            self._ws_tokenized = [[' '.join(i) for i in inst]
                                  for inst in self._tokenized]
            self._lc_tokenized = [[[w.lower() for w in ref] for ref in inst]
                                  for inst in self._tokenized]
            self._nopunct_lc_tokenized = [[[
                w for w in ref if w not in self.PUNCTUATION
            ] for ref in inst] for inst in self._lc_tokenized]
        else:
            self._tokenized = [tokenize_func(ref) for ref in self.data]
            self._ws_tokenized = [' '.join(ref) for ref in self._tokenized]
            self._lc_tokenized = [[w.lower() for w in ref]
                                  for ref in self._tokenized]
            self._nopunct_lc_tokenized = [[
                w for w in ref if w not in self.PUNCTUATION
            ] for ref in self._lc_tokenized]
Пример #12
0
def language_info(lang_code):
    """Return the language record for an ISO 639 code, or ``None``.

    A single trailing ``-`` is dropped first. Two-character codes are looked
    up as ISO 639-1; everything else is tried as ISO 639-3 and then as
    ISO 639-2/T. The result of the first lookup that does not raise is
    returned; if every lookup raises, the result is ``None``.
    """
    if lang_code.endswith('-'):
        lang_code = lang_code[:-1]

    if len(lang_code) == 2:
        fields = ('iso639_1_code',)
    else:
        fields = ('iso639_3_code', 'iso639_2T_code')

    for field in fields:
        # ``Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        try:
            return languages.get(**{field: lang_code})
        except Exception:
            continue
    return None
def get_wikt_code_from_iso639_3(code):
    """Return the Wiktionary code for an ISO 639-3 language code.

    The language's ISO 639-1 code is preferred; its ISO 639-3 code is used
    when there is no 639-1 code. If both are empty a debug message is
    logged and ``None`` is returned.
    """
    lang = languages.get(iso639_3_code=code)
    wikt_code = lang.iso639_1_code or lang.iso639_3_code
    if wikt_code:
        return wikt_code
    logging.debug('Do not know the Wikt code for {}'.format(code))
    return None
Пример #14
0
    def crawl_document(self, document):
        """Emit the PDF of one document together with its metadata.

        Args:
            document: mapping describing the document; the keys ``id``,
                ``canonical_url``, ``title``, ``account_name``, ``pdf_url``,
                ``created_at`` and ``language`` are read.

        Nothing happens when ``skip_incremental`` reports the document as
        already handled. The creation date and the language are optional
        extras: if either cannot be parsed or looked up it is left out.
        """
        foreign_id = '%s:%s' % (self.DC_INSTANCE, document.get('id'))

        if self.skip_incremental(foreign_id):
            return

        pdf_url = document.get('pdf_url')
        meta = self.make_meta({
            'source_url': document.get('canonical_url'),
            'title': document.get('title'),
            'author': document.get('account_name'),
            'foreign_id': foreign_id,
            'file_name': os.path.basename(pdf_url),
            'extension': 'pdf',
            'mime_type': 'application/pdf'
        })

        # Best effort only. ``Exception`` replaces the former bare ``except``
        # so that KeyboardInterrupt/SystemExit still stop the crawler.
        try:
            created = parse(document.get('created_at'))
            meta.add_date(created.date().isoformat())
        except Exception:
            pass
        try:
            lang = languages.get(iso639_3_code=document.get('language'))
            meta.add_language(lang.iso639_1_code)
        except Exception:
            pass

        self.emit_url(meta, pdf_url)
 def decode_language_ios_639(lang):
     """Return the language name for an ISO 639-1 code, or ``'Unknown'``.

     Any failure (including a missing ``pycountry``) is logged as a warning
     and answered with ``'Unknown'``.

     NOTE(review): ``self`` is not a parameter here; the warning call only
     works if ``self`` exists in an enclosing scope - confirm.
     """
     try:
         from pycountry import languages
         language_name = languages.get(iso639_1_code=lang).name
     except Exception as ex:
         self.logger.warning('Exception with language decoding according to ISO 693-1: {0}'.format(str(ex)))
         return 'Unknown'
     return language_name
Пример #16
0
class VideoSubtitle(VideoS3):
    """A VTT file that provides captions for a Video"""

    filename = models.CharField(blank=True, null=False, max_length=1024)
    # Two-letter language code. The default is the alpha_2 code found for
    # "English", looked up once when the class body is executed.
    language = models.CharField(
        blank=True,
        null=False,
        max_length=2,
        default=languages.get(name="English").alpha_2,
    )
    # NOTE(review): Django normally reads ``unique_together`` from an inner
    # ``Meta`` class; declared here it looks like a plain class attribute
    # with no effect on the schema - confirm before relying on it.
    unique_together = (("video", "language"), )

    @property
    def language_name(self):
        """
        Gets the name associated with the language code
        """
        match = languages.get(alpha_2=self.language)
        return match.name

    def __str__(self):
        parts = (self.video.title, self.s3_object_key, self.language)
        return "{}: {}: {}".format(*parts)

    def __repr__(self):
        template = "<VideoSubtitle: {self.s3_object_key!r} {self.language!r} >"
        return template.format(self=self)
Пример #17
0
 def get(cls, language):
     """Build a ``Language`` from a code or a language name.

     The value is tried as alpha-2 code, alpha-3 code, bibliographic code
     and finally as name; the first lookup with a truthy result wins.

     Raises:
         LookupError: if no lookup finds the language.
     """
     try:
         lang = None
         for field in ("alpha_2", "alpha_3", "bibliographic", "name"):
             lang = languages.get(**{field: language})
             if lang:
                 break
         if not lang:
             raise KeyError(language)
         # some languages don't have an alpha_2 code
         alpha_2 = getattr(lang, "alpha_2", "")
         return Language(alpha_2, lang.alpha_3, lang.name,
                         getattr(lang, "bibliographic", ""))
     except (LookupError, KeyError):
         raise LookupError(f"Invalid language code: {language}")
Пример #18
0
    def extract_video(self, video):
        """Build a ``VideoItem`` from one video record.

        A missing language (``None`` or the string ``'none'``) is treated as
        ``'en'``. The duration is multiplied by 60 because the API reports
        minutes. The thumbnail is the last entry of the picture sizes.
        """
        language = video['language']
        language = 'en' if language in (None, 'none') else language

        title = self.extract_title(video)
        description = video['description']
        category = self.get_event_url(video)
        language_name = languages.get(iso639_1_code=language).name
        license_key = video['license']
        copyright_text = self.LICENSE_TYPES.get(license_key, license_key)
        speakers = self.extract_speakers(video)
        thumbnail_url = video['pictures']['sizes'][-1]['link']
        seconds = video['duration'] * 60  # API gives duration in minutes
        source_url = self.get_source_url(video)
        recorded = video['created_time'][0:10]
        slug = utils.slugify(video['name'])
        tags = video['tags']
        vimeo_file = items.VideoField(
            length=seconds,
            url=video['link'],
            type='vimeo')

        return items.VideoItem(
            title=title,
            summary='',
            description=description,
            category=category,
            quality_notes='',
            language=language_name,
            copyright_text=copyright_text,
            speakers=speakers,
            thumbnail_url=thumbnail_url,
            duration=seconds,
            source_url=source_url,
            recorded=recorded,
            slug=slug,
            tags=tags,
            videos=[vimeo_file]
        )
Пример #19
0
def get_language(post_content):
    """Return the name of the language detected in ``post_content``.

    Falls back to ``'unknown'`` whenever detection or the name lookup fails.
    """
    # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
    # and SystemExit are not turned into 'unknown'.
    try:
        detected_code = detect(post_content)
        return languages.get(alpha_2=detected_code).name
    except Exception:
        return 'unknown'
Пример #20
0
    def parse_video(self, response):
        """Parse a video API response and yield one ``VideoItem``.

        Only the first entry of ``items`` in the JSON payload is used. The
        audio language defaults to ``'en'`` when the snippet has no
        ``defaultAudioLanguage``.

        Raises:
            KeyError: if neither a "standard" nor a "maxres" thumbnail is
                present, or another required field is missing.
        """
        payload = json.loads(response.body.decode())

        data = payload['items'][0]

        snippet = data['snippet']
        # Prefer the "standard" thumbnail and touch "maxres" only when it is
        # needed. Passing ``thumbnails['maxres']`` as the ``.get`` default
        # evaluated it eagerly and raised KeyError whenever "maxres" was
        # missing, even if "standard" was available.
        thumbnails = snippet['thumbnails']
        if 'standard' in thumbnails:
            thumbnail = thumbnails['standard']
        else:
            thumbnail = thumbnails['maxres']

        url = self.WEB_VIDEO_URL.format(video_id=data['id'])
        duration = utils.duration_as_seconds(data['contentDetails']['duration'])

        yield items.VideoItem(
            title=snippet['title'],
            summary='',
            description=snippet['description'],
            category=response.meta['event']['title'],
            quality_notes=data['contentDetails']['definition'],
            language=languages.get(iso639_1_code=snippet.get('defaultAudioLanguage', 'en')).name,
            copyright_text=self.LICENSE_TYPES.get(data['status']['license'], data['status']['license']),
            thumbnail_url=thumbnail['url'],
            duration=duration,
            source_url=url,
            recorded=snippet['publishedAt'][0:10],
            slug=utils.slugify(snippet['title']),
            tags=[],
            speakers=self.extract_speakers(snippet['description']),
            videos=[{
                'length': duration,
                'url': url,
                'type': 'youtube',
            }]
        )
Пример #21
0
def _detect_message_language(message):
    """Return the name of the language a message is most likely written in.

    The first candidate from ``detect_langs`` is trusted only when its
    probability exceeds 0.99; otherwise ``'en'`` is assumed.
    """
    best_guess = detect_langs(message)[0]
    code = best_guess.lang if best_guess.prob > 0.99 else 'en'
    return languages.get(alpha_2=code).name
Пример #22
0
    def get_language_name(code):
        """Return the language name for a two-letter code.

        ``None`` as code gives an empty string; a code without a matching
        language gives ``None``.
        """
        if code is None:
            return ""

        match = languages.get(alpha2=code)
        return None if match is None else match.name
Пример #23
0
    def getStats(self, pretty=False):
        """Return each parent language's share of the text, in percent.

        The ``parentLanguages.stats`` mappings of all word objects are
        summed per language code and scaled so that the shares add up to
        100.

        Args:
            pretty: when true, the keys are language names instead of
                ISO 639-3 codes and the values are rounded to two decimals.
                Codes whose name cannot be looked up are pooled under
                "Other Language".

        Returns:
            dict mapping language code (or name) to its percentage.
        """
        stats = {}
        for word in self.wordObjects:
            for lang, perc in word.parentLanguages.stats.items():
                stats[lang] = stats.get(lang, 0) + perc

        allPercs = sum(stats.values())
        # All-zero weights would divide by zero; leave them untouched then.
        if allPercs:
            stats = {
                lang: (perc / allPercs) * 100
                for lang, perc in stats.items()
            }

        if not pretty:
            return stats

        prettyStats = {}
        for lang, perc in stats.items():
            try:
                prettyLang = languages.get(iso639_3_code=lang).name
            except Exception:
                prettyLang = "Other Language"
            # Add up instead of overwriting: several codes can end up under
            # the same name, most notably "Other Language".
            prettyStats[prettyLang] = prettyStats.get(prettyLang, 0) + perc
        return {name: round(share, 2) for name, share in prettyStats.items()}
Пример #24
0
    def getStats(self, pretty=False):
        """Compute the percentage each parent language contributes.

        Every word object's ``parentLanguages.stats`` mapping is added up
        per language code; the totals are then normalised to sum to 100.

        Args:
            pretty: if true, alpha-3 codes are replaced by language names
                and the percentages rounded to two decimals. Codes that
                cannot be resolved are combined under "Other Language".

        Returns:
            dict of language code (or name) to percentage.
        """
        totals = {}
        for word in self.wordObjects:
            for lang, perc in word.parentLanguages.stats.items():
                totals[lang] = totals.get(lang, 0) + perc

        allPercs = sum(totals.values())
        if allPercs:
            stats = {
                lang: (perc / allPercs) * 100
                for lang, perc in totals.items()
            }
        else:
            # Nothing to normalise (no data, or only zero weights).
            stats = totals

        if not pretty:
            return stats

        prettyStats = {}
        for lang, perc in stats.items():
            try:
                prettyLang = languages.get(alpha_3=lang).name
            except Exception:
                prettyLang = "Other Language"
            # Accumulate: different codes may share one display name, and
            # the former plain assignment kept only the last of them.
            prettyStats[prettyLang] = prettyStats.get(prettyLang, 0) + perc
        return {name: round(share, 2) for name, share in prettyStats.items()}
 def decode_language_ios_639(lang):
     """Translate an ISO 639-1 code into a language name.

     On any error a warning is logged and ``'Unknown'`` is returned.

     NOTE(review): the handler uses ``self`` although it is not a parameter;
     presumably it comes from an enclosing scope - confirm.
     """
     try:
         from pycountry import languages
         entry = languages.get(iso639_1_code=lang)
         decoded = entry.name
     except Exception as ex:
         self.logger.warning('Exception with language decoding according to ISO 693-1: {0}'.format(str(ex)))
         decoded = 'Unknown'
     return decoded
Пример #26
0
    def crawl_document(self, document):
        """Build the metadata for one document and emit its PDF URL.

        Args:
            document: mapping with the document's fields; ``id``,
                ``canonical_url``, ``title``, ``account_name``, ``pdf_url``,
                ``created_at`` and ``language`` are read.

        Returns early when ``skip_incremental`` says the document is already
        known. Date and language are added on a best-effort basis and are
        simply omitted when they cannot be determined.
        """
        foreign_id = '%s:%s' % (self.DC_INSTANCE, document.get('id'))

        if self.skip_incremental(foreign_id):
            return

        pdf_url = document.get('pdf_url')
        meta_fields = {
            'source_url': document.get('canonical_url'),
            'title': document.get('title'),
            'author': document.get('account_name'),
            'foreign_id': foreign_id,
            'file_name': os.path.basename(pdf_url),
            'extension': 'pdf',
            'mime_type': 'application/pdf',
        }
        meta = self.make_meta(meta_fields)

        # The former bare ``except`` clauses also swallowed KeyboardInterrupt
        # and SystemExit; ``Exception`` keeps the best-effort behaviour
        # without blocking a shutdown.
        try:
            created = parse(document.get('created_at'))
            meta.add_date(created.date().isoformat())
        except Exception:
            pass
        try:
            lang = languages.get(iso639_3_code=document.get('language'))
            meta.add_language(lang.iso639_1_code)
        except Exception:
            pass

        self.emit_url(meta, pdf_url)
Пример #27
0
    def crawl_document(self, document):
        """Create a document record from crawled data and emit its PDF URL.

        Args:
            document: mapping with the crawled fields; ``id``,
                ``canonical_url``, ``title``, ``author``, ``pdf_url``,
                ``created_at`` and ``language`` are read.

        Returns early when ``skip_incremental`` reports the document as
        already handled. Date and language are best-effort additions.
        """
        foreign_id = '%s:%s' % (self.DC_INSTANCE, document.get('id'))

        if self.skip_incremental(foreign_id):
            return

        # Keep the new record under its own name. Rebinding ``document``
        # here made every later ``document.get(...)`` read from the freshly
        # created record instead of the crawled input.
        record = self.create_document(foreign_id=foreign_id)
        pdf_url = document.get('pdf_url')
        record.source_url = document.get('canonical_url')
        record.title = document.get('title')
        record.author = document.get('author')
        record.file_name = os.path.basename(pdf_url)
        record.mime_type = 'application/pdf'

        # ``Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            created = parse(document.get('created_at'))
            record.add_date(created.date().isoformat())
        except Exception:
            pass
        try:
            lang = languages.get(iso639_3_code=document.get('language'))
            record.add_language(lang.iso639_1_code)
        except Exception:
            pass

        self.emit_url(record, pdf_url)
def get_question_answer():
    """
    Answer an incoming message and send the reply through Twilio.

    The message text is read from the form field ``Body``. Media-only
    messages get a fixed apology. Texts longer than two characters are
    language-checked first (TextBlob needs at least three characters to
    detect a language); only English is passed on to the rule controller.
    Shorter texts go to the rule controller directly.

    Only form-encoded requests are supported: for any other content type the
    message is never read and the request ends in the error path.

    :return: "OK" once the message was handled, "Not OK!" after an error
    """
    try:
        content_type = request.content_type
        if "form-" in content_type:
            message = request.form["Body"]
            # NumMedia > 0 means the user sent media (e.g. voice, picture)
            num_media = 0
            if "NumMedia" in request.form.keys():
                num_media = request.form["NumMedia"]
            if len(message) == 0 and int(num_media) > 0:
                twilio_client.messages.create(
                    body=
                    "Sorry, I can only answer to textual messages at the moment! 😉🧐",
                    from_=request.form["To"],
                    to=request.form["From"],
                )
                # A media-only message has nothing left to answer. The
                # Twilio response used to be stored in ``message`` and the
                # code fell through to the text handling below, which is
                # written for the message text, not for that object.
                return "OK"

            user_id = request.form["From"].replace("whatsapp:", "")

        if len(message) > 2:
            blob = TextBlob(message)
            query_language = blob.detect_language()
            lang_name = None
            if query_language is not None:
                lang_name = languages.get(alpha_2=query_language)
            if lang_name is None:
                result = "I don't understand your language 🧐"
            elif lang_name.name == "English":
                result = rule_controller.answer_question(user_id, message)
            else:
                result = "I can only talk in *English* 🇬🇧 at the moment, but soon I will be able to talk in _{}_ 😎".format(
                    lang_name.name)
        else:
            result = rule_controller.answer_question(user_id, message)

        # in case of handing the user's question over to a human, nothing is
        # returned here
        if result is not None:
            twilio_client.messages.create(
                body=result,
                from_=request.form["To"],
                to=request.form["From"],
            )
        # Twilio gets its answer through the client library; the return
        # value only satisfies the web framework's need for a response.
        return "OK"
    except Exception as err:
        logger.error(str(err))
        twilio_client.messages.create(
            body="Oops! Something wrong happened on my side!",
            from_=request.form["To"],
            to=request.form["From"],
        )
        return "Not OK!"
Пример #29
0
def iso_to_name(iso):
    """Return the language name for a two-letter ISO code.

    ``'Unknown'`` is returned for an empty code and for codes without a
    matching language, whether the lookup signals that by raising
    ``KeyError`` or by returning ``None``.
    """
    if not iso:
        return 'Unknown'
    try:
        language = languages.get(alpha2=iso)
    except KeyError:
        return 'Unknown'
    # A miss may also come back as None; ``.name`` would then fail.
    if language is None:
        return 'Unknown'
    return language.name
Пример #30
0
def get_lang_readable(coms_lng):
    """
    Map two-character language codes to full language names.

    Entries equal to "Unknown" are passed through unchanged.
    """
    readable = []
    for lng in coms_lng:
        if lng == "Unknown":
            readable.append(lng)
        else:
            readable.append(languages.get(alpha_2=lng).name)
    return readable
Пример #31
0
    def detect_language(self, column = ''):
        '''
        Detect the language of the text held in one column of the NLPFrame.

        All rows of the column are combined into one text, which is then
        classified with the fastText model at ``self.fasttext_model``.

        ------------
        Argument
            column(string):
                Name of the text column to analyse. If it is an empty
                string, ``self.column`` is used.
        ------------
        Return
            pd.DataFrame: one column ``language`` with the language name.
            If the predicted code has no known name, the code itself is
            returned. ``None`` is returned when the fastText dependencies
            are not met.
        ------------
        Raise
            ValueError: if no column is available or the column does not
            exist.
        ------------
        Example
            >>> df = nlp.NLPFrame({'text_col' : ['I love travelling to Japan and
                                    eating Mexican food but I can only speak
                                    English!']})

            >>> df.detect_language()


            [1]   language
               0  English

        ------------
        '''

        if not fasttext_dependencies(self.fasttext_model):
            print('Dependencies are not met. Please read the instructions or contact the developers for further details')
            return None

        column = column if column else self.column
        if not column:
            raise ValueError('There is no column with text in the NLPFrame')
        try:
            pd_df_col = self.__getitem__(column)
        except KeyError:
            raise ValueError(f"The column {column} doesn't exist in the NLPFrame")

        model = fasttext.load_model(self.fasttext_model)
        # Join the rows with a space so that words of neighbouring rows do
        # not run together, and drop newlines because fastText predicts on a
        # single line of text.
        text = ' '.join(pd_df_col).replace('\n', ' ')
        predictions = model.predict(text)

        # The label ends in the language code (e.g. "__label__en"). Taking
        # everything after the last "__" also keeps three-letter codes
        # intact, which the former ``[-2:]`` slice cut down to two letters.
        code = predictions[0][0].rsplit('__', 1)[-1]
        if len(code) == 2:
            language = languages.get(alpha_2 = code)
        else:
            language = languages.get(alpha_3 = code)
        # Fall back to the raw code when the lookup finds nothing.
        name = language.name if language is not None else code
        return pd.DataFrame({'language': [name]})
Пример #32
0
def standardize_language(code):
    """Match `code` to a standard RFC5646 or RFC3066 language. The following
    approaches are tried in order:
    * Match `code` directly as an RFC5646 language string.
    * Treat `code` as an ISO 639-2 terminological code, take the ISO 639-1
    code of that language and match it as a language string.
    * Treat `code` as an ISO 639-2 bibliographic synonym, map it to the
    terminological code and continue as above.
    If no results are found, `None` is returned.

    http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dclanguage
    http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2.12

    :param code: string with a language code ('en-GB', ...)
    :returns: `LanguageTuple` with the lower-cased code and the description
    tags, or `None` if the language could not be identified.
    """
    if not code:
        return None

    # Try RFC5646 (for EPUB 3).
    if tags.check(code):
        return LanguageTuple(code=code.lower(),
                             description=tags.description(code))

    # Try RFC3066 (for EPUB 2): find the ISO 639-1 code for the language.
    try:
        short_code = languages.get(iso639_2T_code=code).iso639_1_code
    except KeyError:
        # Not a terminological code; maybe a bibliographic synonym.
        if code not in ISO_6639_2_B.keys():
            return None
        try:
            synonym = languages.get(iso639_2T_code=ISO_6639_2_B[code])
            short_code = synonym.iso639_1_code
        except KeyError:
            return None

    # Check the ISO 639-1 code against RFC5646.
    if not tags.check(short_code):
        return None
    return LanguageTuple(code=short_code.lower(),
                         description=tags.description(short_code))
Пример #33
0
def normalize_language(language):
    """Return the lower-cased language name for an ISO 639 code.

    The value is tried as an alpha-2 code and then as an alpha-3 code. If
    neither lookup finds a language, the input is returned unchanged.
    """
    for lookup_key in ("alpha_2", "alpha_3"):
        try:
            match = languages.get(**{lookup_key: language})
        except KeyError:
            continue
        # A miss may come back as None rather than as KeyError. Assigning
        # the result to ``language`` lost the input and failed on ``.name``.
        if match is not None:
            return match.name.lower()

    return language
Пример #34
0
def display_language_name_filter(ctx, locale):
    """Return the display name of the language part of a locale string.

    The part before the first ``_`` is looked up as a bibliographic code and
    converted to its terminology code, which is then parsed as a locale.
    """
    language_code = locale.partition('_')[0]
    term_code = languages.get(bibliographic=language_code).terminology

    try:
        display_name = Locale.parse(term_code).language_name
    except UnknownLocaleError:
        # Fallback value is the generated value in English or the code
        display_name = constants.LANGUAGES.get(term_code, locale)
    return display_name
Пример #35
0
def normalize_language(language):
    """Return the lower-cased language name for an ISO 639 code.

    The value is looked up as alpha-2 and then as alpha-3 code. When no
    language is found the input is returned unchanged.
    """
    from pycountry import languages  # get ISO list of languages for lookup

    for lookup_key in ("alpha_2", "alpha_3"):
        try:
            found = languages.get(**{lookup_key: language})
        except KeyError:
            continue
        # The lookup may report a miss with None instead of KeyError; the
        # result must not replace ``language`` before it has been checked.
        if found is not None:
            return found.name.lower()
    return language
Пример #36
0
def normalize_language(language):
    """Replace an ISO 639 code by the lower-cased language name.

    Both the alpha-2 and the alpha-3 lookup always run, the second one on
    whatever the first one left in ``language``. Without any match the
    input is returned unchanged.
    """
    for lookup_key in ("alpha_2", "alpha_3"):
        query = {lookup_key: language}
        try:
            match = languages.get(**query)
            language = match.name.lower() if match else language
        except KeyError:
            continue

    return language
 def print_languages_ios_639(langauge_set):
     """Format ISO 639-1 codes as ``code:name`` lines.

     Returns one line per code, each ending in a newline, or ``None`` after
     logging a warning if anything goes wrong.

     NOTE(review): ``self`` is not a parameter; it presumably comes from an
     enclosing scope - confirm.
     """
     try:
         from pycountry import languages
         return ''.join(
             '{0}:{1}\n'.format(lang, languages.get(iso639_1_code=lang).name)
             for lang in langauge_set
         )
     except Exception as ex:
         self.logger.warning('Exception with language decoding according to ISO 693-1: {0}'.format(str(ex)))
     return None
 def print_languages_ios_639(langauge_set):
     """Render a ``code:name`` line for every ISO 639-1 code given.

     The lines are returned as one string. If a lookup fails, a warning is
     logged and ``None`` is returned instead.

     NOTE(review): the warning goes through ``self``, which is not a
     parameter of this function - confirm where it comes from.
     """
     try:
         from pycountry import languages
         lines = []
         for lang in langauge_set:
             name = languages.get(iso639_1_code=lang).name
             lines.append('{0}:{1}\n'.format(lang, name))
         return ''.join(lines)
     except Exception as ex:
         self.logger.warning('Exception with language decoding according to ISO 693-1: {0}'.format(str(ex)))
     return None
Пример #39
0
def get_iso_lang_code(lang):
    """
    Convert a two-letter ISO code into a three-letter ISO code.

    :param lang: Two-letter code, looked up with ``languages.get(alpha_2=...)``
    :return: The record's ``bibliographic`` attribute when it has one,
        otherwise its ``alpha_3`` attribute; ``None`` when the lookup
        yields ``None``
    """
    record = languages.get(alpha_2=lang)
    if record is None:
        return None

    # Prefer the bibliographic code whenever the record carries one.
    field = "bibliographic" if hasattr(record, "bibliographic") else "alpha_3"
    return getattr(record, field)
Пример #40
0
def language(v):
    """Coerce ``v`` into a language record.

    An instance of ``languages.data_class_base`` is returned as is.  Any
    other value is looked up as a ``name``, then as an ``alpha_3`` code,
    then as an ``alpha_2`` code; the first lookup that yields a record wins.

    Raises ``ValueError`` when none of the lookups produces a record.
    """
    if isinstance(v, languages.data_class_base):
        return v

    for field in ('name', 'alpha_3', 'alpha_2'):
        try:
            found = languages.get(**{field: v})
        except KeyError:
            continue
        if found is not None:
            return found

    raise ValueError('invalid language: %r' % v)
Пример #41
0
def get_lang(code, iso639=None):
    """Return a language instance for the given code.

    ``code`` may carry dotted sub-module parts after the language code.
    Only the part before the first dot is treated as the language code and
    it must be two or three characters long.

    When ``iso639`` is ``None`` it is guessed from that length: ``1`` for
    two characters, ``3`` for three.  The code is translated through
    pycountry to the record's ``terminology`` value and the matching module
    is loaded with ``import_lang_module``.

    Raises ``ValueError`` for a code of the wrong length, when the pycountry
    lookup fails and the code cannot be imported directly, or when no
    language module can be imported.  Raises ``ImportError`` when the
    imports inside the lookup fail and ``iso639`` is not ``3``.
    """
    head = code.split('.', 1)[0]
    if not 2 <= len(head) <= 3:
        raise ValueError('%r is an invalid language code' % code)

    def make_lang(code, submods):
        # Rebuild the dotted module path before importing it.
        code = '.'.join([code] + list(submods))
        try:
            module = import_lang_module(code)
            return module.__lang__()
        except ImportError:
            raise ValueError('Hangulize does not support %s' % code)

    # split module path
    pieces = code.split('.')
    code, submods = pieces[0], pieces[1:]

    # guess if ISO 639-1 or 639-3
    if iso639 is None:
        iso639 = {2: 1, 3: 3}.get(len(code))

    try:
        # fix the warning when importing pycountry
        import logging
        from pycountry import languages

        if not getattr(logging, 'NullHandler', None):

            class NullHandler(logging.Handler):
                def emit(self, record):
                    pass

            logging.NullHandler = NullHandler

        pycountry_logger = logging.getLogger('pycountry.db')
        pycountry_logger.addHandler(logging.NullHandler())

        lookup_fields = ('alpha2', 'bibliographic', 'terminology')
        attr = lookup_fields[iso639 - 1]
        code = languages.get(**{attr: code}).terminology
    except TypeError:
        # out of 2~3 characters
        raise ValueError('%r is an invalid language code' % code)
    except KeyError:
        # The lookup failed: try the raw code as a module name.
        try:
            return make_lang(code, submods)
        except ValueError:
            raise ValueError(
                '{0!r} is an invalid ISO 639-{1} code'.format(code, iso639))
    except ImportError:
        if iso639 != 3:
            raise ImportError('ISO 639-%d requires pycountry' % iso639)
    return make_lang(code, submods)
Пример #42
0
    def get_display_name(self, locale):
        """Return a display name for the language part of ``locale``.

        The text before the first ``_`` is looked up with
        ``languages.get(bibliographic=...)`` and the record's ``terminology``
        code is passed to ``Locale.parse``.

        If that raises ``UnknownLocaleError``, the name stored for ``locale``
        in the ``available_languages`` setting is returned, or ``locale``
        itself when the setting has no such entry.
        """
        bibliographic_code = locale.partition('_')[0]
        record = languages.get(bibliographic=bibliographic_code)
        terminology_code = record.terminology

        # The setting is evaluated before the try block, whether or not the
        # fallback ends up being needed.
        raw_setting = self.settings.get('available_languages', '[]')
        fallback_names = dict(literal_eval(raw_setting))

        try:
            display_name = Locale.parse(terminology_code).language_name
        except UnknownLocaleError:
            # Fallback value is the generated value in English or the code
            return fallback_names.get(locale, locale)
        return display_name
Пример #43
0
    def get_display_name(self, locale):
        """Resolve the language portion of ``locale`` to a display name.

        Everything before the first ``_`` in ``locale`` is treated as a
        bibliographic code and converted to the ``terminology`` code of the
        record returned by ``languages.get``; ``Locale.parse`` of that code
        supplies the name.

        On ``UnknownLocaleError`` the ``available_languages`` setting is
        consulted for ``locale``, with ``locale`` itself as last resort.
        """
        code_part = locale.partition('_')[0]
        term = languages.get(bibliographic=code_part).terminology

        # Read and evaluate the setting ahead of the try block; it is only
        # consulted in the fallback branch below.
        setting_text = self.settings.get('available_languages', '[]')
        known_names = dict(literal_eval(setting_text))

        try:
            parsed_locale = Locale.parse(term)
            return parsed_locale.language_name
        except UnknownLocaleError:
            # Fallback value is the generated value in English or the code
            return known_names.get(locale, locale)
Пример #44
0
    def execute(self, maybe_code):
        """Return the ``iso639_3_code`` of the language matching ``maybe_code``.

        A dict argument is replaced by its ``'#text'`` entry.  The value is
        then tried against every index of ``languages`` in turn; the first
        lookup that does not raise ``KeyError`` decides the result.

        Returns ``None`` when every lookup raises ``KeyError``.
        """
        candidate = maybe_code['#text'] if isinstance(maybe_code, dict) else maybe_code

        # Force indices to populate
        if not languages._is_loaded:
            languages._load()

        for index_name in languages.indices.keys():
            try:
                record = languages.get(**{index_name: candidate})
                return record.iso639_3_code
            except KeyError:
                pass
        return None
Пример #45
0
    def _get_subtitle_info(self):
        """Find a subtitle matching this object's release and language.

        The language name comes from ``languages.get(alpha2=...)`` for
        ``self.subtitle_language``.  Each search result is turned into two
        case-insensitive patterns (its version-stripped ``release_name`` and
        its ``lang``).  A result matches when the release pattern matches
        the version-stripped ``self.subtitle_release`` and the language
        pattern matches the language name.

        Stores the URL of the first matching result and returns ``True``;
        returns ``False`` when no result matches.
        """
        language_name = languages.get(alpha2=self.subtitle_language).name
        wanted_release = self.strip_version(self.subtitle_release)

        for candidate in self.__search_subtitles():
            release_pattern = compile(
                self.strip_version(candidate['release_name']), flags=IGNORECASE)
            language_pattern = compile(candidate['lang'], flags=IGNORECASE)

            if not release_pattern.match(wanted_release):
                continue
            if not language_pattern.match(language_name):
                continue

            self.__sub_url = candidate['url']
            return True
        return False
Пример #46
0
def translate_iana_language_code_to_iso_639_3(iana_lang_code):
    """
    Translates the 2 character iana_lang_code provided into the appropriate
    ISO-639 3 character language code.

    The lookup is best effort: if it raises, the error is logged and the
    input code is returned unchanged.

    :param iana_lang_code: code looked up via ``languages.get(iso639_1_code=...)``
    :return: str corresponding ISO-639 3 character language code, or
        ``iana_lang_code`` itself when the lookup fails
    """
    # Default to the input so a failed lookup still yields a usable value.
    iso_lang_code = iana_lang_code

    try:
        language = languages.get(iso639_1_code=iana_lang_code)
        iso_lang_code = language.iso639_3_code
    except Exception as e:
        # ``except Exception, e`` is Python-2-only syntax; the ``as`` form
        # works on Python 2.6+ and Python 3.
        log.error('Error retrieving ISO-639 language code: %s', e)

    # The result was computed but never handed back to the caller.
    return iso_lang_code
Пример #47
0
def get_lang(code, iso639=None):
    """Return a language instance for the given code.

    The part of ``code`` before the first dot is the language code (two or
    three characters); anything after it names sub-modules.  If ``iso639``
    is ``None`` it is derived from the code length: ``1`` for two
    characters, ``3`` for three.  pycountry maps the code to the record's
    ``terminology`` value, and ``import_lang_module`` loads the module for
    the resulting dotted path.

    Raises ``ValueError`` for a code of the wrong length, when the pycountry
    lookup fails and the raw code cannot be imported either, or when no
    language module can be imported.  Raises ``ImportError`` when the
    imports inside the lookup fail and ``iso639`` is not ``3``.
    """
    language_part = code.split('.', 1)[0]
    if not 2 <= len(language_part) <= 3:
        raise ValueError('%r is an invalid language code' % code)

    def make_lang(code, submods):
        # Join language code and sub-modules back into one module path.
        code = '.'.join([code] + list(submods))
        try:
            lang_module = import_lang_module(code)
            return lang_module.__lang__()
        except ImportError:
            raise ValueError('Hangulize does not support %s' % code)

    # split module path
    segments = code.split('.')
    code = segments[0]
    submods = segments[1:]

    # guess if ISO 639-1 or 639-3
    if iso639 is None:
        iso639 = {2: 1, 3: 3}.get(len(code))

    try:
        # fix the warning when importing pycountry
        import logging
        from pycountry import languages

        if not getattr(logging, 'NullHandler', None):

            class NullHandler(logging.Handler):
                def emit(self, record):
                    pass

            logging.NullHandler = NullHandler

        db_logger = logging.getLogger('pycountry.db')
        db_logger.addHandler(logging.NullHandler())

        field_by_standard = ('alpha2', 'bibliographic', 'terminology')
        attr = field_by_standard[iso639 - 1]
        code = languages.get(**{attr: code}).terminology
    except TypeError:
        # out of 2~3 characters
        raise ValueError('%r is an invalid language code' % code)
    except KeyError:
        # The lookup failed: fall back to importing the raw code.
        try:
            return make_lang(code, submods)
        except ValueError:
            raise ValueError(
                '%r is an invalid ISO 639-%d code' % (code, iso639))
    except ImportError:
        if iso639 != 3:
            raise ImportError('ISO 639-%d requires pycountry' % iso639)
    return make_lang(code, submods)
Пример #48
0
def get_languages_iso3(codes):
    """Return a ``+``-joined, sorted, de-duplicated list of 3-letter codes.

    ``codes`` may be ``None`` (treated as empty).  Entries that are ``None``
    or whose stripped length is not 2 or 3 are skipped.  The remaining
    entries are lower-cased and stripped.  Two-character entries are
    converted with ``languages.get(iso639_1_code=...)`` and dropped when
    that lookup raises ``KeyError``; three-character entries are kept as
    they are.  ``"eng"`` is always part of the result.
    """
    selected = {"eng"}

    for entry in ([] if codes is None else codes):
        if entry is None or len(entry.strip()) not in (2, 3):
            continue

        code = entry.lower().strip()
        if len(code) == 2:
            try:
                code = languages.get(iso639_1_code=code).iso639_3_code
            except KeyError:
                continue
        selected.add(code)

    return "+".join(sorted(selected))
Пример #49
0
def display_language_name_filter(ctx, locale):
    """Return the language name for the language part of ``locale``.

    The text before the first ``_`` is looked up with
    ``languages.get(bibliographic=...)``; the record's ``terminology`` code
    is parsed with ``Locale.parse`` and its ``language_name`` is returned.
    ``ctx`` is accepted but not used.
    """
    bibliographic_code = locale.partition('_')[0]
    record = languages.get(bibliographic=bibliographic_code)
    parsed_locale = Locale.parse(record.terminology)
    return parsed_locale.language_name
Пример #50
0
def get_code(language):
    """Return the ``bibliographic`` code of the language named ``language``.

    The record is fetched with ``languages.get(name=...)``.  ``None`` is
    returned when a ``KeyError`` is raised along the way.
    """
    try:
        record = languages.get(name=language)
        code = record.bibliographic
    except KeyError:
        code = None
    return code
Пример #51
0
def language_code(language):
    """Return the record ``languages.get(name=...)`` yields for ``language``.

    ``None`` is returned when the lookup raises ``KeyError``.
    """
    try:
        record = languages.get(name=language)
    except KeyError:
        record = None
    return record