def check_wiktionary_language(target_lang):
    """Sanity-check that Wiktionary language names for *target_lang* resolve to unique codes.

    'Proto-' languages are skipped.  Raises AssertionError when two distinct
    names resolve to the same language code.
    """
    code_to_name = {}
    for name in WIKT_LANGUAGE_NAMES[target_lang]:
        # Proto-languages have no modern code; skip them.
        if name.startswith('Proto-'):
            continue
        code = str(langcodes.find(name))
        assert code not in code_to_name, \
            "%r and %r have the same code" % (code_to_name[code], name)
        code_to_name[code] = name
def language_to_code(language_name):
    """Resolve a human-readable language name to its language code.

    Returns the empty string when the name cannot be resolved.
    """
    try:
        return langcodes.find(language_name).language
    except LookupError:
        # Unknown name: fall back to the empty string.
        return ""
async def voice(self, ctx, ttvoice):
    """Change the TTS voice from a two-letter code or a full language name."""
    # Two characters: treat as a bare language code; otherwise look the name up.
    if len(ttvoice) == 2:
        lang = langcodes.Language.make(language=ttvoice)
    else:
        lang = langcodes.find(ttvoice)
    display = str(lang.display_name())
    # langcodes renders unresolvable codes as "Unknown language [xx]".
    if display != f'Unknown language [{ttvoice}]':
        self.ttsvoice = str(lang.language)
        await ctx.send('Voice changed to %s' % lang.display_name())
    else:
        await ctx.send('Incorrect language code.')
def yake_keywords(
    texts: List[str],
    language: str = "slovenian",
    max_len: int = 1,
    stopwords: List[str] = None,
):
    """Extract YAKE keywords from each text.

    YAKE expects a language code rather than a full language name, so the
    name is converted first.  Returns one keyword list per input text.
    """
    lang_code = langcodes.find(language).language
    extractor = yake.KeywordExtractor(lan=lang_code, n=max_len, stopwords=stopwords)
    return [extractor.extract_keywords(text) for text in texts]
def tll(self, message, language, *args):
    """Translate the remainder of *message* into *language* (old-style coroutine).

    Sends the translated text back to the message's channel, or an error
    message when the language lookup fails.
    """
    try:
        language_code = str(langcodes.find(language))
    except LookupError:
        yield from self.client.send_message(message.channel, 'Could not find language')
        return
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate instead of being swallowed here.
        yield from self.client.send_message(
            message.channel, 'A language lookup error occured')
        return
    # Drop the command and language words; translate the rest of the message.
    yield from self.client.send_message(
        message.channel,
        translate.translate(message.content.split(' ', 2)[2], language_code))
def _create_language(self, movie_uri, lang):
    """Add a dbpedia:Language node for *lang* to the graph and return its URI."""
    encoded_lang = to_turtle_fmt(lang)
    # I'm sorry it's the only one it doesn't detect
    code = "es" if encoded_lang == "Castellano" else langcodes.find(lang).language
    language_uri = self.baseURI[encoded_lang]
    triples = (
        (language_uri, RDF.type, self.dbpedia.Language),
        (language_uri, self.dbpedia.languageCode, Literal(code)),
    )
    for triple in triples:
        self.g.add(triple)
    return language_uri
def languageDetect(lang):
    """Map a language name to a supported locale tag ('en-GB' or a xx-ES variant).

    Raises Exception for languages outside the supported set.
    """
    spanish_region = ('es', 'ca', 'eu', 'gl')
    code = langcodes.find(lang).language
    if code == 'en':
        return code + '-GB'
    if code in spanish_region:
        return code + '-ES'
    raise Exception(
        'Sorry, language not supported for analysis.\nPlease, select a valid language.'
    )
def language(self, value):
    """Set the language from a name or tag; falls back to '' when unresolvable.

    Tries to resolve *value* first as a language name, then as a language
    tag; logs a warning for non-empty values that cannot be resolved.
    """
    # Fixme: better ???
    invalid = False
    try:
        # First try *value* as a human-readable language name.
        self._language = langcodes.find(value)
    except LookupError:
        try:
            # Fall back to parsing *value* as a language tag.
            self._language = langcodes.get(value)
        except Exception:  # narrowed from bare except: don't swallow SystemExit
            invalid = True
    except Exception:  # narrowed from bare except: don't swallow SystemExit
        invalid = True
    if invalid:
        self._language = ''
        if value:
            self._logger.warning('Unknown language {}'.format(value))
def extract_lang(self, lang: str) -> Optional[str]:
    """
    Extract a language code from raw text.

    Note: the original annotation claimed Optional[List[str]], but the code
    returns a single code string (langcodes `.language`) or None.

    :param lang: language raw text
    :return: the language code, or None for empty/unknown input
    :rtype: str or None
    """
    lng = lang.strip()
    if not lng:
        return None
    try:
        return langcodes.find(lng).language
    except LookupError:
        # Report each unknown language only once.
        if lng not in self.missing_languages:
            self.missing_languages.add(lng)
            print("unknown language: {}".format(lng))
        return None
import langcodes
from string import punctuation
import os
import logging
import codecs
from six import string_types
from builtins import str

# The language management should be in `pke.utils` but it would create a circular import.

# Map a language name to its alpha-2 code via langcodes.
get_alpha_2 = lambda l: langcodes.find(l).language

# lang_stopwords = {get_alpha_2(l): l for l in stopwords._fileids}

# Map alpha-2 codes to Snowball stemmer names; English uses 'porter' instead.
# NOTE(review): SnowballStemmer and defaultdict are not imported in this
# visible header — presumably imported elsewhere in the file; confirm.
lang_stem = {get_alpha_2(l): l for l in set(SnowballStemmer.languages) - set(['porter'])}
lang_stem.update({'en': 'porter'})

# Per-language flags so each "missing stemmer/stopwords" warning is shown once.
PRINT_NO_STEM_WARNING = defaultdict(lambda: True)
PRINT_NO_STWO_WARNING = defaultdict(lambda: True)


def get_stopwords(lang):
    """Provide stopwords for the given language, or default value.

    If stopwords are not available for a given language, a default value is returned and a warning is displayed
def parse_track(self, item):
    """Parse one track declaration of the form: filename (kind language_code) "label".

    Fills in defaults when pieces are missing (kind 'subtitles', the current
    locale's language or 'en', an auto-generated label) and reports a
    directive error listing the guessed values when parsing fails.
    Returns a `track` node built from the parsed options.
    """
    options = {}
    error = False
    original = item  # preserve for error messages
    item = item.replace('\r', ' ').replace('\n', ' ')
    try:
        # Split out the parenthesized "(kind language)" group from the rest.
        head, _emptyStr, lang_kind, _emptyStr, tail = re.split(
            r"(^| )\((.*?)\)( |$)", item)
        lang_kind = lang_kind.split()  # split input into a list of words
        kinds = set(lang_kind) & set(('captions', 'descriptions', 'chapters',
                                      'metadata', 'subtitles'))
        # Find kind
        for kind in kinds:
            if 'kind' not in options:
                options['kind'] = kind
            else:
                # More than one kind word: flag an error and keep the first.
                error = True
                continue
            lang_kind.remove(kind)
        # Find language
        for lang in lang_kind:
            if 'language' not in options:
                # Accept the word if it standardizes to a known language code.
                if langcodes.code_to_names(
                        'language',
                        langcodes.get(langcodes.standardize_tag(lang)).language):
                    options['language'] = langcodes.standardize_tag(lang)
                else:
                    # lang is not a lang code. Try interpreting as a language name
                    try:
                        options['language'] = str(langcodes.find(lang))
                    except:
                        error = True
                        continue
            else:
                # A language was already found; extra words are an error.
                error = True
                continue
        # Remove the parsed "(...)" group from the remaining item text.
        item = head + ' ' + tail
    except:
        error = True
    # Apply defaults for anything not parsed above.
    if 'kind' not in options:
        options['kind'] = 'subtitles'
    if 'language' not in options:
        try:
            options['language'] = langcodes.standardize_tag(getlocale()[0])
        except:
            options['language'] = 'en'
    # find label
    try:
        # A label is a quoted string; \2 back-references the opening quote.
        head, _emptyStr, _quote, label, _emptyStr, tail = re.split(
            r"""(^| )(["'])(.*?)\2( |$)""", item)
        if head and tail:
            # Text on both sides of the quoted label is ambiguous input.
            error = True
        item = head + tail
        options['label'] = label.strip()
    except:
        # No quoted label: synthesize one like "Subtitles in English".
        try:
            options['label'] = options['kind'].capitalize(
            ) + ' in ' + langcodes.get(
                options['language']).autonym().capitalize()
        except:
            error = True
            options['label'] = None
    # get filename
    options['src'] = self.uri_check(item)
    # return error
    if error:
        self.state_machine.reporter.error(
            'Error in "%s" directive: \n Problems encountered parsing track "%s" \n\n'
            'Guessing the following values: \n'
            'filename: "%s" \n'
            'kind: "%s" \n'
            'language: "%s" \n'
            'label: "%s" \n\n'
            'Track kinds should be chosen from one of the following: \n'
            'captions, descriptions, chapters, metadata, subtitles \n'
            'Track languages should be '
            'given as BCP 47 compliant language codes. \n'
            'Track declarations should take the following form: \n'
            'filename (kind language_code) "label"\n'
            'Tracks must have one filename and one language_code. \n'
            'If a kind is not specified, "subtitles" will be assumed. \n'
            'If a label is not provided, it will be auto-generated from the kind and language specified.'
            % (self.name, original, options['src'], options['kind'],
               options['language'], options['label']),
            nodes.literal_block(self.block_text, self.block_text),
            line=self.lineno)
    track_node = track(self.block_text, **options)
    return track_node
def lang_convert(self, language):
    """Return the language code for a human-readable language name."""
    resolved = langcodes.find(language)
    return resolved.language
sort_by = 'highest-rated' # popularity|newest timeout = 15 csv_file_name = 'Udemy Free Courses.csv' # %% # - Choose language # lc.get('en').display_name() while True: language_name = input('Please type your language (or type x to quit): ') language_err = False lang_code = '' if language_name.lower().strip() == "x": exit(0) break try: lang_code = lc.find(language_name).language except LookupError as err: print(err) language_err = True except Exception as err: print("Something went wrong") print(type(err).__name__) language_err = True if not language_err: break #print(lang_code) # %% # - Methods def generate_page_dataframe(elements_collection):
def territories_from_language(lang: str) -> List[str]:
    """Return the ISO3 territory codes associated with language name *lang*."""
    iso_code = langcodes.find(lang).language
    territories_iso2 = LANGUAGE2TERRITORIES[iso_code]
    territories_iso3 = coco.convert(territories_iso2, src="ISO2", to="ISO3")
    return territories_iso3
def language_to_iso2(lang: str) -> str:
    """Convert a language name to its two-letter ISO 639-1 code."""
    resolved = langcodes.find(lang)
    return resolved.language