def __init__(self, frame):
    """Build the widgets hosted in *frame*.

    Creates an "About" button plus a read-only language selector whose
    current value is the autonym (native name) of the configured language.
    """
    self.frame = frame
    self.about_fn_btn = Button(self.frame, text=_("About Funing"))

    # Language combobox: display the active language by its own name.
    self.lang_code = settings.lang_code
    self.lang_combobox_var = tk.StringVar(self.frame)
    self.lang_combobox_var.set(Language.make(self.lang_code).autonym())
    self.lang_combobox = ttk.Combobox(
        self.frame,
        textvariable=self.lang_combobox_var,
        values=tuple(self.locale_lang_display_names()),
        state="readonly",
    )
def find_keywords(text, keywords):
    """Return True when any of *keywords* occurs in *text*, verbatim or stemmed.

    The text's language is auto-detected so a matching Snowball stemmer can
    be used; unsupported languages fall back to English.

    Arguments:
        text (str): the news text to search.
        keywords (iterable of str): candidate keywords.

    Returns:
        bool: True on the first match of a token against a keyword's
        lowercase form or stem (comparing both raw and stemmed tokens).
    """
    # Auto-detect the language of the news text to pick the stemmer.
    lang = Language.make(language=detect(text)).language_name().lower()
    try:
        stemmer = SnowballStemmer(lang)
    except ValueError:
        # SnowballStemmer raises ValueError for unsupported languages;
        # fall back to English instead of failing (was a bare `except:`).
        stemmer = SnowballStemmer('english')
    # Pre-compute each keyword's (lowercase, stem) pair once.
    keyword_pairs = [(word.lower(), stemmer.stem(word)) for word in keywords]
    for token in word_tokenize(text):
        # Stem each token once instead of once per keyword (the original
        # recomputed stemmer.stem(word) inside the keyword loop).
        token_stem = stemmer.stem(token)
        for pair in keyword_pairs:
            if token in pair or token_stem in pair:
                return True
    return False
async def cevir(client: Client, message: Message):
    """Translate the given text, replied message, or replied document into Turkish.

    Shows a placeholder message while working, then edits it in place with
    the detected source language and the translation.
    """
    # Log the command, then put up a "please wait" placeholder.
    await log_yolla(client, message)
    ilk_mesaj = await message.edit("__Bekleyin..__", disable_web_page_preview=True)

    girilen_yazi = message.text
    cevaplanan_mesaj = message.reply_to_message

    # Nothing to translate: no reply and no text after the command word.
    if not cevaplanan_mesaj and len(girilen_yazi.split()) == 1:
        await ilk_mesaj.edit(
            "__Çeviri yapabilmem için bişeyler söyleyin ya da mesaj yanıtlayın..__"
        )
        return

    if not cevaplanan_mesaj:
        # Use everything after the command word as the input.
        girdi = girilen_yazi.split(" ", 1)[1]
    elif cevaplanan_mesaj.document:
        # Download the replied document and read its UTF-8 text content.
        gelen_dosya = await cevaplanan_mesaj.download()
        with open(gelen_dosya, "rb") as oku:
            veri_listesi = oku.readlines()
        girdi = "".join(veri.decode("UTF-8") for veri in veri_listesi)
        os.remove(gelen_dosya)
    elif cevaplanan_mesaj.text:
        girdi = cevaplanan_mesaj.text
    else:
        # Replied message is neither text nor a document.
        await ilk_mesaj.edit("__güldük__")
        return

    await ilk_mesaj.edit("Çevriliyor...")
    # Detect the source language name and translate into Turkish.
    gelen_mesaj_dili = Language.make(
        language=cevirici.detect(girdi).lang).display_name()
    cevrilmis_mesaj = cevirici.translate(girdi, dest='tr').text
    await ilk_mesaj.edit(f'`{gelen_mesaj_dili}`\n\n__{cevrilmis_mesaj}__')
def get_course_language(course_runs):
    """
    Gets the languages associated with a course. Used for the "Language"
    facet in Algolia.

    Arguments:
        course_runs (list): list of course runs for a course

    Returns:
        list: a list of supported languages for those course runs
    """
    # Deduplicate via a set comprehension; runs without a content_language
    # are skipped.
    return list({
        Language.make(language=run['content_language']).language_name()
        for run in course_runs
        if run.get('content_language')
    })
def locale_lang_display_names(self):
    """Return the autonym (native display name) for each configured locale.

    Returns:
        list: one autonym string per entry in ``settings.locale_langcodes``,
        in the same order.
    """
    # Comprehension instead of the original append loop (same result).
    return [Language.make(code).autonym() for code in settings.locale_langcodes]
def is_valid_locale(locale: str) -> bool:
    """Return True when *locale* parses into a valid language tag."""
    parsed = Language.make(locale)
    return parsed.is_valid()
def resource_runner(self):
    """
    Run through each resource searching for media and parse media for captions.

    Iterates every ResourceProvider subclass (minus excluded ones), parses
    its content for media links, and annotates each link's ``captions`` /
    ``meta_data`` lists by querying the YouTube or Vimeo caption APIs.
    """
    parser = Parser()
    for subclass in ResourceProvider.__subclasses__():
        if subclass.name in self.vconfig.exclude:
            continue
        print("Checking " + subclass.name)
        # Instantiate the provider once (the original built it twice).
        provider = subclass(vconfig=self.vconfig)
        self.course_name = provider.get_course_name()
        retrieved_data = provider.fetch()
        data = retrieved_data["info"]
        flat = retrieved_data["is_flat"]
        for content_pair in data:
            # Each content pair represents a page, or a discussion, etc.
            # (whole pages) if flat; otherwise a link and a location.
            self.to_check, self.no_check = parser.parse_content(content_pair, flat)
            # Validate that the media links contain captions.
            for link in self.to_check:
                if link["type"] == "youtube":
                    self._check_youtube_captions(link)
                if link["type"] == "vimeo":
                    self._check_vimeo_captions(link)
                if len(link["captions"]) == 0:
                    link["captions"].append("No captions")

def _check_youtube_captions(self, link):
    """Record the YouTube video's caption tracks on *link* (best effort)."""
    match = re.search(youtube_pattern, link["media_loc"])
    if not match:
        # Guard: the original crashed with AttributeError on an
        # unrecognized URL.
        link["captions"].append("N/A")
        link["meta_data"].append("Unrecognized YouTube URL")
        return
    video_id = match.group(1)
    r = requests.get(
        "https://www.googleapis.com/youtube/v3/captions?videoId={}&part=snippet&key={}".format(
            video_id, self.vconfig.youtube_api_key
        )
    )
    response = r.json()
    if not r.ok:
        link["captions"].append("N/A")
        link["meta_data"].append(response["error"]["message"])
        return
    try:
        for item in response["items"]:
            caption_lang_code = item["snippet"]["language"]
            lang_name = Language.make(language=caption_lang_code).language_name()
            if item["snippet"]["trackKind"] == "ASR":
                link["captions"].append(
                    "Automatic Speech Recognition: " + lang_name
                )
            elif caption_lang_code:
                link["captions"].append(lang_name)
    except (KeyError, TypeError):
        # Malformed API payloads stay best-effort (was a bare `except:`).
        pass

def _check_vimeo_captions(self, link):
    """Record the Vimeo video's text tracks on *link* (best effort)."""
    match = re.search(vimeo_pattern, link["media_loc"])
    if not match:
        # Guard: the original crashed with AttributeError on an
        # unrecognized URL.
        link["captions"].append("N/A")
        link["meta_data"].append("Unrecognized Vimeo URL")
        return
    video_id = match.group(3)
    r = requests.get(
        "https://api.vimeo.com/videos/{}/texttracks".format(video_id),
        headers={
            "Authorization": "bearer {}".format(self.vconfig.vimeo_access_token)
        },
    )
    if not r.ok:
        link["captions"].append("N/A")
        # BUG FIX: the original read ``response`` from the *previous*
        # (YouTube) request here — it was only assigned after this check,
        # so the error message was stale or the name undefined.
        try:
            link["meta_data"].append(r.json()["error"]["message"])
        except (ValueError, KeyError):
            link["meta_data"].append("HTTP {}".format(r.status_code))
        return
    response = r.json()
    try:
        for item in response["data"]:
            if item["language"]:
                lang_name = Language.make(
                    language=item["language"]
                ).language_name()
                link["captions"].append(lang_name)
    except (KeyError, TypeError):
        # Malformed API payloads stay best-effort (was a bare `except:`).
        pass