def letterSwap(word):
    """Rewrite look-alike letters in ``word`` so it uses a single alphabet.

    Every Latin letter that has an identically shaped Cyrillic twin is first
    replaced by that twin. If the converted word then passes
    ``AlphabetDetector.only_alphabet_chars(..., 'CYRILLIC')`` it is returned.
    Otherwise the substitution is run in the opposite direction on the
    converted word, so every Cyrillic look-alike (including ones that were
    already Cyrillic in the input) becomes its Latin twin.

    Args:
        word: The string to normalise.

    Returns:
        The converted string.
    """
    detector = AlphabetDetector()

    # Latin letter -> identically shaped Cyrillic letter.
    to_cyrillic = {
        'a': 'а', 'c': 'с', 'e': 'е', 'o': 'о', 'p': 'р', 'y': 'у',
        'A': 'А', 'B': 'В', 'C': 'С', 'E': 'Е', 'H': 'Н', 'K': 'К',
        'M': 'М', 'O': 'О', 'P': 'Р', 'T': 'Т', 'X': 'Х',
    }
    # Same pairs, looked up from the Cyrillic side.
    to_latin = {cyr: lat for lat, cyr in to_cyrillic.items()}

    # Keys and values never overlap, so a one-pass translation yields the
    # same string as replacing the letters one after another.
    as_cyrillic = word.translate(str.maketrans(to_cyrillic))
    if detector.only_alphabet_chars(as_cyrillic, 'CYRILLIC'):
        return as_cyrillic

    # Not purely Cyrillic after the swap: fall back to the Latin spelling.
    return as_cyrillic.translate(str.maketrans(to_latin))
def cleanText(text):
    """Repair mixed-alphabet words in ``text`` and stem every word.

    A word that ``AlphabetDetector.only_alphabet_chars`` accepts as entirely
    'CYRILLIC' or entirely 'LATIN' is kept unchanged. Any other word is
    passed through ``letterSwap``. The function assumes the text contains
    only Latin and Cyrillic characters. Afterwards each word is stemmed with
    ``RussianStemmer().stem``.

    Args:
        text: An iterable of words.

    Returns:
        A ``(stems, is_broken)`` tuple: the list of stemmed words, and a
        flag that is True when at least one word had to be repaired.
    """
    detector = AlphabetDetector()
    stemmer = RussianStemmer()

    found_mixed = False
    repaired = []
    for token in text:
        single_alphabet = (
            detector.only_alphabet_chars(token, 'CYRILLIC')
            or detector.only_alphabet_chars(token, 'LATIN')
        )
        if not single_alphabet:
            # Mixed word: unify its look-alike letters before stemming.
            found_mixed = True
            token = letterSwap(token)
        repaired.append(token)

    return list(map(stemmer.stem, repaired)), found_mixed
def sameAlphabet(self, vLine):
    """Tell whether ``vLine`` is written in at most one alphabet.

    ``vLine`` is decoded as UTF-8 and handed to
    ``AlphabetDetector.detect_alphabet``.

    Args:
        vLine: An object with a ``decode('utf-8')`` method
            (presumably ``bytes`` -- confirm against callers).

    Returns:
        True when the detector reports zero or one alphabets, else False.
    """
    detector = AlphabetDetector()
    alphabets = detector.detect_alphabet(vLine.decode('utf-8'))
    return len(alphabets) <= 1
def isLatin(string):
    """Return the result of ``AlphabetDetector.is_latin`` for ``string``.

    The check is best-effort: if building the detector or running the check
    raises an ordinary error, the function returns False instead of
    propagating it.

    Args:
        string: The value to test.

    Returns:
        Whatever ``AlphabetDetector().is_latin(string)`` returns, or False
        when that call fails with an ``Exception``.
    """
    try:
        return AlphabetDetector().is_latin(string)
    except Exception:
        # Keep the "treat failures as not Latin" fallback, but do not use a
        # bare ``except``: KeyboardInterrupt and SystemExit must still
        # propagate so the program can be interrupted or shut down.
        return False
import lxml.html import re from alphabet_detector import AlphabetDetector ad = AlphabetDetector() global dictionary dictionary = {} global text text = '' punct = [',', '!', '。', '”', ':', '?', '“', '……'] global sent sent = '' def markdown(i): state = 'exists' ind = 1 while state == 'exists': workstrng = i[:ind] if workstrng in dictionary: state = 'exists' ind += 1 else: state = 'none' mark = workstrng[:-1] markup = dictionary[mark] transcr = markup[::2] transcr = '; '.join(transcr) english = markup[1::2] english = '; '.join(english) markedstr = '<w><ana lex="' + mark + '" transcr=' + transcr + '" sem="' + english + '"/>' + mark + '</w>\n' global sent
import Cutiepii_Robot.modules.sql.locks_sql as sql from Cutiepii_Robot import dispatcher, DRAGONS, LOGGER from Cutiepii_Robot.modules.disable import DisableAbleCommandHandler from Cutiepii_Robot.modules.helper_funcs.chat_status import ( can_delete, is_user_admin, user_not_admin, is_bot_admin, user_admin, ) from Cutiepii_Robot.modules.log_channel import loggable from Cutiepii_Robot.modules.connection import connected from Cutiepii_Robot.modules.helper_funcs.alternate import send_message, typing_action ad = AlphabetDetector() LOCK_TYPES = { "audio": Filters.audio, "voice": Filters.voice, "document": Filters.document, "video": Filters.video, "contact": Filters.contact, "photo": Filters.photo, "url":