def getShortLink(link):
    """Pick the link variant to keep, or None when no variant qualifies.

    Links matching 'weibo.', 'twitter.' or 't.me/' (per matchKey) are
    rejected outright. Otherwise the raw form of the link is tried first:
    if its Telegraph title passes isCN, the result of shorter(raw_link, link)
    is returned. Failing that, the original link is returned when its own
    title passes isCN.

    Returns:
        The chosen link, or None when nothing qualifies.
    """
    if matchKey(link, ['weibo.', 'twitter.', 't.me/']):
        return None
    raw_link = getRawLink(link)
    raw_title = export_to_telegraph.getTitle(raw_link)
    if isCN(raw_title):
        return shorter(raw_link, link)
    # The title of the unmodified link is only fetched when the raw one fails.
    return link if isCN(export_to_telegraph.getTitle(link)) else None
def containCN(text):
    """Return True if at least one character of *text* passes isCN.

    Empty or None input yields False without calling isCN.
    """
    return bool(text) and any(isCN(ch) for ch in text)
def isCNIndex(key):
    """Return True if the text stored in `index` under *key* has a character passing isCN.

    A missing or empty entry yields False.
    """
    stored = index.get(key)
    return bool(stored) and any(isCN(ch) for ch in stored)
def calculateToSimplified(toSimplified, noAutoConvert, title):
    """Decide whether a conversion to simplified characters should happen.

    Args:
        toSimplified: explicit request; when truthy the answer is always True.
        noAutoConvert: when truthy (and toSimplified is falsy) the answer is
            always False.
        title: text scanned character by character when neither flag decides.

    Returns:
        True if forced, or if *title* contains a character that passes isCN
        but is rejected by hanzidentifier.is_simplified; False otherwise.
    """
    if toSimplified:
        return True
    if noAutoConvert:
        return False
    return any(
        isCN(ch) and not hanzidentifier.is_simplified(ch)
        for ch in title
    )
def suspiciousUser(user):
    """Return True when the user's display name looks suspicious.

    A display name that passes isCN is never suspicious. Otherwise the name
    is suspicious when it has at most two non-whitespace characters, or when
    it equals 'deleted account' ignoring case.
    """
    name = getDisplayUser(user)
    if isCN(name):
        return False
    # Whitespace is removed before measuring the length.
    compact = ''.join(name.split())
    return len(compact) <= 2 or name.lower() == 'deleted account'
def en2zhPiece(text):
    """Translate one piece of text, keeping its surrounding whitespace.

    The text is returned untouched when it is blank, starts with '[',
    passes isCN, or is shorter than three characters. Otherwise it is sent
    to translator.translate and the original leading and trailing whitespace
    is re-attached around the result.
    """
    if not text.strip() or text.startswith('['):
        return text
    if isCN(text) or len(text) < 3:
        return text
    print('translating', text)
    translated = translator.translate(text)
    # Leading whitespace is whatever lstrip() removed from the front.
    lead = text[:len(text) - len(text.lstrip())]
    # Trailing whitespace is everything past the rstrip()'d length
    # (an empty string when nothing was stripped).
    trail = text[len(text.rstrip()):]
    return lead + translated + trail
def shouldDelay(channel):
    """Decide whether *channel* should be delayed.

    Checks, in order:
      1. The timestamp recorded for '<channel>/0' is older than 60 days -> True.
      2. The channel is not listed in suspect._db.items -> False.
      3. The indexed text for '<channel>/0' has a character passing isCN -> False.
      4. The indexed text has a character whose Unicode name matches
         'arabic' or 'cyrillic' (per matchKey) -> True.
    Anything else -> False.
    """
    key = channel + '/0'
    sixty_days = 60 * 24 * 60 * 60  # seconds
    if timestamp.get(key) < time.time() - sixty_days:
        return True
    if channel not in suspect._db.items:
        return False
    if any(isCN(ch) for ch in index.get(key, '')):
        return False
    for ch in index.get(key, ''):
        try:
            script_hit = matchKey(unicodedata.name(ch), ['arabic', 'cyrillic'])
        except Exception:
            # Best effort: unicodedata.name raises ValueError for characters
            # without a name; any failure here just skips the character.
            continue
        if script_hit:
            return True
    return False
def videoFilter(title, v_len, author, _, raw):
    """Return True only when the video passes every exclusion rule.

    Args:
        title: video title, checked against keyword lists and isCN.
        v_len: video length, compared against 20 * 60 and 10 * 60
            (presumably seconds; confirm with the caller).
        author: uploader name, checked against keyword lists.
        _: unused.
        raw: raw text, checked for 'Human Behavioral Biology'.

    Returns:
        False as soon as any rule rejects the video, True otherwise.
    """
    # The rules are evaluated in the same order as a chain of early returns;
    # `or` short-circuits on the first rule that rejects.
    rejected = (
        (matchKey(title, ['镇魂', '白宇', '朱一龙']) and v_len > 60 * 20)
        or matchKey(title, ['丁毅', '聰明的一休', '準提咒'])
        or matchKey(author, ['sunfirekiss', '丁毅', 'YaleUniversity'])
        or matchKey(raw, ['Human Behavioral Biology'])
        or matchKey(author, ['bach', 'piano'])
        or matchKey(title, ['bach', 'piano'])
        or v_len > 10 * 60
        or isCN(title)
    )
    return not rejected
def en2zh(text):
    """Translate the non-Chinese lines of an HTML fragment to Chinese.

    The fragment is parsed with BeautifulSoup and its top-level nodes are
    visited in order:
      * <a> elements are kept verbatim (as their HTML string).
      * Every other node is split into lines; blank lines are dropped and
        lines that pass isCN are kept unchanged.
      * Each remaining line is translated with trans_microsoft (best effort)
        and with trans_google; every translation accepted by isValid is
        appended, stripped of surrounding whitespace.

    Returns:
        The collected pieces joined by blank lines ('\\n\\n').
    """
    soup = BeautifulSoup(text, 'html.parser')
    final_result = []
    for item in soup:
        if item.name == 'a':
            final_result.append(str(item))
            continue
        for line in str(item).split('\n'):
            if not line.strip():
                continue
            if isCN(line):
                final_result.append(line)
                continue
            try:
                candidates = [trans_microsoft.translate(line)]
            except Exception:
                # The Microsoft translator is optional; a failure leaves only
                # the Google result. `Exception` (rather than a bare except)
                # lets KeyboardInterrupt / SystemExit propagate.
                candidates = []
            candidates.append(
                trans_google.translate(line, dest='zh-CN').text)
            final_result.extend(
                candidate.strip()
                for candidate in candidates
                if isValid(candidate)
            )
    return '\n\n'.join(final_result)
def tooShort(text):
    """Return True when *text* is too short to be useful.

    Text that passes isCN is too short at one character or fewer; any other
    text is too short at four characters or fewer.
    """
    max_short_len = 1 if isCN(text) else 4
    return len(text) <= max_short_len
def isSimplified(text):
    """Return True when *text* counts as simplified Chinese.

    Each character contributes isCN(c) + hanzidentifier.is_simplified(c) to
    a score. The text is rejected outright if any character passes isCN but
    is not accepted by hanzidentifier.is_simplified; otherwise it is accepted
    when twice the score is at least the text length. Empty text is accepted.
    """
    score = sum(isCN(ch) + hanzidentifier.is_simplified(ch) for ch in text)
    has_non_simplified = any(
        isCN(ch) and not hanzidentifier.is_simplified(ch)
        for ch in text
    )
    if has_non_simplified:
        return False
    return score * 2 >= len(text)
def matchLanguage(channel, status_text):
    """Return whether *status_text* satisfies the channel's language setting.

    Channels whose credential entry has no truthy 'chinese_only' setting
    accept everything (True). For the others the result of isCN(status_text)
    is returned.
    """
    chinese_only = credential['channels'][channel].get('chinese_only')
    return not chinese_only or isCN(status_text)