def text_contains_emoji(text):
    """Return True if *text* contains at least one emoji.

    Regional-indicator flags are two code points that only demojize as a
    pair, so a two-character string is additionally probed with a space
    inserted between the halves.
    https://en.wikipedia.org/wiki/Regional_Indicator_Symbol
    """
    if any(emoji.demojize(ch) != ch for ch in text):
        return True
    # Edge case: flags are represented as two regional-indicator chars.
    if len(text) == 2:
        spaced_string = "{} {}".format(text[0], text[1])
        return emoji.demojize(spaced_string) != spaced_string
    return False
def on_status(self, tweet):
    """Tweepy stream callback: set the mentioning user's Yo status.

    Expects a tweet of the form ".YoApp <emoji>".  Replies with usage
    help for non-emoji input, asks the user to link their account when
    no Yo access token is cached in redis, and otherwise posts the
    emoji to the Yo status API and confirms.
    """
    try:
        twitter_user_id = str(tweet.user.id)
        splitted = tweet.text.split(' ')
        # BUG FIX: also require a second token — a bare mention used to
        # raise IndexError on splitted[1] (silently eaten by the except).
        if 2 <= len(splitted) <= 3:
            emoji_status = splitted[1]
            # demojize changes the string iff it contains an emoji.
            is_valid_emoji = emoji.demojize(emoji_status) != emoji_status
            if not is_valid_emoji:
                app_api.update_status(status=u'@' + tweet.user.screen_name + u' try again with a single emoji: ".YoApp 😂"')
                return
            yo_access_token = redis_store.get('yo.token.for.twitter.user.id:' + twitter_user_id)
            if not yo_access_token:
                app_api.update_status(status=u'@' + tweet.user.screen_name + u' let\'s link your twitter to your Yo Status here: https://yostat.us/twitter/authorize')
                return
            response = requests.post('https://api.justyo.co/status/',
                                     json={'status': emoji_status,
                                           'access_token': yo_access_token})
            if response.status_code == 200:
                # BUG FIX: previously concatenated the `emoji` MODULE
                # object (TypeError); use the status string instead.
                app_api.update_status(status=u'@' + tweet.user.screen_name + u' your status is now: ' + emoji_status)
            else:
                app_api.update_status(status=u'@' + tweet.user.screen_name + u' let\'s link your twitter to your Yo Status here: https://yostat.us/twitter/authorize')
    except Exception as e:
        # BUG FIX: was the py2 statement `print e.message`; e.message is
        # also deprecated — print the exception itself.
        print(e)
def _handle_message(self, msg):
    """Parse a single message row and emit it as an org-mode sub-item.

    Mutates *msg* in place: normalises the phone number, resolves the
    contact name, derives direction fields, optionally rewrites emoji,
    then writes the formatted entry unless the text is empty or the
    message is ignored.
    """
    # '1234@s.whatsapp.net' -> '001234': drop the domain, add the
    # international-dialling prefix.
    msg['number'] = '00' + msg['number'].split('@')[0]
    # Fall back to the raw number when no contact name is known.
    msg['name'] = self._numberdict.get(msg['number'], msg['number'])
    msg['verb'] = 'to' if msg['type'] else 'from'
    msg['type'] = 'OUTGOING' if msg['type'] else 'INCOMING'
    msg['handler'] = self._args.handler
    if msg['text']:
        if self._args.demojize:
            # Replace emoji with their ':name:' aliases.
            msg['text'] = emoji.demojize(msg['text'])
        if self._args.skip_emoji:
            # Drop emoji characters entirely.
            msg['text'] = re.sub(emoji.get_emoji_regexp(), '', msg['text'])
    # Timestamps are stored in milliseconds since the epoch.
    timestamp = datetime.datetime.fromtimestamp(msg['timestamp'] / 1000)
    # Hash the whole row so re-runs produce stable org IDs.
    properties = OrgProperties(data_for_hashing=json.dumps(msg))
    properties.add('NUMBER', msg['number'])
    properties.add('TYPE', msg['type'])
    output = self._args.output_format.format(**msg)
    if msg['text'] and not self._is_ignored(msg):
        self._writer.write_org_subitem(timestamp=OrgFormat.datetime(timestamp),
                                       output=output,
                                       properties=properties)
def get_emoji_counts(master, emoji_counts, candidate):
    """Accumulate per-day emoji token counts for one candidate.

    *master* maps tweet ids to (tweet_text, created_at, ...) records;
    counts are stored as emoji_counts[candidate][MM-DD-YYYY][':name:'].
    Returns *emoji_counts* (which is also mutated in place).
    """
    # Idiom fix: setdefault replaces the `not in dict.keys()` check.
    per_day = emoji_counts.setdefault(candidate, {})
    for record in master.values():
        tweet = record[0]
        date = record[1]
        date = datetime.datetime.strptime(date, '%a %b %d %H:%M:%S %Z %Y')
        date_ft = date.strftime('%m-%d-%Y')
        # Replace all URLs in the tweet first so they are not mistaken
        # for emoticons by the tokenizer.
        tweet = re.sub('htt[^ ]*', 'URL', tweet)
        tokens = [emoji.demojize(token) for token in twtokenizer.tokenize(tweet)]
        for token in tokens:
            # demojize renders emoji as ':name:'; count those tokens.
            if re.match(':+[a-z_]*:*', token):
                day_counts = per_day.setdefault(date_ft, {})
                day_counts[token] = day_counts.get(token, 0) + 1
    return emoji_counts
def clean(instring, spaces=True):
    """Normalise a string: strip punctuation and separate emoji aliases.

    Punctuation characters become spaces, emoji are demojized into
    ':alias:' tokens (adjacent aliases are split apart), whitespace runs
    collapse to a single space (or are removed entirely when
    spaces=False), and the result is lower-cased.
    """
    # BUG FIX: str.replace returns a new string — the old code discarded
    # the result, so newlines were never removed.
    instring = instring.replace("\n", " ")
    for x in punctuation:
        instring = instring.replace(x, " ")
    # demojize turns emojis into text with this format: :emoji_text_alias:
    instring = emoji.demojize(instring)
    if instring.find(":") > -1:  # then the input has emojis!
        # BUG FIX: the old list.index(":")-based loop could never find a
        # bare ':' word (so it did nothing), or looped forever when it
        # did; adjacent aliases simply need '::' split into ': :'.
        while instring.find("::") > -1:
            instring = instring.replace("::", ": :")
        # BUG FIX: the old re-join concatenated words with NO separator,
        # destroying every word boundary.
        instring = " ".join(instring.split())
    if spaces:
        while instring.find("  ") > -1:  # remove double spaces
            instring = instring.replace("  ", " ")
    else:
        while instring.find(" ") > -1:  # remove all spaces
            instring = instring.replace(" ", "")
    instring = instring.lower()
    return instring
def comment_image(browser, comments):
    """Post a random comment from *comments* on the currently open image.

    Returns 1 after a 2-second pause regardless of success.
    """
    # Round-trip through demojize/emojize so alias shortcuts in the
    # comment pool render as real emoji.
    rand_comment = (choice(comments))
    rand_comment = emoji.demojize(rand_comment)
    rand_comment = emoji.emojize(rand_comment, use_aliases=True)
    comment_input = browser.find_elements_by_xpath('//textarea[@placeholder = "Add a comment…"]')
    if len(comment_input) <= 0:
        # Older page layouts use an <input> instead of a <textarea>.
        comment_input = browser.find_elements_by_xpath('//input[@placeholder = "Add a comment…"]')
    if len(comment_input) > 0:
        # NOTE(review): building the JS by string concatenation breaks if
        # the comment contains a single quote — consider passing the text
        # as an execute_script argument instead.
        browser.execute_script("arguments[0].value = '" + rand_comment + " ';", comment_input[0]);
        # An extra space is added here and then deleted. This forces the
        # input box to update the reactJS core
        comment_input[0].send_keys("\b")
        comment_input[0].submit()
    else:
        print(u'--> Warning: Comment Action Likely Failed: Comment Element not found')
    # print(u'--> Commented: {}'.format(rand_comment))
    # print("--> Commented: " + rand_comment.encode('utf-8'))
    # NOTE(review): this logs "Commented" even when the element was not
    # found above.
    print("--> Commented: {}".format(rand_comment.encode('utf-8')))
    sleep(2)
    return 1
def decrypt(self, encrypted_message):
    """Decrypt an emoji Caesar-cipher message back to plain text.

    The cipher dict (built lazily by define_cipher) maps plain
    characters to ':alias:' emoji names; it is inverted here.  Returns
    the decrypted string.
    """
    # Simple Ceasar Cypher, the emoji-key index position marks 'a', the
    # rest of the alphabet is defined from starting index 'a'.
    # cipher dict is regenerated as in Encrypt, then key/value pairs are
    # reversed.
    if self.cipher == None:
        self.cipher = self.define_cipher()
    # reverse the cipher: alias -> plain character
    rev_cipher = {v: k for k, v in self.cipher.items()}
    decrypted = []
    encrypted_message = (emoji.demojize(encrypted_message))
    # Combination emoji (like sign_of_the_horns_light_skin_tone) contain
    # spaces after demojizing, so real spaces are first escaped as '~'
    # and the ':' delimiters become the split points.
    line = re.sub(' ', '~', encrypted_message)
    line = re.sub(':', ' ', line)
    line_list = (line.split())
    for symbol in line_list:
        # Re-wrap the token in ':' so it matches the cipher's alias form.
        mod_symbol = ':'+symbol+':'
        if mod_symbol in rev_cipher:
            decrypted.append(rev_cipher[mod_symbol])
        else:
            # Not a cipher alias: restore spaces and strip delimiters.
            mod_symbol = re.sub('~', ' ', mod_symbol)
            mod_symbol = re.sub(':', '', mod_symbol)
            decrypted.append(mod_symbol)
    return ''.join(decrypted)
def test_misc():
    """Ad-hoc unicode/emoji playground (not a real unit test).

    Exercises escape sequences, emojize/demojize round-trips, and
    skin-tone modifier composition.
    """
    trans()
    print(u'\U0001f604'.encode('unicode-escape'))
    print(u'\U0001f604')
    ss = u'\U0001f604'
    # NOTE(review): on Python 3, ss[0] is a one-char str and chr() wants
    # an int — this raises TypeError; ord(ss[0]) was probably intended.
    xx = chr(ss[0])
    print("ss({}) xx({})".format(ss, xx))
    # -*- coding: UTF-8 -*-
    # convert to unicode
    teststring = "I am happy \U0001f604"
    # teststring = unicode(teststring, 'utf-8')
    # encode it with string escape
    teststring = teststring.encode('unicode_escape')
    print("💗 Growing Heart")
    print(emoji.emojize('Water! :water_wave:'))
    print(emoji.demojize(u'🌊'))  # for Python 2.x
    # print(emoji.demojize('🌊')) # for Python 3.x.
    print(u"And \U0001F60D")
    # NOTE(review): 0x1f680 is the ROCKET code point, not an astronaut —
    # the labels on the next two lines do not match what is printed.
    print("(-woman) astronaut", chr(int("0001f680", 16)))
    print("woman_astronaut", chr(int("0x0001f680", 0)))
    # Dancer + medium-dark skin-tone modifier, composed several ways.
    print("\U0001f483\U0001f3fe")
    print(chr(0x001f483), chr(0x001f3fe))
    print('💃 🏾 ')
    print(chr(0x001f483)+chr(0x001f3fe))
    print('💃🏾 ')
    print(chr(int('1f483', 16))+chr(int('1f3fe', 16)))
    print('%8s %8s %8s' % qw_tuple('surf wave whitecap'))
    print("('%s', '%s', '%s')" % qw_tuple("surf's-up wave rip-curl"))
def get_from_local_cache(raw_emoji):
    """Return the cached corgi image path for *raw_emoji*.

    Raises CorgiNotFoundException when nothing is cached for it.
    """
    alias = emoji.demojize(raw_emoji).replace(":", "")
    candidate = Config.CACHE_DIR + "/" + alias
    if not (os.path.exists(candidate)):
        raise CorgiNotFoundException("Corgi not found for emoji: {}"
                                     .format(raw_emoji))
    base = alias.split('/')[0]
    return Config.CACHE_DIR + "/" + base + "/01.jpg"
def workaround_freetds_bug(text):
    """ Emoticons in Instagram posts are outside of 0xffff unicode range TDS doesn't like this. We need to use emoji package to convert those pesky emoticons to text + there are some other emoticons where emoji fails, I guess I should update emoji DB. """
    text = emoji.demojize(text)
    # Manually blank a few sequences that demojize leaves untouched.
    text = text.replace(u'🇫󾓮', u' ')
    # NOTE(review): the next two replacements appear byte-identical — one
    # of them was probably meant to target a different code point; verify
    # against the original data before removing either.
    text = text.replace(u'🇺', u' ')
    text = text.replace(u'🇺', u' ')
    return text
def file_parser(filepath):
    """Parse a file of emoji-annotated lines into (text, emoji_name) pairs.

    Each line is demojized; every ':emoji_name:' occurrence yields one
    (cleaned_text, emoji_name) tuple, where cleaned_text is the line
    with ALL emoji names removed.  Lines without emoji yield nothing.
    """
    data = []
    with open(filepath, "r") as file:
        for line in file.readlines():
            text = emoji.demojize(line).rstrip("\n")
            extracted_emojis = EMOJI_NAMES_PATTERN.findall(text)
            if not extracted_emojis:
                continue
            # Hoisted out of the loop: one sub() already removes every
            # occurrence, so re-running it per extracted emoji (as the
            # old code did) was redundant work with identical results.
            text = EMOJI_NAMES_PATTERN.sub("", text)
            for emoji_name in extracted_emojis:
                data.append((text.strip(), emoji_name.strip()))
    return data
def put_in_local_cache(corgis):
    """Download every corgi image into the local cache directory.

    *corgis* maps an emoji to an image URL; each image is stored under
    CACHE_DIR/<demojized-name>/01.jpg.  Failures are logged and skipped.
    """
    # .items() replaces the iterate-then-get double lookup.
    for key, corgi in corgis.items():
        if not corgi:
            continue
        emoji_dir = emoji.demojize(key).replace(":", "")
        try:
            directory = Config.CACHE_DIR + '/' + emoji_dir
            if not os.path.exists(directory):
                os.makedirs(directory)
            urllib.request.urlretrieve(corgi, directory + "/01.jpg")
        except Exception:
            # Narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt/SystemExit); .exception logs the traceback.
            logger.exception("Failed on: " + key)
def on_data(self, data):
    """Stream callback: write geo-tagged candidate tweets to per-candidate files.

    Only tweets carrying a non-null 'place' bounding box are kept.  The
    text is demojized and escape-normalised, then appended as a JSON-ish
    line to the file handle of every candidate keyword it mentions.
    Always returns True so the stream keeps running.
    """
    data = str(emoji.demojize(data))
    decoded = json.loads(str(data))
    if 'place' in decoded and decoded['place'] is not None:
        # First corner of the bounding box stands in for the location.
        loc = decoded['place']['bounding_box']['coordinates'][0][0]
        # unicode_escape flattens remaining non-ASCII; tweet[1:] drops
        # the leading "b" of the bytes repr.  The replace chain below is
        # order-sensitive — do not reorder.
        tweet = str(emoji.demojize(decoded['text']).encode("unicode_escape"))
        tweet = tweet[1:]
        tweet = tweet.strip("\n")
        tweet = tweet.strip("\.")
        tweet = tweet.replace("\n",". ")
        tweet = tweet.replace("\\'","'")
        tweet = tweet.replace("\\","")
        tweet = tweet.replace("\\\.",".")
        tweet = tweet.replace("\"", "'")
        tweet = tweet.replace("\\n",". ")
        print (tweet)
        tweetLower = tweet.lower()
        # One output file per candidate; a tweet may match several.
        if("trump" in tweetLower):
            trump.write('{"tweet": "' + tweet +'", "coordinates": ' + str(loc) + '}\n')
            trump.flush()
        if("sanders" in tweetLower or "bernie" in tweet.lower()):
            bernie.write('{"tweet": "' + tweet +'", "coordinates": ' + str(loc) + '}\n')
            bernie.flush()
        if("clinton" in tweetLower):
            clinton.write('{"tweet": "' + tweet +'", "coordinates": ' + str(loc) + '}\n')
            clinton.flush()
        if("rubio" in tweetLower):
            rubio.write('{"tweet": "' + tweet +'", "coordinates": ' + str(loc) + '}\n')
            rubio.flush()
        if("cruz" in tweetLower):
            cruz.write('{"tweet": "' + tweet +'", "coordinates": ' + str(loc) + '}\n')
            cruz.flush()
    return True
def default_text_handler(self, client, message):
    """
    This is the default text handler provided by Shawk.

    If self.demojize is True, this converts emoji to text and prints
    the message.  Otherwise, this simply prints the raw message text.
    """
    greeting = "Shawk received message"
    body = emoji.demojize(message.text) if self.demojize else message.text
    print("{}: {}".format(greeting, body))
async def send_reaction(self, reaction):
    """React to a message.

    Converts the unicode emoji to its textual form and posts it via the
    Slack reactions API; unknown emoji names are logged and ignored,
    any other Slack error propagates.
    """
    # NOTE(review): demojize returns ':name:' including the colons, while
    # Slack's 'name' field usually takes the bare name — confirm the API
    # tolerates the delimiters.
    emoji = demojize(reaction.emoji)
    _LOGGER.debug("Reacting with: %s", emoji)
    try:
        await self.slacker.reactions.post('reactions.add', data={
            'name': emoji,
            'channel': reaction.target,
            'timestamp': reaction.linked_event.raw_event['ts']
        })
    except slacker.Error as error:
        if str(error) == 'invalid_name':
            _LOGGER.warning('Slack does not support the emoji %s', emoji)
        else:
            raise
def clean_tweets(tweet):
    """Tokenise and normalise a tweet for downstream analysis.

    URLs are collapsed to the literal token 'URL' before tokenisation,
    tokens are lower-cased, emoji become ':alias:' labels, and
    punctuation, empty strings and stopwords are dropped.
    """
    # Replace URLs first so each survives tokenisation as one token.
    no_urls = re.sub('htt[^ ]*', 'URL', tweet)
    raw_tokens = twtokenizer.tokenize(no_urls)
    cleaned = []
    for raw in raw_tokens:
        token = emoji.demojize(raw.lower())
        # Same filters as before: punctuation, empties, stopwords.
        if token in punctuation or token == '' or token in stopwords:
            continue
        cleaned.append(token)
    return cleaned
def get_all(self):
    """Return every known emoji with its corgi image URLs.

    URLs come from the remote (S3) cache when enabled, falling back to
    the Google-spreadsheet source of truth on a miss.  Returns a dict
    with 'count', 'emojis' and 'results' keys.
    """
    all_emojis = google_spreadsheets.keys(include_empty_keys=True)
    corjis = []
    for this_emoji in all_emojis:
        corgi_urls = ""
        if settings.Config.REMOTE_CACHE_RETRIEVE:
            try:
                corgi_urls = s3.get_all(this_emoji)
            except CorgiNotFoundException as e:
                logger.warn("Corji not found for emoji %s", this_emoji)
        if not corgi_urls:
            # Cache miss (or caching disabled): hit the spreadsheet.
            corgi_urls = google_spreadsheets.get_all(this_emoji)
        # ':dog_face:' -> 'dog_face'
        emoji_name = emoji.demojize(this_emoji).replace(":", "")
        corjis.append({
            "urls": corgi_urls,
            "emoji": this_emoji,
            "emoji_name": emoji_name
        })
    return {
        "count": len(corjis),
        "emojis": [corji["emoji"] for corji in corjis],
        "results": corjis
    }
def comment_image(browser, username, comments, blacklist, logger, logfolder):
    """Post a random templated comment on the currently open image.

    Fills *username* into a randomly chosen comment template, submits it
    through the comment box, records the activity and (optionally) adds
    the user to the campaign blacklist.  Returns 1 after a short pause.
    """
    # Pick a template, fill in the username, then normalise emoji
    # aliases to real emoji via a demojize/emojize round-trip.
    rand_comment = (choice(comments).format(username))
    rand_comment = emoji.demojize(rand_comment)
    rand_comment = emoji.emojize(rand_comment, use_aliases=True)
    open_comment_section(browser)
    comment_input = get_comment_input(browser)
    try:
        if len(comment_input) > 0:
            comment_input[0].clear()
            # Re-fetch after each interaction: React may re-render the
            # input element, invalidating stale references.
            comment_input = get_comment_input(browser)
            browser.execute_script(
                "arguments[0].value = '" + rand_comment + " ';", comment_input[0])
            # An extra space is added here and then deleted.
            # This forces the input box to update the reactJS core
            comment_input[0].send_keys("\b")
            comment_input = get_comment_input(browser)
            comment_input[0].submit()
            update_activity('comments')
            if blacklist['enabled'] is True:
                action = 'commented'
                add_user_to_blacklist(
                    browser, username, blacklist['campaign'], action, logger, logfolder
                )
        else:
            logger.warning('--> Warning: Comment Action Likely Failed:'
                           ' Comment Element not found')
    except InvalidElementStateException:
        logger.info('--> Warning: Comment Action Likely Failed: Probably InvalidElementStateException')
    # NOTE(review): this logs "Commented" even when the attempt failed.
    logger.info("--> Commented: {}".format(rand_comment.encode('utf-8')))
    sleep(2)
    return 1
def parse_message(lines):
    """
    Divide the message into its components using a regex

    :param lines: list of lines to parse
    :return: list of tuples containing the different parts of the message
    """
    parsed = []
    for raw_line in lines:
        # We convert the emojis to text representation for easier handling
        demojized = emoji.demojize(raw_line)
        header = re.match(MESSAGE_REGEX, demojized)
        if header is None:
            continue
        body = header.group('message')
        modifier = re.match(REMOVE_MODIFIERS, body)
        if modifier is not None:
            body = modifier.group(1) + '' + modifier.group(3)
        # Re-order DD/MM/YY as YY/MM/DD so dates sort lexicographically.
        day_field = header.group('day')
        reordered = '{}/{}/{}'.format(day_field[6:], day_field[3:5], day_field[0:2])
        parsed.append((reordered, header.group('person'), body))
    return parsed
def on_status(self, status):
    """Stream callback: index English, non-RT tweets under each emoji they contain.

    For every emoji name found in the tweet, pushes the emoji-stripped
    tweet text onto the 'emoji-ml::<emoji>' redis list.
    """
    # TODO: avoid duplicate tweets
    tweet = status.text
    language = 'en'
    if not status.retweeted and 'RT @' not in tweet:
        try:
            language = lang(tweet)
        except LangDetectException:
            # Undetectable text falls through with the 'en' default.
            pass
        if language == 'en':
            demojized = emoji.demojize(tweet)
            extracted_emojis = EMOJI_NAMES_PATTERN.findall(demojized)
            # BUG FIX: strip the ':name:' tokens from the DEMOJIZED text —
            # the raw tweet contains none, so the old sub() was a no-op
            # and the "cleaned" text still carried the emoji.
            cleaned_text = EMOJI_NAMES_PATTERN.sub('', demojized)
            for emoji_name in extracted_emojis:
                cleaned_emoji = emoji_name.replace(':', '')
                print(cleaned_text)
                # BUG FIX: was `== ""`, which only ever stored EMPTY
                # strings; store non-empty cleaned text instead.
                if cleaned_text.strip() != "":
                    redis.rpush('emoji-ml::{}'.format(cleaned_emoji), cleaned_text)
def parse(self, tweet):
    """Fold a single tweet into the aggregate statistics.

    Tracks totals plus picture/URL/emoji presence, and per-item counts
    of hashtags, URL domains and demojized emoji.
    """
    self.stats['totalTweets'] += 1
    entities = tweet['entities']
    # Pictures: native media entities, or instagram links.
    if 'media' in entities or self._is_instagram(tweet):
        self.stats['tweetsWithPictures'] += 1
    # Hashtag occurrence counts.
    for hashtag in entities['hashtags']:
        self._increase_count(hashtag['text'], self.stats['hashtags'])
    # URL domains (netloc of the expanded URL).
    if entities['urls']:
        self.stats['tweetsWithURL'] += 1
        for url in entities['urls']:
            netloc = urlparse(url['expanded_url']).netloc
            self._increase_count(netloc, self.stats['urls'])
    # Emoji, counted under their demojized alias.
    found = self.emoji_regex.findall(tweet['text'])
    if found:
        self.stats['tweetsWithEmoji'] += 1
        for symbol in found:
            self._increase_count(demojize(symbol), self.stats['emoji'])
def replace_emojis(self, text):
    """Demojize *text*, splitting back-to-back aliases ('::' -> ': :')."""
    demojized = emoji.demojize(text)
    return re.sub('::', ': :', demojized)
def emoji(origin):
    """Replace known emoji in *origin* with their mapped text values.

    Demojizes the input, splits it into words, and substitutes each word
    (or embedded substring) found in EMOJI_CARACTER with its mapped
    value.  Returns the rewritten string, or None when nothing changed
    or on error.

    NOTE(review): this function shadows the `emoji` package name at
    module level; the local import below keeps the package reachable
    inside the body only.
    """
    try:
        import emoji
        s = emoji.demojize(origin)
        # Adjacent aliases come out as '::'; split them apart.
        s = s.replace('::', ': :')
        lista_texto = s.split()
        print(lista_texto)
        lista_demoj = []
        for palavra in lista_texto:
            parada = False  # "stop" flag for the scan over the emoji map
            cont = 0
            while not parada:
                for group in EMOJI_CARACTER.items():
                    cont += 1
                    qtd_emojis = EMOJI_CARACTER.__len__()
                    chave = group[0]   # key: emoji alias
                    valor = group[1]   # value: replacement text
                    if chave != palavra:
                        if chave in palavra:
                            # Alias embedded in a larger word: remove it,
                            # keep the remainder plus the mapped value.
                            palavra = palavra.split(chave)
                            palavra = ''.join(palavra)
                            lista_demoj.append(palavra)
                            lista_demoj.append(valor)
                            parada = True
                            break
                        else:
                            if palavra in lista_demoj:
                                # Already emitted once; skip duplicates.
                                parada = True
                                break
                            elif palavra == chave:
                                lista_demoj.append(valor)
                                parada = True
                                break
                            elif chave not in palavra and cont <= qtd_emojis:
                                # Not this alias; try the next map entry.
                                continue
                            else:
                                # Exhausted the map: keep the word as-is.
                                lista_demoj.append(palavra)
                                parada = True
                                break
                    else:
                        # Exact alias match: emit the mapped value.
                        lista_demoj.append(valor)
                        parada = True
                        break
        demoj = ' '.join(lista_demoj)
        print(origin)
        print(demoj)
        # Return None when the text is unchanged (no emoji replaced).
        if demoj == origin:
            demoj = None
            return demoj
        else:
            return demoj
    except Exception as e:
        print(e)
# --- IRC chat logger ---------------------------------------------------
# Authenticate, join the channel, then log every message (demojized) and
# report an approximate message rate every 4th received payload.
sock.settimeout(120)
sock.connect((server, port))
sock.send(f'PASS {oauth}\n'.encode('utf-8'))
sock.send(f'NICK {nickname}\n'.encode('utf-8'))
sock.send(f'JOIN #{channel}\n'.encode('utf-8'))
count = 0.0
rate = 0.0
start_time = time.time()
while True:
    resp = sock.recv(2048).decode('utf-8')
    if resp.startswith('PING'):
        # Keep-alive handshake required by the IRC server.
        sock.send('PONG\n'.encode('utf-8'))
        #logging.info(resp)
    elif len(resp) > 0:
        logging.info(demojize(resp))
    if (count % 4 == 0):
        #time.sleep(0.1)
        end_time = time.time()
        rate = 4 / (end_time - start_time)
        start_time = end_time
        logging.info(f"Calls = {count}, Messages Sent Per Second = {rate}")
    count += 1
# NOTE(review): unreachable — the loop above never exits normally.
sock.close()
# 링크들을 전부 돌아다니면서 정보수집 for link in target_links: # 타겟 데이터 찾고 필요한 모양으로 전처리 driver.get(link) driver.implicitly_wait(5) time.sleep(1) post_date = driver.find_elements_by_tag_name('time')[-1].get_attribute("datetime").split('T')[0] try: number_of_like = driver.find_element_by_class_name('Nm9Fw').text.split(' ')[1][:-1].replace(',','') except: number_of_like = driver.find_element_by_class_name('vcOH2').text.split(' ')[1][:-1].replace(',', '') posting = driver.find_elements_by_class_name('C4VMK')[0].text.split('\n')[2:-1] posting_text = '' for text in posting: text = emoji.demojize(text) text = text.replace("'", '') text = text.replace('"', '') posting_text = posting_text + ' ' + text # print(date, number_of_like, posting) # DB에 데이터 저장 query_for_insert_data = f"INSERT INTO postings(link, post_date, number_of_like, posting) VALUES ('{link}', '{post_date}', {number_of_like}, '{posting_text}');" try: cursor.execute(query_for_insert_data) except: print(link) print(query_for_insert_data) connection.commit() # break
def test_demojize_name_only_no_space():
    """Every known emoji name must round-trip through emojize/demojize."""
    for name in emoji.EMOJI_UNICODE.keys():
        rendered = emoji.emojize(name, False, True)
        restored = emoji.demojize(rendered, True)
        assert name == restored, "%s != %s" % (name, restored)
def test_shortcut_translation():
    """Every shortcut must demojize to its mapped value (and change)."""
    for shortcut, expected in emoji.shortcuts.SHORTCUTS.items():
        actual = emoji.demojize(shortcut, use_shortcuts=True)
        assert actual != shortcut
        assert expected == actual, "%s != %s" % (expected, actual)
def unicode_to_name(e):
    """Return the demojized (':name:') form of the object's name field."""
    demojized = emoji.demojize(e.name)
    return demojized
def echo(update,context):
    """Telegram handler enforcing an English-only day for the chat.

    On the configured weekday, detects the language of each message
    (caption for media posts); non-English senders get three warnings
    recorded in the `blacklist` table, then their messages are deleted
    (admins) or they are muted for 24 hours (regular members).
    """
    bot = context.bot
    chat = update.effective_chat  # type: Optional[Chat]
    # user = update.effective_user  # type: Optional[User]
    message = update.effective_message  # type: Optional[Message]
    chat_id = message.chat.id
    chat_type = message.chat.type
    message_id = message.message_id
    from_user_name = message.from_user.username
    from_user_id = message.from_user.id
    member = chat.get_member(from_user_id)
    # date = message.date
    # NOTE(review): a large commented-out draft lived here: it extracted
    # file_ids / thumbnails / sizes per media type (audio, document,
    # animation, photo, sticker, video, voice, video_note, contact) and
    # updated the `media` table keyed by caption keyword.  Removed for
    # clarity; recover from VCS history if needed.
    lock.acquire(True)
    try:
        # Which weekday (e.g. 'Mon') is english-day for this chat?
        sql = "SELECT english_day FROM setting WHERE chat_id = '%s'"%chat_id
        bar, jum = eksekusi(sql)
        if jum == 0:
            # No english-day configured for this chat: nothing to do.
            pass
        else:
            try:
                translator = Translator()
                try:
                    # Strip mentions and URLs before language detection.
                    message = re.sub(r"(?:\@|https?\://)\S+", "", message.text.encode().decode('utf-8'))
                except:
                    # No .text: fall back to the media caption.
                    if not message.caption:
                        return
                    elif message.caption ==None:
                        message = "this is caption"
                    else:
                        message = re.sub(r"(?:\@|https?\://)\S+", "", message.caption.encode().decode('utf-8'))
                        # message = message.caption.encode('ascii', 'ignore').decode('ascii')
                # Drop quoted text, slash-commands and ALL-CAPS acronyms —
                # they confuse language detection.
                message = re.sub(r'".*?"', "", message)
                message = re.sub(r'/.*', "", message)
                message = re.sub(r"\b[A-Z\.]{2,}s?\b", "", message)
                try:
                    a = translator.detect(emoji.demojize(message)).lang
                    sekarang = datetime.datetime.now()
                    tanggal = '{:%Y-%m-%d}'.format(sekarang)
                    hari = datetime.datetime.strftime(sekarang.date(),"%a")
                    # Enforce only on the configured day, for non-English text.
                    if hari == bar[0][0] and a != 'en':
                        cek = "SELECT user_id, mute FROM blacklist WHERE chat_id = '%s' AND user_id = '%s' AND tanggal = '%s'"%(chat_id,from_user_id,tanggal)
                        bar, jum = eksekusi(cek)
                        if jum == 0:
                            # First offence today: create the record.
                            infut = "INSERT INTO blacklist (chat_id, chat_type, user_id, user_name, mute,tanggal) VALUES ('%s','%s','%s','%s',0,'%s')"%(chat_id, chat_type, from_user_id, from_user_name,tanggal)
                            cur.execute(infut)
                            db.commit()
                            bot.send_message(chat_id, random.choice(teks),
                                             reply_to_message_id=message_id)
                        elif jum != 0 and bar[0][1] < 3:
                            # Repeat offence: bump the strike counter.
                            infut = "UPDATE blacklist SET mute = mute+1 WHERE chat_id = '%s' AND user_id = '%s' AND tanggal = '%s'"%(chat_id, from_user_id,tanggal)
                            cur.execute(infut)
                            db.commit()
                            sisa = 2-bar[0][1]  # warnings remaining
                            if sisa == 0:
                                if member.status == 'administrator' or member.status == 'creator':
                                    bot.send_message(
                                        chat_id,
                                        'Your next-non-english chat will be deleted.',
                                        reply_to_message_id=message_id)
                                else:
                                    bot.send_message(
                                        chat_id,
                                        'Your next-non-english chat will make you muted to this group for 24 hours.',
                                        reply_to_message_id=message_id)
                            else:
                                bot.send_message(
                                    chat_id,
                                    'You have %s remaining'%(sisa),
                                    reply_to_message_id=message_id)
                        elif jum!=0 and bar[0][1]==3:
                            # Strikes exhausted: delete (admins) or mute 24h.
                            if member.status == 'administrator' or member.status == 'creator':
                                try:
                                    update.effective_message.delete()
                                except:
                                    bot.send_message(
                                        chat_id,
                                        'Gak bisa di delete nih',
                                        reply_to_message_id=message_id)
                            elif member.can_send_messages is None or member.can_send_messages:
                                mutetime = datetime.datetime.now()+datetime.timedelta(hours=24)
                                tanggalmute = sekarang = '{:%Y-%m-%d %H:%M:%S}'.format(mutetime)
                                infut = "UPDATE blacklist SET mute_sampe_tanggal = '%s' WHERE chat_id = '%s' AND user_id = '%s' AND tanggal = '%s'"%(tanggalmute,chat_id, from_user_id,tanggal)
                                cur.execute(infut)
                                db.commit()
                                bot.restrict_chat_member(chat_id, from_user_id,
                                                         until_date=mutetime,
                                                         can_send_messages=False)
                                bot.send_message(chat_id,
                                                 "Restricted until {}!".format(tanggalmute),
                                                 reply_to_message_id=message_id)
                            else:
                                bot.send_message(chat_id, "Already muted.",
                                                 reply_to_message_id=message_id)
                except:
                    # NOTE(review): bare except hides real failures here.
                    bot.send_message(chat_id, "Im stupid bot",
                                     reply_to_message_id=message_id)
            except:
                bot.send_message(chat_id,str(traceback.format_exc()),
                                 reply_to_message_id=message_id)
    finally:
        lock.release()
""" Return whether the string can be interpreted as a date. :param string: str, string to check for date :param fuzzy: bool, ignore unknown tokens in string if True """ try: parse(string, fuzzy=fuzzy) return True except ValueError: return False # Iterate through all the files and send messages in whatsapp for file in os.listdir(chatsFolder): filename = 'B-{}'.format(str(file[19:-4])) whatsapp.selectContact(filename.strip()) # Strips the newline character from the end of message message_file = open(chatsFolder + file, 'r') Lines = message_file.readlines() message = "" for line in Lines: if is_date(line[0:8]): whatsapp.sendMessage(emoji.demojize(message, delimiters=("", ""))) message = line else: message = message + line os.rename(chatsFolder + file, restoredFolder + filename)
def add_more_posts(companies, addDirection, addDate):
    """Extend each company's Instagram CSV with older or newer posts.

    addDirection is 'beginning' (scrape from addDate up to the oldest
    stored post) or 'end' (from the newest stored post to addDate).
    Scraped rows are appended to
    ../../../data/all_instagram_posts/<company>.csv.
    """
    for company in companies:
        ## import company csv as dataframe
        csvName = company + '.csv'
        output_path = pathlib.Path('../../../data/all_instagram_posts')
        df = pd.read_csv(output_path.joinpath(csvName))
        ## get earliest and latest date
        oldestDate = pd.to_datetime(df['date_utc'].min())  ## earliest date in data
        recentDate = pd.to_datetime(df['date_utc'].max())  ## most recent date in data
        if addDirection == 'beginning':
            SINCE = oldestDate
            UNTIL = addDate
        if addDirection == 'end':
            SINCE = addDate
            UNTIL = recentDate
        posts = instaloader.Profile.from_username(instagram.context, company).get_posts()
        processed = 1
        # Posts iterate newest-first: skip while newer than SINCE, keep
        # while newer than UNTIL.
        # NOTE(review): this requires SINCE >= UNTIL chronologically —
        # verify the bounds chosen above for each direction satisfy that.
        for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)):
            print(post.date)
            print("...scraping info for post %i, %s" % (processed, company))
            # Text fields are utf-8 encoded with errors ignored so odd
            # characters cannot break the CSV writer.
            post_info = {
                "shortcode": post.shortcode,
                "username": company,
                "date_utc": post.date_utc.strftime('%Y-%m-%d %H:%M:%S.%f'),
                "is_video": "yes" if post.is_video else "no",
                "is_sponsored": post.is_sponsored,
                "hashtags": (",".join(post.caption_hashtags)).encode('utf-8', errors='ignore'),
                "mentions": (",".join(post.caption_mentions)).encode('utf-8', errors='ignore'),
                "caption": (emoji.demojize(post.caption)).encode('utf-8', errors='ignore') if post.caption else "",
                "video_view_count": post.video_view_count if post.is_video else 0,
                "video_length": post.video_duration if post.is_video else 0,
                "likes": post.likes,
                "comments": post.comments,
                "location_name": (post.location.name).encode('utf-8', errors='ignore') if post.location else "",
                "location_latlong": " ".join((str(post.location.lat), str(post.location.lng))) if post.location else ""
            }
            processed += 1
            file_path = os.path.join(output_path, csvName)
            fieldnames=["shortcode", "username", "date_utc", "is_video",
                        "is_sponsored", "hashtags", "mentions", "caption",
                        "video_view_count", "video_length", "likes",
                        "comments", "location_name", "location_latlong"]
            #bigdict = {'column_1': 1, 'column_2': 2, 'column_3': 3}
            # Append-only: assumes the header row already exists in the CSV.
            with open(file_path, 'a+') as csv_file:
                #fieldnames = ['column_1', 'column_2', 'column_3']
                writer = csv.DictWriter(csv_file, fieldnames=fieldnames, delimiter=',')
                #if '\n' not in csv_file.readlines()[-1]:
                #    csv_file.write("\n")
                writer.writerow(post_info)
        print("...scraped %i posts for %s" % (processed - 1, company))
    print("Done scraping!")
""" @author : macab (macab@debian) @file : emoji @created : Wednesday Mar 20, 2019 23:05:24 IST """ import emoji if __name__ == "__main__": # grinning face print("\U0001f600") # grinning squinting face print("\U0001F606") # rolling on the floor laughing print("\U0001F923") print(emoji.emojize(":grinning_face_with_big_eyes:")) print(emoji.demojize('😍'))
def clean_tweet(self, text):
    """Normalise raw tweet text for modelling.

    Pipeline order matters: unicode repair -> HTML strip -> contraction
    unpacking -> entity replacement (URLs, emails, phones, numbers,
    currency) -> accent removal -> emoticon/emoji to words -> word
    splitting -> punctuation/number/short-word removal -> whitespace
    normalisation.
    (Commented lines preserve the pre-textacy `preprocess.*` API calls.)
    """
    # FIXED UNICODE
    # text = preprocess.fix_bad_unicode(text)
    text = ftfy.fix_text(text)
    # GET TEXT ONLY FROM HTML
    text = BeautifulSoup(text, features='lxml').getText()
    # UN-PACK CONTRACTIONS
    text = preprocess.unpack_contractions(text)
    # REMOVE URL
    # text = preprocess.replace_urls(text)
    text = preprocessing.replace_urls(text)
    # REMOVE EMAILS
    # text = preprocess.replace_emails(text)
    text = preprocessing.replace_emails(text)
    # REMOVE PHONE NUMBERS
    # text = preprocess.replace_phone_numbers(text)
    text = preprocessing.replace_phone_numbers(text)
    # REMOVE NUMBERS
    # text = preprocess.replace_numbers(text)
    text = preprocessing.replace_numbers(text)
    # REMOVE CURRENCY
    # text = preprocess.replace_currency_symbols(text)
    text = preprocessing.replace_currency_symbols(text)
    # REMOVE ACCENTS
    # text = preprocess.remove_accents(text)
    text = preprocessing.remove_accents(text)
    # CONVERT EMOJIS TO TEXT: first map ASCII smileys via the lookup
    # table, then demojize the rest and strip the ':' delimiters.
    words = text.split()
    reformed = [
        self.SMILEY[word] if word in self.SMILEY else word
        for word in words
    ]
    text = " ".join(reformed)
    text = emoji.demojize(text)
    text = text.replace(":", " ")
    text = ' '.join(text.split())
    # SPLIT ATTACHED WORDS (CamelCase -> separate words)
    text = ' '.join(re.findall('[A-Z][^A-Z]*', text))
    # SPLIT UNDERSCORE WORDS
    text = text.replace('_', ' ')
    # REMOVE PUNCTUATION
    # text = preprocess.remove_punct(text)
    text = preprocessing.remove_punctuation(text)
    # Remove numbers
    text = re.sub(r'\d', '', text)
    # REMOVE WORDS LESS THAN 3 CHARACTERS
    text = re.sub(r'\b\w{1,2}\b', '', text)
    # NORMALIZE WHITESPACE
    # text = preprocess.normalize_whitespace(text)
    text = preprocessing.normalize_whitespace(text)
    return text
def clean_text(val):
    """Full cleaning pipeline: spelling fixes, tweet-preprocessor clean,
    then demojize + punctuation stripping + whitespace collapse."""
    corrected = misspelled_correction(val)
    preprocessed = p.clean(corrected)
    depunctuated = punctuation(emoji.demojize(preprocessed))
    return ' '.join(depunctuated.split())
punctuations = '''()-[]{};:'"\,<>./@#$%^&_~''' for x in val.lower(): if x in punctuations: val = val.replace(x, " ") return val # In[8]: punctuation("test ombak@ #ldfldlf??? !! ") # In[9]: data.clean_content = data.clean_content.apply( lambda x: ' '.join(punctuation(emoji.demojize(x)).split())) # In[10]: def clean_text(val): val = misspelled_correction(val) val = p.clean(val) val = ' '.join(punctuation(emoji.demojize(val)).split()) return val # In[11]: clean_text("saya punya ide💡 bag00ss@@ ! ? ")
json.dump(e_codes_json, f) # In[ ]: #def get_emoji_counts(master): emoji_counts = {} for i in range(0,len(master)): tweet = master.loc[i,'statusText'] date = master.loc[i,'statusCreatedAt'] date = datetime.datetime.strptime(date,'%a %b %d %H:%M:%S %Z %Y') date_ft = date.strftime('%Y_%m_%d') tokens = twtokenizer.tokenize(tweet) cleanWords = [word for word in cleanWords if word[0:3] != 'htt'] tokens = [emoji.demojize(token) for token in tokens if token != ':'] # tokens = [word for word in tokens if word not in string.punctuation] for token in tokens: if re.match(':+*:',token): if date_ft not in emoji_counts.keys(): emoji_counts[date_ft] = {} if token in emoji_counts[date_ft]: emoji_counts[date_ft][token] +=1 else: emoji_counts[date_ft][token] = 1 return emoji_counts # In[ ]:
def test_smile_emoji2():
    """A ':smile:' alias inside surrounding text must pass through demojize unchanged."""
    sample = u'(test asdad :smile:)'
    result = emoji.demojize(sample, use_shortcuts=True)
    assert result == u'(test asdad :smile:)'
def transTwts(configs, dest):
    """Continuously pull the newest tweet from MySQL, clean it, translate it
    with googletrans into ``dest`` and into English, and print the results.

    configs -- nested dict; ``configs['database']`` holds host/user/password/db
               (values may arrive wrapped in double quotes, which are stripped).
    dest    -- target language code passed to Translator.translate().

    NOTE(review): on any error the ``except`` branch calls transTwts()
    recursively, so repeated failures grow the call stack without bound --
    a retry loop would be safer.  ``df.ix`` is long deprecated (removed in
    pandas 1.0); presumably ``df.loc``/``df.iloc`` is intended -- confirm
    the pinned pandas version.
    """
    print('transTwts() started. ')
    try:
        #global database_paras, con
        sql_twts = db_op.SQL_tweets()
        # Credential values may be wrapped in literal double quotes; strip them.
        host = configs['database']['host'].replace('"', '')
        user = configs['database']['user'].replace('"', '')
        password = configs['database']['password'].replace('"', '')
        db = configs['database']['db'].replace('"', '')
        db_info_list = []
        db_info_list.append(host)
        db_info_list.append(user)
        db_info_list.append(password)
        db_info_list.append(db)
        # Comma-joined credential string, as expected by the db_op helpers.
        db_info_str = ','.join(db_info_list)
        database_paras = db_op.Database_parameters(host, user, password, db)
        #select_sql = r"SELECT tid, text FROM tweets.tweet where tweet_lang <> 'en' order by tid desc limit 50;"
        #select_sql = r"SELECT tid, text, url1 FROM tweets.tweet where tweet_lang = 'ar' order by tid desc limit 1 ;"
        # Fetch only the single most recent tweet per polling cycle.
        select_sql = r"SELECT tid, text, url1, tweet_lang FROM tweets.tweet order by tid desc limit 1 ;"
        translate = Translator()
        while True:
            # Open a fresh connection per poll; close it right after the read.
            con = sql_twts.connect2database(database_paras)
            #results = sql_twts.select_db(r'SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED ;', con)
            results = sql_twts.select_db(select_sql, con)
            con.close()
            #print('results in transTwts: ', results)
            if len(results) < 1:
                # Nothing new yet; back off before polling again.
                time.sleep(10)
                continue
            df = pd.DataFrame(list(results),
                              columns=['tweetID', 'text', 'url1', 'tweet_lang'])
            df = df.sort_values(by=['tweetID'], ascending=False)
            df['tweetID'] = df['tweetID'].astype(str)
            # ``i`` is computed but unused below -- leftover from the
            # random-sampling variant kept in the comments.
            i = int(random.random() * len(df))
            #print('df: ', df.ix[0, 'url1'])
            #print('i: ', i)
            #df = df.iloc[i]
            texts = list(df['text'])
            #urls = list(df['url1'])
            #texts = texts[i:i+1]  # randomly get 1 tweet
            #print('texts: ', texts)
            for j in range(len(texts)):
                # Strip t.co links, '#' marks and @mentions, then spell emoji
                # out as :name: text so the translator preserves them.
                texts[j] = re.sub(r'https{0,1}:\/\/t.co\/[a-zA-Z0-9]+', '', texts[j])
                texts[j] = re.sub(r'#', '', texts[j])
                texts[j] = re.sub(r'@[a-zA-Z0-9_]+', '', texts[j])
                #RE_EMOJI = re.compile('[\U00010000-\U0010ffff]', flags=re.UNICODE)
                #texts[j] = RE_EMOJI.sub(r'', texts[j])
                texts[j] = emoji.demojize(texts[j])
            #print('texts: ', texts)
            trans = translate.translate(texts, dest)
            traneEn = translate.translate(texts, 'en')
            # NOTE(review): the comprehension variable ``i`` shadows the outer ``i``.
            translateds = [i.text for i in trans]
            translateds_En = [i.text for i in traneEn]
            #url_list = url1.split(";")
            for j in range(len(texts)):
                #print(r'df[text]: ', df.ix[i, 'text'])  # restore the random tweet
                lang = df.ix[j, 'tweet_lang'].strip()
                try:
                    # LANGUAGES maps ISO codes to names; unknown codes fall back.
                    lang_full = LANGUAGES[lang].capitalize()
                except:
                    lang_full = 'Unknown'
                print(r'Tweets translation (original language is {}): {} {}'.format(lang_full, df.ix[j, 'text'], df.ix[j, 'url1'].replace(';', ' ')))
                #print(r'df[text]: ', df.ix[j, 'url1'].replace(';', ' '))
                print('Tweets translation (English): ', translateds_En[j])
                print('Tweets translation (Chinese): ', translateds[j])
                print('')
            # print(r'df[text]: ', df['text'])
            # print('translateds: ', translateds)
            time.sleep(20)
            #print('Translation(text): {} , {}'.format(trans.text, text))
            # (a long commented-out block followed here in the original,
            #  updating tweet classification flags via sql_cls.update_rows;
            #  left out of execution exactly as before)
            # # sql_cls = db_op.SQL_tweets()
            # images_ID = list(df['tweetID'])
            # # classified = list(df['classified'])
            # con = sql_twts.connect2database(database_paras)
            # # if len(labels) > 0:
            # # tried_url = 3 : the tweet images have been classified.
            # # print('Probs: ', type(probs))
            # # print('Probs: ', probs)
            # sql_cls.update_rows('tweet', ['tid', 'Flooded', 'classified', 'tried_url', 'Flooded_prob'],
            #                     [images_ID, flooded, classified, [3] * len(images_ID), probs], db_info_str)
            # print('labels: ', labels)
            # # # con.close() #
    except Exception as e:
        print("Error in transTwts(): ", str(e))
        time.sleep(10)
        # NOTE(review): unbounded recursion used as a retry mechanism.
        transTwts(configs, dest)
def test_shortcuts():
    # ':S' is a shortcut for :confounded:; \U0001F376 is the sake emoji.
    expected = u':sake_bottle_and_cup: :confounded: :confounded: :confounded:'
    actual = emoji.demojize(u'\U0001F376 :S :S :S', no_space=True,
                            use_shortcuts=True)
    assert actual == expected
def test_shortcuts():
    # Shortcut expansion with no_space: ':S' becomes :confounded:.
    result = emoji.demojize(u'\U0001F376 :S :S :S',
                            no_space=True,
                            use_shortcuts=True)
    assert result == u':sake_bottle_and_cup: :confounded: :confounded: :confounded:'
# NOTE(review): this block looks like two concatenated scratch scripts (the
# coding comment below marks where a second file apparently begins).  The
# grouping of statements inside/outside the __main__ guard is inferred from
# context -- confirm against the original layout.
if __name__ == '__main__':
    trans()

print(u'\U0001f604'.encode('unicode-escape'))
print(u'\U0001f604')
ss = u'\U0001f604'
#xx = chr(ss[0])
#print("ss({}) xx({})".format(ss, xx))

# -*- coding: UTF-8 -*-
# convert to unicode
teststring = "I am happy \U0001f604"
#
#teststring = unicode(teststring, 'utf-8')
# encode it with string escape
teststring = teststring.encode('unicode_escape')
print("💗 Growing Heart")
print(emoji.emojize('Water! :water_wave:'))
print(emoji.demojize(u'🌊'))  # for Python 2.x
# print(emoji.demojize('🌊')) # for Python 3.x.
print(u"And \U0001F60D")
# Various equivalent ways of producing a code point from its hex value.
print("(-woman) astronaut", chr(int("0001f680", 16)))
print("woman_astronaut", chr(int("0x0001f680", 0)))
# Dancer + medium-dark skin-tone modifier, combined and separate.
print("\U0001f483\U0001f3fe")
print(chr(0x001f483), chr(0x001f3fe))
print('💃 🏾 ')
print(chr(0x001f483) + chr(0x001f3fe))
print('💃🏾 ')
print(chr(int('1f483', 16)) + chr(int('1f3fe', 16)))
def test_demojize_complicated_string():
    # emojize followed by demojize must reproduce the original, including
    # skin-tone modifiers and trailing non-emoji symbols.
    original = u"testing :baby::emoji_modifier_fitzpatrick_type-3: with :eyes: :eyes::eyes: modifiers :baby::emoji_modifier_fitzpatrick_type-5: to symbols ヒㇿ"
    round_tripped = emoji.demojize(emoji.emojize(original))
    assert original == round_tripped, "%s != %s" % (original, round_tripped)
async def ebay_handle(group, task):
    """Build one eBay custom product report end-to-end.

    Flow: drop stale result rows for this task, query Elasticsearch for the
    matching products, enrich each hit with its category path from MySQL,
    upsert one row per product, write the aggregated totals onto the task
    row (status=1), then insert a success notification for the user.  Any
    exception flips the task to failed (status=2) and prepares a failure
    notification instead.

    group -- unused here; presumably the worker group that delivered the task.
    task  -- raw task message; unwrapped via ANATask into its data dict.
    """
    hy_task = ANATask(task)
    task_log = [hy_task.task_type, hy_task.task_data]  # kept for tracing; unused below
    # logger.info("connecting")
    task = hy_task.task_data
    # All DB timestamps are shifted +8h (UTC+8) from the server clock.
    time_now = (datetime.now() + timedelta(hours=8)).strftime('%Y-%m-%d %H:%M:%S')
    with engine.connect() as conn:
        # Remove result rows left over from a previous run of this task.
        del_body = delete(ebay_product_report_result).where(
            ebay_product_report_result.c.task_id == task['task_id'],
        )
        conn.execute(del_body)
        try:
            es = ESBody()
            # Per task: build the ES query, run it, persist results to the DB.
            search_body = es.create_search(task)
            # Restrict the query to what this user is permitted to see.
            search_body = await get_permission_es_body(task['user_id'], search_body, task['site'])
            logger.info("========================es请求体================================")
            logger.info(json.dumps(search_body))
            logger.info("========================es请求体================================")
            es_connection = Elasticsearch(hosts=EBAY_ELASTICSEARCH_URL, timeout=ELASTIC_TIMEOUT)
            # NOTE(review): Elasticsearch() is normally the sync client, yet
            # .search is awaited here -- presumably an async-capable client is
            # configured; confirm.
            index_result = await es_connection.search(
                index=task['index_name'],
                body=search_body,
                size=task['result_count'])
            # logger.info(index_result)
            # Product hits that make up the report.
            the_es_result = index_result['hits']['hits']
            name_ids = []
            # Collect every leaf category id referenced by the hits.
            for item in the_es_result:
                # logger.info(item)
                for category_id in item['_source']['leaf_category_id']:
                    name_ids.append(category_id)
            # Fetch the name/id paths for those category ids.
            select_category_name = select([
                ebay_category.c.category_name,
                ebay_category.c.category_id,
                ebay_category.c.category_id_path,
                ebay_category.c.category_name_path
            ]).where(
                and_(
                    ebay_category.c.category_id.in_(name_ids),
                    ebay_category.c.site == task['site']
                ))
            cursor_name = conn.execute(select_category_name)
            records_name = cursor_name.fetchall()
            logger.info("=======补全category_path的id========")
            logger.info(name_ids)
            logger.info("===============")
            # Attach a readable category path to every hit that matches.
            for db_info in records_name:
                for category in the_es_result:
                    for low_id in category['_source']['leaf_category_id']:
                        # logger.info(low_id)
                        if low_id == db_info['category_id']:
                            name_list = db_info['category_name_path'].split(':')
                            id_list = db_info['category_id_path'].split(':')
                            complete_list = []
                            category['_source']['category_path'] = []
                            try:
                                # Keep at most the first three path levels;
                                # shorter paths raise IndexError from pop(0),
                                # caught below and the partial list kept.
                                for i in range(3):
                                    complete_list.append({"name": name_list.pop(0), "id": id_list.pop(0)})
                                category['_source']['category_path'].append(complete_list)
                            except Exception as e:
                                logger.info(e)
                                category['_source']['category_path'].append(complete_list)
            # Upsert each product row and accumulate the report totals.
            get_result_count = 0
            sum_data = {
                "sold_total": 0,
                "sum_sold_last_3": 0,
                "sum_sold_last_7": 0,
                "sum_sold_last_1": 0,
                "sum_gmv_last_3": 0,
                "sum_gmv_last_7": 0,
                "sum_gmv_last_1": 0
            }
            for item in the_es_result:
                # Build the per-product row.
                sum_data['sold_total'] += item['_source']['sold_total']
                sum_data['sum_sold_last_3'] += item['_source']['sold_last_3']
                sum_data['sum_sold_last_7'] += item['_source']['sold_last_7']
                sum_data['sum_sold_last_1'] += item['_source']['sold_last_1']
                sum_data['sum_gmv_last_3'] += item['_source']['gmv_last_3']
                sum_data['sum_gmv_last_7'] += item['_source']['gmv_last_7']
                sum_data['sum_gmv_last_1'] += item['_source']['gmv_last_1']
                result_info = {
                    "task_id": task['task_id'],
                    "item_id": item['_source']['item_id'],
                    "img": item['_source']['img'],
                    # demojized, presumably so the title survives a
                    # non-utf8mb4 MySQL column -- confirm the column charset
                    "title": emoji.demojize(item['_source']['title']),
                    "site": item['_source']['site'],
                    "brand": item['_source']['brand'],
                    # built above from the MySQL category lookup
                    "category_path": str(item['_source']['category_path']),
                    "store_location": item['_source']['store_location'],
                    "item_location": item['_source']['item_location'],
                    "item_location_country": item['_source']['item_location_country'],
                    "seller": item['_source']['seller'],
                    "price": item['_source']['price'],
                    "gmv_last_3_pop": item['_source']['gmv_last_3_pop'],
                    "gmv_last_3": item['_source']['gmv_last_3'],
                    "gmv_last_1": item['_source']['gmv_last_1'],
                    "gmv_last_7": item['_source']['gmv_last_7'],
                    "sold_last_7": item['_source']['sold_last_7'],
                    "sold_last_1": item['_source']['sold_last_1'],
                    "sold_last_3": item['_source']['sold_last_3'],
                    "visit": item['_source']['visit_last_1'],
                    # conversion rate, guarded against division by zero
                    "cvr": item['_source']['sold_last_1'] / item['_source']['visit_last_1'] if item['_source'][
                        'visit_last_1'] != 0 else 0,
                    # NOTE(review): 'date' uses the unshifted server clock
                    # while 'update_time' is +8h -- confirm intentional.
                    "date": (datetime.now()).strftime('%Y-%m-%d %H:%M:%S'),
                    "update_time": time_now
                }
                # logger.info(result_info)
                # Insert the product row, updating it on duplicate key.
                ins = insert(ebay_product_report_result)
                insert_stmt = ins.values(result_info)
                on_duplicate_key_stmt = insert_stmt.on_duplicate_key_update(
                    task_id=insert_stmt.inserted.task_id,
                    item_id=insert_stmt.inserted.item_id,
                    img=insert_stmt.inserted.img,
                    title=insert_stmt.inserted.title,
                    site=insert_stmt.inserted.site,
                    brand=insert_stmt.inserted.brand,
                    seller=insert_stmt.inserted.seller,
                    price=insert_stmt.inserted.price,
                    category_path=insert_stmt.inserted.category_path,
                    store_location=insert_stmt.inserted.store_location,
                    item_location=insert_stmt.inserted.item_location,
                    item_location_country=insert_stmt.inserted.item_location_country,
                    gmv_last_3_pop=insert_stmt.inserted.gmv_last_3_pop,
                    gmv_last_3=insert_stmt.inserted.gmv_last_3,
                    gmv_last_1=insert_stmt.inserted.gmv_last_1,
                    gmv_last_7=insert_stmt.inserted.gmv_last_7,
                    sold_last_7=insert_stmt.inserted.sold_last_7,
                    sold_last_1=insert_stmt.inserted.sold_last_1,
                    sold_last_3=insert_stmt.inserted.sold_last_3,
                    visit=insert_stmt.inserted.visit,
                    cvr=insert_stmt.inserted.cvr,
                    date=insert_stmt.inserted.date,
                )
                result = conn.execute(on_duplicate_key_stmt)
                # logger.info(result)
                get_result_count += 1
            # Mark the task finished and store the aggregated totals.
            logger.info(sum_data)
            ins = update(ebay_custom_report_task)
            ins = ins.values({
                "status": 1,
                "update_time": time_now,
                "get_result_count": get_result_count,
                "product_total": get_result_count,
                "sold_total": sum_data['sold_total'],
                "sum_sold_last_3": sum_data['sum_sold_last_3'],
                "sum_sold_last_7": sum_data['sum_sold_last_7'],
                "sum_sold_last_1": sum_data['sum_sold_last_1'],
                "sum_gmv_last_3": round(sum_data['sum_gmv_last_3'], 2),
                "sum_gmv_last_7": round(sum_data['sum_gmv_last_7'], 2),
                "sum_gmv_last_1": round(sum_data['sum_gmv_last_1'], 2)
            }).where(
                ebay_custom_report_task.c.task_id == task['task_id']
            )
            result = conn.execute(ins)
            # logger.info(result)
            # Insert the "report ready" user notification.
            ins_msg = insert(ana_user_msg)
            insert_stmt_msg = ins_msg.values(
                {
                    "user_id": task['user_id'],
                    "msg_id": str(task['user_id']) + str(int(time.time())),
                    "msg_content": "您的Ebay自定义报告" + task['report_name'] + "于" + time_now + "生成成功,请及时查看!",
                    "create_at": time_now,
                    "status": 0
                }
            )
            result_msg = conn.execute(insert_stmt_msg)
        except Exception as e:
            logger.info(e)
            # Mark the task failed (aggregate fields intentionally left unset).
            ins = update(ebay_custom_report_task)
            ins = ins.values({
                "status": 2,
                "update_time": time_now,
                # "get_result_count": get_result_count,
                # "product_total": get_result_count,
                # "sold_total": sum_data['sold_total'],
                # "sum_sold_last_3": sum_data['sold_last_3'],
                # "sum_sold_last_7": sum_data['sold_last_7'],
                # "sum_sold_last_1": sum_data['sold_last_1'],
                # "sum_gmv_last_3": round(sum_data['gmv_last_3'], 2),
                # "sum_gmv_last_7": round(sum_data['gmv_last_7'], 2),
                # "sum_gmv_last_1": round(sum_data['gmv_last_1'], 2)
            }).where(
                ebay_custom_report_task.c.task_id == task['task_id']
            )
            result = conn.execute(ins)
            # logger.info(result)
            # Build the "report failed" user notification.
            # NOTE(review): unlike the success path, this statement is never
            # passed to conn.execute() within this excerpt -- confirm whether
            # the execute call was lost.
            ins_msg = insert(ana_user_msg)
            insert_stmt_msg = ins_msg.values(
                {
                    "user_id": task['user_id'],
                    "msg_id": str(task['user_id']) + str(int(time.time())),
                    "msg_content": "您的Ebay自定义报告" + task['report_name'] + "于" + time_now + "生成失败,请重新编辑条件或联系网站管理员!",
                    "create_at": time_now,
                    "status": 0
                }
            )
def form_emoji_dict(s):
    """Append the demojized emoji found in *s* onto the global emoji_chain."""
    found = [ch for ch in s if ch in emoji.UNICODE_EMOJI]
    demojized = emoji.demojize(' '.join(found))
    emoji_chain.append(demojized)
def comment_image(browser, username, comments, blacklist, logger, logfolder): """Checks if it should comment on the image""" # check action availability if quota_supervisor("comments") == "jump": return False, "jumped" rand_comment = random.choice(comments).format(username) rand_comment = emoji.demojize(rand_comment) rand_comment = emoji.emojize(rand_comment, use_aliases=True) open_comment_section(browser, logger) # wait, to avoid crash sleep(3) comment_input = get_comment_input(browser) try: if len(comment_input) > 0: # wait, to avoid crash sleep(2) comment_input = get_comment_input(browser) # below, an extra space is added to force # the input box to update the reactJS core comment_to_be_sent = rand_comment # wait, to avoid crash sleep(2) # click on textarea/comment box and enter comment (ActionChains(browser).move_to_element( comment_input[0]).click().send_keys( comment_to_be_sent).perform()) # wait, to avoid crash sleep(2) # post comment / <enter> (ActionChains(browser).move_to_element(comment_input[0]).send_keys( Keys.ENTER).perform()) update_activity( browser, action="comments", state=None, logfolder=logfolder, logger=logger, ) if blacklist["enabled"] is True: action = "commented" add_user_to_blacklist(username, blacklist["campaign"], action, logger, logfolder) else: logger.warning("--> Comment Action Likely Failed!" "\t~comment Element was not found") return False, "commenting disabled" except InvalidElementStateException: logger.warning("--> Comment Action Likely Failed!" "\t~encountered `InvalidElementStateException` :/") return False, "invalid element state" logger.info("--> Commented: {}".format(rand_comment.encode("utf-8"))) Event().commented(username) # get the post-comment delay time to sleep naply = get_action_delay("comment") sleep(naply) return True, "success"
def onPressButton(self):
    """Route a pressed main-menu reply-keyboard button to the matching action.

    ``self.user.message.text`` holds the button label; each branch below
    handles one of the profile / buy / sell / info / support buttons.
    """
    button = self.user.message.text
    logger.info('{} - нажата кнопка {}'.format(self.user.message.chat.id,
                                               emoji.demojize(button)))
    if button == self.profileButton:
        # Profile: balance summary plus purchases/sales/invite/coupon actions.
        markup = telebot.types.InlineKeyboardMarkup()
        markup.add(
            telebot.types.InlineKeyboardButton(
                text='🛒 Мои покупки', callback_data='my_purchases'))
        markup.add(
            telebot.types.InlineKeyboardButton(text='💰 Мои продажи',
                                               callback_data='my_sales'))
        markup.add(
            telebot.types.InlineKeyboardButton(
                text='🤝 Пригласить друга', callback_data='invite_message'))
        markup.add(
            telebot.types.InlineKeyboardButton(
                text='🏷 Ввести код купона', callback_data='enter_coupon_code'))
        bot.send_message(self.user.message.chat.id,
                         '₴ Баланс: {}\n'
                         '🛒 Покупок: {}\n'
                         '💰 Продаж: {}'.format(
                             self.user.balance,
                             db.get_purchases(self.user.id),
                             db.get_sells(self.user.id)),
                         reply_markup=markup)
    elif button == self.buyButton:
        # switch the user to the shop page/state
        self.user.setState('shop')
        if db.get_selling_products():
            bot.send_message(self.user.message.chat.id,
                             'Товары в продаже:',
                             parse_mode='HTML',
                             reply_markup=Page(self.user).getMarkup())
            for product in db.get_selling_products():
                text = '\n\n🔹 {}\nЦена: {} ₴\nКупить: /buy_{}'.format(
                    product['title'], product['price'],
                    utils.convertInt(product['id']))
                photos = db.get_sale_app_photos(product['id'])
                media_group = []
                # Only the first photo of the album carries the caption.
                for num in range(len(photos)):
                    media_group.append(
                        types.InputMediaPhoto(
                            photos[num]['photo'],
                            caption=text if num == 0 else ''))
                bot.send_media_group(self.user.message.chat.id, media_group)
        else:
            bot.send_message(
                self.user.message.chat.id,
                'К сожалению, сейчас ничего нет в продаже. Почему бы не продать что-то?',
                reply_markup=Page(self.user).getMarkup())
    elif button == self.sellButton:
        if db.check_sale_rules(self.user.id) == 1:
            # switch the user to the sale-creation page/state
            self.user.setState('sale')
            # refresh the page (keyboard markup)
            bot.reply_to(
                self.user.message,
                "Вы начали создание товара на продажу, если Вы передумали что-либо продавать или ввели неккоректные данные, нажмите кнопку Отмена. После создания заявки на продажу, модераторы проверят её и Ваш товар станет доступен для покупки другим пользователям. Статус обработки заявки можно посмотреть в личном кабинете.",
                reply_markup=Page(self.user).getMarkup()).wait()
            bot.send_message(self.user.id, "Напишите название вашего товара")
        else:
            # First-time sellers must accept the sale rules first.
            markup = telebot.types.InlineKeyboardMarkup()
            markup.add(
                telebot.types.InlineKeyboardButton(
                    text='Принять соглашение',
                    callback_data='sale_confirm_rules'))
            bot.send_message(
                self.user.id,
                'Перед созданием первого товара Вам нужно ознакомиться с правилами и советами:\n\n'
                '- Сделайте хорошие фотографии с нескольких ракурсов\n'
                '- Составьте подробное описание товара\n',
                reply_markup=markup)
    elif button == self.infoButton:
        bot.send_message(
            self.user.id,
            '{} \n {}'.format(msg.info_text, self.user.balance))
    elif button == self.supportButton:
        self.user.setState('support')
        bot.reply_to(
            self.user.message,
            "Все Ваши сообщения, отправленные после этого будут переданы администрации\nДля завершения нажмите на кнопку 'Завершить'",
            reply_markup=Page(self.user).getMarkup())
def command(self, event):
    """Echo the demojized form of the first argument back to the caller."""
    raw = event["spec"][0]
    nickname = event["user"].nickname
    event["stdout"].write("%s: %s" % (nickname, emoji.demojize(raw)))
async def sc(self, ctx):
    """Render the invoking message as a YouTube-style "super chat" image
    with a random donation amount, then post the image to the channel.

    The original message is deleted; custom stamps and unicode emoji in the
    text are pasted into the image as 20x20 PNGs.
    """
    user = ctx.message.author
    # Strip the command prefix (first 4 characters of the cleaned content).
    msg = ctx.message.clean_content[4:]
    await ctx.message.delete()
    # Generate a random donation amount.
    money = self._get_random_money()
    # Colour scheme matching the amount tier.
    colors = self._get_money_colors(money)
    # Create and draw the card rectangles.
    main_color = colors['main_color']
    back_color = colors['back_color']
    name_color = colors['name_color']
    text_color = colors['text_color']
    # Wrap at 36 columns; emoji are swapped for placeholder chars and
    # returned separately in emoji_list.
    format_msg, emoji_list = self._format_text(36, msg)
    stamp_list = await self._get_custom_stamp_list(ctx.guild, msg)
    lines = format_msg.count(os.linesep)
    text_height = 22
    font_size = 20
    # Header band is 100px; body grows with the number of wrapped lines.
    height = 150 + lines * text_height
    im = Image.new("RGBA", (450, height), tuple(main_color))
    draw = ImageDraw.Draw(im)
    draw.rectangle((0, 100, 450, height), fill=tuple(back_color))
    # Render the text.
    name_font = ImageFont.truetype(
        str(self.path / "font/migu-1m-regular.ttf"), font_size)
    # The user name alone uses a slightly lighter colour.
    draw.multiline_text((110, 20),
                        user.display_name,
                        fill=tuple(name_color),
                        font=name_font)
    del name_font
    text_font = ImageFont.truetype(
        str(self.path / "font/migu-1m-bold.ttf"), font_size)
    draw.multiline_text((110, 50),
                        f"¥ {'{:,}'.format(money)}",
                        fill=tuple(text_color),
                        font=text_font)
    draw.multiline_text((20, 115),
                        format_msg,
                        fill=tuple(text_color),
                        font=text_font)
    offset = [0, 0]
    prev_str = ''
    for i, s in enumerate(format_msg):
        # Full-width characters advance a full cell, half-width half a cell.
        if unicodedata.east_asian_width(s) in 'FWA':
            offset[0] += font_size
        else:
            offset[0] += int(font_size / 2)
        # Replace custom-stamp ('&@') and emoji ('&%') placeholders with images.
        if s in ['@', '%'] and prev_str == '&':
            pos = [20 + offset[0] - font_size, 115 + offset[1]]
            # Paint over the placeholder characters first.
            draw.rectangle(
                (pos[0], pos[1], pos[0] + font_size, pos[1] + font_size),
                fill=tuple(back_color))
            # Custom (guild) stamp: fetch and paste its image.
            if s == '@':
                data = io.BytesIO(await stamp_list.pop(0).read())
                stamp_img = Image.open(data).convert('RGBA').resize(
                    (20, 20), Image.BICUBIC)
                im.paste(stamp_img, (pos[0], pos[1]), stamp_img.split()[3])
            # Unicode emoji: look up the bundled PNG by its demojized name.
            elif s == '%':
                if len(emoji_list) > 0:
                    emoji_str = emoji.demojize(emoji_list.pop(0))[1:-1]
                    # Some emoji are not converted, so check the file exists
                    # just in case (as of 2020-10-04 only ⛩).
                    emoji_img_path = self.path / f'img/emoji/{emoji_str}.png'
                    if os.path.isfile(emoji_img_path):
                        emoji_img = Image.open(emoji_img_path).convert(
                            'RGBA')
                        im.paste(emoji_img, (pos[0], pos[1]),
                                 emoji_img.split()[3])
        prev_str = s
        if s == '\n':
            offset[0] = 0
            offset[1] += text_height
    # Fetch the author's avatar and convert it to an Image.
    data = io.BytesIO(await user.avatar_url.read())
    thum = Image.open(data).convert('RGBA')
    del data
    thum = thum.resize((60, 60), Image.BICUBIC)
    # Composite the avatar as a circle using the luminance mask.
    mask = Image.open(self.path /
                      "img/superchat/mask_circle.jpg").convert('L')
    im.paste(thum, (25, 20), mask.resize((60, 60), Image.HAMMING))
    im.save(self.path / "img/superchat/superchat.png")
    del im
    await ctx.send(file=discord.File(self.path /
                                     "img/superchat/superchat.png"))
def clean_tweets_opt(tweet, lower=True, demoji=True, punc=True, stopwords=(),
                     num=False, url=True, stem=False, repeatedChar=False,
                     users=False):
    """Tokenize a tweet and apply the selected normalisation steps.

    Parameters
    ----------
    tweet : str            raw tweet text
    lower : bool           lowercase every token
    demoji : bool          replace emoji with their :name: labels
    punc : bool            drop tokens that are pure punctuation
    stopwords : iterable   tokens to drop (immutable default replaces the
                           original mutable ``[]`` default)
    num : bool             collapse digit runs into the token 'NUM'
    url : bool             mask URLs with 'URL' before tokenization
    stem : bool            stem each token
    repeatedChar : bool    squeeze 3+ repeated characters down to exactly 3
    users : bool           replace @mentions with 'USER'

    Returns
    -------
    list[str]              cleaned, non-empty tokens
    """
    # URLs must be masked before tokenization, or the tokenizer splits them.
    if url:
        tweet = re.sub(r'htt[^ ]*', 'URL', tweet)
    cleanWords = twtokenizer.tokenize(tweet)

    # lower  # Convert to Lowercase
    if lower:
        cleanWords = [word.lower() for word in cleanWords]

    # demoji  # Convert Emoji's to Word Label
    if demoji:
        cleanWords = [emoji.demojize(word) for word in cleanWords]

    # punc  # Only removes a token when punctuation is the entire token.
    if punc:
        cleanWords = [word for word in cleanWords if word not in punctuation]

    # Remove caller-supplied stop words.
    cleanWords = [word for word in cleanWords if word not in stopwords]

    # num  # Identify digit runs and collapse them to 'NUM'
    # (raw string fixes the original invalid "\d" escape).
    if num:
        cleanWords = [re.sub(r"\d+", "NUM", x) for x in cleanWords]

    # stem  # Stem each token.
    if stem:
        cleanWords = [stemmer.stem(x) for x in cleanWords]

    # repeatedChar  # Keep elongated words distinguishable but standardized
    # ('soooooo' -> 'sooo').
    if repeatedChar:
        cleanWords = [re.sub(r'(.)\1{2,}', r'\1\1\1', word)
                      for word in cleanWords]

    # users  # startswith() is safe on empty tokens, unlike word[0].
    if users:
        cleanWords = ['USER' if word.startswith('@') else word
                      for word in cleanWords]

    ## Non-optional post-processing: drop empty tokens.
    cleanWords = [x for x in cleanWords if x != '']
    return cleanWords
def test_demojize_name_only():
    # Every known :name: must survive an emojize/demojize round trip.
    for name in emoji.EMOJI_UNICODE.keys():
        round_tripped = emoji.demojize(emoji.emojize(name, False))
        assert name == round_tripped, "%s != %s" % (name, round_tripped)
def test_shortcut_translation():
    # Every shortcut must be changed, and changed to its mapped long form.
    for shortcut in emoji.shortcuts.SHORTCUTS.keys():
        translated = emoji.demojize(shortcut, use_shortcuts=True)
        assert translated != shortcut
        expected = emoji.shortcuts.SHORTCUTS[shortcut]
        assert expected == translated, "%s != %s" % (expected, translated)
def test_smile_emoji():
    # Alias-emojize, demojize with shortcuts, then emojize again: the result
    # must match a plain alias-emojize of the same text.
    txt = u'(<some text> :smile:)'
    expected = emoji.emojize(txt, use_aliases=True)
    demojized = emoji.demojize(emoji.emojize(txt, use_aliases=True),
                               use_shortcuts=True)
    assert emoji.emojize(demojized) == expected
def read_data(X_train, X_test, Y_path, sentence_txt, bigdict, word2vec_model):
    """Build the (X, Y) training matrices backed by a word2vec embedding.

    X_train / X_test : CSV paths with a 'comment' column
    Y_path           : CSV path with a 'label' column for the training rows
    sentence_txt     : cache file of jieba-segmented sentences (one per line)
    bigdict          : jieba dictionary path
    word2vec_model   : path used to load (or save) the gensim model

    Side effects: sets the module-global ``embedding_layer`` and writes the
    sentence cache and word2vec model to disk on a cache miss.

    NOTE(review): the bare ``except`` blocks treat *any* failure as a cache
    miss, and the files opened below are not closed on error paths --
    with-blocks would be safer.  TRAIN_NUM is hard-coded to the size of the
    labelled training set.
    """
    TRAIN_NUM = 119018  # number of labelled training rows
    try:
        # Fast path: reuse a previously written sentence cache.
        print('Loading Sentences')
        sentences = word2vec.LineSentence(sentence_txt)
    except:
        # Cache miss: segment train+test comments and write the cache file.
        print('Reading data to sentences')
        data = pd.read_csv(X_train)
        X_data = data['comment'].values
        testdata = pd.read_csv(X_test)
        X_testdata = testdata['comment'].values
        print(X_data.shape)  # (12000,)
        print(X_testdata.shape)
        X_words = []
        jieba.set_dictionary(bigdict)
        for i in range(len(X_data)):
            # Demojize first so emoji survive segmentation as :name: tokens.
            line = emoji.demojize(X_data[i])
            seg_list = list(jieba.cut(line, cut_all=False))
            X_words.append(seg_list)
        for j in range(len(X_testdata)):
            line = emoji.demojize(X_testdata[j])
            seg_list = list(jieba.cut(line, cut_all=False))
            X_words.append(seg_list)
        out = open(sentence_txt, "w")
        for sen in X_words:
            for word in sen:
                out.write(word)
                out.write(' ')
            out.write('\n')
        out.close()
        sentences = word2vec.LineSentence(sentence_txt)
    # word2vec: load a previously trained model, else train and save one.
    try:
        print('Loading word2vec model')
        w2v_model = word2vec.Word2Vec.load(word2vec_model)
    except:
        print('Training word2vec model')
        w2v_model = word2vec.Word2Vec(sentences, iter=32, size=128,
                                      min_count=3, workers=4, sg=1)
        w2v_model.save(word2vec_model)
    # Row 0 of the embedding matrix stays all-zero for unknown words.
    embedding_matrix = np.zeros(
        (len(w2v_model.wv.vocab.items()) + 1, w2v_model.vector_size))
    word2idx = {}
    vocab_list = [(word, w2v_model.wv[word])
                  for word, _ in w2v_model.wv.vocab.items()]
    for v, vocab in enumerate(vocab_list):
        word, vec = vocab
        embedding_matrix[v + 1] = vec
        word2idx[word] = v + 1
    global embedding_layer
    # Frozen Keras embedding layer, consumed by model-building code elsewhere.
    embedding_layer = Embedding(input_dim=embedding_matrix.shape[0],
                                output_dim=embedding_matrix.shape[1],
                                weights=[embedding_matrix],
                                trainable=False)
    # Re-read the cache and map each word to its index (0 = unknown).
    X_vecs = []
    readfile = open(sentence_txt, "r")
    for line in readfile:
        new_doc = []
        for word in line.split():
            try:
                new_doc.append(word2idx[word])
            except:
                new_doc.append(0)
        X_vecs.append(new_doc)
        # Only the first TRAIN_NUM cached rows are labelled training data.
        if len(X_vecs) >= TRAIN_NUM:
            break
    X = np.array(X_vecs)
    print(X.shape)
    label = pd.read_csv(Y_path)
    Y_data = label['label'].values
    Y = np.array(Y_data)
    Y = Y[0:TRAIN_NUM]
    return X, Y
def getEMOJI(self, text):
    """Replace each emoji in *text* with an upper-cased ' TK.EMOJI.NAME ' token.

    The text is first demojized (emoji -> :name:), then every :name:
    occurrence is rewritten as a space-padded TK.EMOJI.* token so it
    tokenizes cleanly.
    """
    def replacement(match):
        return ' TK.EMOJI.' + match.group(1).upper() + ' '

    text = emoji.demojize(text)
    # Raw string fixes the original's invalid '\:' escapes; digits are added
    # to the class because demojized names can contain them
    # (e.g. :emoji_modifier_fitzpatrick_type-3:), which '[a-z_-]+' missed.
    return re.sub(r':([a-z0-9_-]+):', replacement, text)
import re

print("Start cleaning Data")
# The big traditional-Chinese dictionary improves jieba's segmentation.
jieba.load_userdict("./data/dict.txt.big")

# Read the raw training comments ("id,text" per line); the with-block
# replaces the original open()/close() pair.
with open("./data/train_x.csv") as train_file:
    train_x = train_file.readlines()

# Matches runs of ASCII and full-width CJK punctuation.  Raw string keeps
# the identical pattern while silencing invalid-escape warnings.
punctuation_search = re.compile(
    r"[\s+\.\!\/_,$%^*(+\"\']+|[+——\>\<!,。??、\-~~@#¥%……&*():]+")

clean_data = []
# ``idx`` replaces ``id``, which shadowed the builtin.
for idx in range(len(train_x)):
    train_x[idx] = train_x[idx].replace("\n", "")
    # Drop the leading id column; keep everything after the first comma.
    train_x[idx] = train_x[idx].split(",", maxsplit=1)[1]
    word_list = jieba.lcut(train_x[idx])
    # Demojize per token so emoji survive as :name: words.
    word_list = [emoji.demojize(w) for w in word_list]
    clean_list = []
    for word in word_list:
        # ``is None`` replaces the original ``type(check) == type(None)``.
        if punctuation_search.match(word, 0) is None:
            clean_list.append(word)
    if len(clean_list) != 0:
        clean_data.append(clean_list)

print("Start training word2vec")
word2vec_model = gensim.models.Word2Vec(clean_data, size=200, window=5,
                                        min_count=5, workers=3, iter=30)
def emoji_as_words(emoji_list):
    """Demojize each emoji into its bare name (no ':' delimiters)."""
    return [emoji.demojize(symbol, delimiters=('', ''))
            for symbol in emoji_list]
def parse_comment_for_vote(body):
    """turns a comment into a vote, if possible"""
    demojized_body = demojize(body)
    return parse_emojis_for_vote(demojized_body)
def preprocessing(text):
    """Clean a sports (NBA/football) post title/body for NLP.

    Steps: strip newlines/links, spell out emoji, expand stat and league
    acronyms, drop numbers/times/currency and boilerplate thread markers,
    then remove stop words and punctuation via spaCy.

    Fix over the original: the BPG substitution reused the BLK pattern
    (``\\bblk(s?)\\b``), so 'bpg' was never expanded; it now matches
    ``\\bbpg\\b``.
    """
    # Remove newlines.
    text = text.replace("\n", "")
    # Remove links.
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'www\S+', '', text)
    # Replace emoji with their aliases.
    text = emoji.demojize(text)
    # Strip the colons around each alias.
    text = re.sub(r'(:)(.*?)(:)', r' \2 ', text)
    # Split multi-word aliases on the underscore.
    text = re.sub(r'_', ' ', text)
    # Remove slashes and pipes.
    text = re.sub(r'/', ' ', text)
    text = re.sub(r'\|', ' ', text)
    # Unwrap parenthesised text (raw string replaces the old '\g<2>').
    text = re.sub(r'(\()([^)]+)(\))', r'\g<2>', text)
    # Ensure a space after closing brackets.
    text = text.replace(']', '] ')
    # Remove standalone numbers (but not those attached via a hyphen).
    text = re.sub(r'(?<![a-zA-Z]-)(\b\d+\b)', ' ', text)
    # Remove minutes / millions.
    text = re.sub(r'\d+m\b', ' ', text)
    # Remove ordinal positions.
    text = re.sub(r'\d+th\b', ' ', text)
    text = re.sub(r'\d+st\b', ' ', text)
    text = re.sub(r'\d+nd\b', ' ', text)
    text = re.sub(r'\d+rd\b', ' ', text)
    # Remove hours / times.
    text = re.sub(r'\d+h\b', ' ', text)
    text = re.sub(r'\d+am\b', ' ', text)
    text = re.sub(r'(\b\d+h\d+\b)', ' ', text)
    # Remove years.
    text = re.sub(r'\d+s\b', ' ', text)
    # Expand number-attached stat abbreviations.
    text = re.sub(r'(\d+)(ppg)', ' point per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(pt(s?))', ' point', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(reb(s?))', ' rebound', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(rpg)', ' rebound per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(ast(s?))', ' assist', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(apg)', ' assist per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(stl(s?))', ' steal', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(spg)', ' steal per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(blk(s?))', ' block', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(bpg)', ' block per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(OT(s?))', ' overtime', text)
    text = re.sub(r'(\d+)(pm)', ' three-point field goal made', text,
                  flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(pa)', ' three-point field goal attempted', text,
                  flags=re.IGNORECASE)
    text = re.sub(r'(\d+)(P%)', ' three-point field goal', text)
    # Expand the TS% acronym.
    text = re.sub(r'(\bTS%\b)', 'throw shooting percentage', text)
    # Normalise free-kick / free-throw spellings.
    text = re.sub(r'(\bfree kick(s?)\b)', 'free-kick', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bfree throw(s?)\b)', 'free-throw', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bfreethrown(s?)\b)', 'free-throw', text, flags=re.IGNORECASE)
    # Expand percentage acronyms.
    text = re.sub(r'(\bthrow%\b)', 'throw shooting percentage', text)
    text = re.sub(r'(\bfield goal%\b)', 'field goal', text)
    # Expand referee / time / league acronyms.
    text = re.sub(r'(\bref\b)', 'referee', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bOT\b)', 'overtime', text)
    text = re.sub(r'(\bET\b)', 'extra-time', text)
    text = re.sub(r'(\bWC\b)', 'World Cup', text)
    text = re.sub(r'(\bEPL\b)', 'English Premier League', text)
    text = re.sub(r'(\bPL\b)', 'Premier League', text)
    text = re.sub(r'(\bVAR\b)', 'Video Assistant Referee', text)
    text = re.sub(r'(\bUCL\b)', 'Uefa Champions League', text)
    text = re.sub(r'(\bCL\b)', 'Champions League', text)
    text = re.sub(r'(\bUEL\b)', 'Uefa Europa League', text)
    text = re.sub(r'(\bEL\b)', 'Europa League', text)
    # Expand position acronyms.
    text = re.sub(r'(\bsg\b)', 'shooting guard', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bg\b)', 'shooting guard', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bc\b)', 'center', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bpf\b)', 'power forward', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bsf\b)', 'small forward', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bf\b)', 'forward', text, flags=re.IGNORECASE)
    # Expand standalone stat acronyms.
    text = re.sub(r'(\bppg\b)', 'point per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bpt(s?)\b)', 'point', text, flags=re.IGNORECASE)
    text = re.sub(r'(\breb(s?)\b)', 'rebound', text, flags=re.IGNORECASE)
    text = re.sub(r'(\brpg\b)', 'rebound per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bast(s?)\b)', 'assist', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bassts\b)', 'assist', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bapg\b)', 'assist per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bstl(s?)\b)', 'steal', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bspg(s?)\b)', 'steal per game', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bblk(s?)\b)', 'block', text, flags=re.IGNORECASE)
    # BUGFIX: was '(\bblk(s?)\b)', already consumed by the BLK rule above,
    # so 'bpg' never got expanded.
    text = re.sub(r'(\bbpg\b)', 'block per game', text, flags=re.IGNORECASE)
    # Hyphenate triple-double / double-double.
    text = re.sub(r'(\btriple double\b)', 'triple-double', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bdouble double\b)', 'double-double', text, flags=re.IGNORECASE)
    # Expand field-goal / free-throw acronyms.
    text = re.sub(r'(\bFGM\b)', 'field goal made', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bFGA\b)', 'field goal attempted', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bFG\b)', 'field goal', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bFTM\b)', 'free throw made', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bFTA\b)', 'free throw attempted', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bFT\b)', 'free throw', text, flags=re.IGNORECASE)
    # Remove boilerplate thread markers.
    text = re.sub(r'(\bhighlight(s?)\b)', '', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bpost\b( ?))?(\bpre\b( ?))?(-?)(\bmatch\b) (\bthread\b)',
                  '', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bpost\b( ?))?(\bpre\b( ?))?(-?)(\bgame\b) (\bthread\b)',
                  '', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bdaily\b( ?))?(\bdiscussion(s)?\b)(( ?)\bthread\b)?',
                  '', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bbreaking\b)', '', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bfree talk friday\b)', '', text, flags=re.IGNORECASE)
    text = re.sub(r'(\bVIDEO\b)', '', text)
    # Remove '+' and currency symbols.
    text = re.sub(r'(\+)', '', text)
    text = text.replace('£', '')
    text = text.replace('$', '')
    text = text.replace('€', '')
    # Remove the OC (original content) marker.
    text = re.sub(r'(\bOC\b)', '', text)
    # Collapse runs of spaces.
    text = re.sub(r' {2,}', ' ', text)
    # Drop stop words and punctuation via spaCy.
    lemmas = [
        token for token in nlp(text)
        if not token.is_stop and not token.is_punct
    ]
    text = " ".join(str(token) for token in lemmas)
    # NOTE(review): this also turns an existing 'Serie A' into 'Serie A A';
    # preserved as-is pending confirmation of the intended normalisation.
    text = text.replace('Serie', 'Serie A')
    return text