Пример #1
0
 def check_emojis(e1: EMOJI_CHECK,
                  e2: EMOJI_CHECK) -> tuple[bool, Optional[str]]:
     """Check that every string argument holds exactly one emoji.

     Arguments that are not ``str`` instances are accepted without being
     inspected.  The arguments are examined in order (``e1`` first).

     Returns:
         ``(False, value)`` for the first string argument for which
         ``emojis.count`` is not 1, otherwise ``(True, None)``.
     """
     for candidate in (e1, e2):
         if not isinstance(candidate, str):
             continue
         if emojis.count(candidate) != 1:
             return False, candidate
     return True, None
Пример #2
0
    def check_emojis(e1: EMOJI_CHECK,
                     e2: EMOJI_CHECK) -> Tuple[bool, Optional[str]]:
        """ Checks that each emoji supplied as a string is exactly one emoji.
        :param e1: The first emoji.
        :param e2: The second emoji.
        :return: ``(False, emoji)`` for the first string argument whose
            ``emojis.count`` differs from 1, ``(True, None)`` otherwise.
            Non-string arguments are not checked. """

        for emoji_arg in (e1, e2):
            is_text = isinstance(emoji_arg, str)
            if is_text and emojis.count(emoji_arg) != 1:
                return False, emoji_arg
        return True, None
Пример #3
0
def get_all_specific_features(Authors):
    """Build one feature row per author from emoji and marker-token counts.

    For every author, each entry of ``author['tweets']`` is tokenized with
    ``clean_en.tokenize`` and four totals are accumulated over all tweets:
    the ``emojis.count`` of the raw tweet and the number of
    ``'<first_person>'``, ``'<pronoun>'`` and ``'<negation>'`` items that the
    tokenized tweet reports through ``.count``.

    Args:
        Authors: Iterable of mappings that each provide a ``'tweets'``
            iterable.

    Returns:
        A list with one ``[emoji, first_person, pronouns, negations]`` list
        per author, in input order.
    """
    features = []
    for author in Authors:
        emoji = first_person = pronouns = negations = 0
        for tweet in author['tweets']:
            tokens = clean_en.tokenize(tweet)
            # Emojis are counted on the raw tweet, not on the tokenized form.
            emoji += emojis.count(tweet)
            first_person += tokens.count('<first_person>')
            pronouns += tokens.count('<pronoun>')
            negations += tokens.count('<negation>')
        features.append([emoji, first_person, pronouns, negations])

    return features
Пример #4
0
def emojiCount(file):
    """Print how many lines of a text file contain at least one emoji.

    Args:
        file: Path of the file to scan; it is opened as UTF-8 text.

    The count is printed, not returned.
    """
    # The context manager closes the handle even if counting raises.
    with open(file, 'r', encoding='utf-8') as handle:
        lines_with_emoji = sum(1 for line in handle if emojis.count(line) > 0)
    print(lines_with_emoji)
def emoticon_number_sd(author):
    """Return the standard deviation of the per-tweet emoji counts.

    Args:
        author: Mapping that provides a ``'tweets'`` iterable.

    Returns:
        ``stats.stdev`` of the ``emojis.count`` of every tweet, or ``0.0``
        when the author has fewer than two tweets.
    """
    emoticon_number_list = [emojis.count(tweet) for tweet in author['tweets']]
    # NOTE(review): `stats` is presumably the standard-library `statistics`
    # module, whose stdev() rejects fewer than two data points -- confirm.
    if len(emoticon_number_list) < 2:
        return 0.0
    return stats.stdev(emoticon_number_list)
Пример #6
0
def final_process(word):
    """Turn one token into a list of output tokens, or ``''`` to drop it.

    Returns:
        ``[word]`` when the token contains at least one emoji; otherwise the
        token split on runs of non-word characters, provided more than one
        word character remains after stripping them; otherwise the empty
        string ``''``.
    """
    has_emoji = emojis.count(word) > 0
    if has_emoji:
        return [word]

    word_chars = re.sub(r'\W+', '', word)
    if len(word_chars) <= 1:
        return ''

    return re.sub(r'\W+', ' ', word).split()
def get_all_specific_features_label(Authors):
    """Build an ``[emoji, negations]`` feature row for every author.

    Prints ``"specific es"`` once and then the zero-based index of each
    author as it is processed.  Every tweet in ``author['tweets']`` is
    tokenized with ``clean_es.tokenize``; emojis are counted on the raw tweet
    and marker tokens on the tokenized result.

    Args:
        Authors: Iterable of mappings that each provide a ``'tweets'``
            iterable.

    Returns:
        A list with one ``[emoji, negations]`` list per author, in input
        order.
    """
    print("specific es")
    features = []
    for index, author in enumerate(Authors):
        print(index)
        emoji = 0
        first_person = 0
        pronouns = 0
        negations = 0
        for tweet in author['tweets']:
            tokens = clean_es.tokenize(tweet)

            emoji += emojis.count(tweet)
            first_person += tokens.count('<first_person>')
            pronouns += tokens.count('<pronoun>')
            negations += tokens.count('<negation>')

        # first_person and pronouns are tallied above but are currently left
        # out of the emitted row.
        features.append([emoji, negations])

    return features
Пример #8
0
 def count_unique_emojis(text: str, unique=True):
     '''
     Count the emojis in ``text``.

     :param text: The string to inspect.
     :param unique: Forwarded to ``emojis.count``; with the default ``True``
         each distinct emoji is counted once.
     :return: The value returned by ``emojis.count``.

     https://emojis.readthedocs.io/en/latest/api.html#sample-code
     import emojis
     '''
     # The flag used to be accepted but ignored, so the total was returned.
     return emojis.count(text, unique=unique)
Пример #9
0
def emoji_1(Authors):
    """Store every author's total emoji count under ``author['emoji']``.

    Args:
        Authors: Iterable of mutable mappings that each provide a
            ``'tweets'`` iterable.  Each mapping is updated in place with the
            sum of ``emojis.count`` over its tweets.

    Returns:
        None.
    """
    for author in Authors:
        author['emoji'] = sum(emojis.count(tweet) for tweet in author['tweets'])
def emoticon_ratio(author):
    """Return the fraction of an author's tweets that contain an emoji.

    Args:
        author: Mapping that provides a sized ``'tweets'`` collection.

    Returns:
        The number of tweets whose ``emojis.count`` is positive divided by
        the number of tweets, or ``0.0`` when there are no tweets.
    """
    tweets = author['tweets']
    tweet_total = len(tweets)
    # An author without tweets used to trigger a ZeroDivisionError.
    if tweet_total == 0:
        return 0.0
    tweet_having_emoticon = sum(
        1 for tweet in tweets if emojis.count(tweet) > 0)
    return tweet_having_emoticon / tweet_total
Пример #11
0
def emoji_2(Authors):
    """Return the total emoji count of every author.

    Args:
        Authors: Iterable of mappings that each provide a ``'tweets'``
            iterable.

    Returns:
        A list holding, for each author in input order, the sum of
        ``emojis.count`` over that author's tweets.
    """
    return [
        sum(emojis.count(tweet) for tweet in author['tweets'])
        for author in Authors
    ]
Пример #12
0
    def text(update, context):
        """Reply to an incoming text message and broadcast it.

        A message that consists solely of emojis is echoed back with an
        ``uwu`` prefix and broadcast under the ``"emojis"`` event; any other
        message is echoed with an ``auzi cica >`` prefix and broadcast under
        ``"scrolly-text"``.  ``context`` is not used.
        """
        text_received = update.message.text

        # Hack: the message is emoji-only when the emoji count equals the
        # number of graphemes in it.
        emoji_total = emojis.count(text_received)
        grapheme_total = len(list(grapheme.graphemes(text_received)))

        if emoji_total == grapheme_total:
            print("emojis:", text_received)
            update.message.reply_text(f'uwu {text_received}')
            SSEFuckery.sse_broadcast("emojis", text_received)
        else:
            print("scrolly-text:", text_received)
            update.message.reply_text(f'auzi cica >{text_received}')
            SSEFuckery.sse_broadcast("scrolly-text", text_received)
Пример #13
0
async def radio_mode_message(message):
    """Delete a message posted in a radio channel unless it is plain text.

    Messages written by ``OMEGA.user`` are ignored.  For any other message
    the ``radio`` table is queried for the message's channel id; if the
    channel is listed, the message is deleted when it has attachments,
    contains an emoji, or matches one of the forbidden patterns below.
    """
    if message.author == OMEGA.user:
        return

    cursor.execute("SELECT EXISTS(SELECT 1 FROM radio WHERE channel_id=?);",
                   [message.channel.id])
    channel_is_radio = cursor.fetchall()[0][0]
    if not channel_is_radio:
        return

    forbidden_patterns = [
        # `<:name:digits>` markup.
        r"<:\w*:\d*>",
        # URL-like text: optional scheme and credentials, dotted host,
        # optional port, path, query string and fragment.
        # NOTE(review): `[a-fA-f\d]` looks like a typo for `[a-fA-F\d]` --
        # confirm before changing the pattern.
        r"(([\w]+:)?//)?(([\d\w]|%[a-fA-f\d]{2})+(:([\d\w]|%[a-fA-f\d]{2})+)?@)"
        r"?([\d\w][-\d\w]{0,253}[\d\w]\.)+[\w]{2,63}(:[\d]+)?"
        r"(/([-+_~.\d\w]|%[a-fA-f\d]{2})*)"
        r"*(\?(&?([-+_~.\d\w]|%[a-fA-f\d]{2})=?)*)?(#([-+_~.\d\w]|%[a-fA-f\d]{2})*)?",
    ]
    not_text_only = (
        message.attachments
        or emojis.count(message.content)
        or any(
            re.search(pattern, message.content)
            for pattern in forbidden_patterns))
    if not_text_only:
        await message.delete()
Пример #14
0
def process_tweets(tweet):
    '''
    Takes in a single tweet (string of text, e.g. the 'tweet_text' key)
    and returns a processed, tokenized version of the tweet (list).

    Steps, in order: run the text through both emoji encoders, strip
    @-mentions, drop the comma between two digits, tokenize with ``tt``,
    remove non-ASCII characters from tokens that hold no emoji, and finally
    pass every token through ``final_process``, flattening the pieces it
    returns and skipping tokens it rejects with ``''``.
    '''
    # both emoji encoders exclude different emojis, so both are applied for
    # increased coverage
    tweet = emoji.emojize(emojis.encode(tweet))
    # remove @ tags (usernames)
    tweet = re.sub('@[A-Za-z0-9_]+', '', tweet)
    # remove commas from numbers (e.g. 1,000 -> 1000)
    tweet = re.sub('([0-9],[0-9])', lambda x: str(x.group(0)).replace(',', ''),
                   tweet)
    # drop non-ASCII characters from every token that contains no emoji;
    # tokens come from the NLTK-style tokenizer `tt`
    tokens = [
        word if emojis.count(word) != 0
        else word.encode('ascii', 'ignore').decode('ascii')
        for word in tt.tokenize(tweet)
    ]
    # remove nonalphanumeric words except for emojis; final_process is
    # evaluated once per token instead of twice
    processed = []
    for word in tokens:
        pieces = final_process(word)
        if pieces != '':
            processed.extend(pieces)
    return processed
def create_tweet_features(tweets_data, run_countvec=False, run_tfidf=False):
    """Add simple count features to ``tweets_data`` and optionally vectorize.

    Three columns are written to ``tweets_data`` in place, all derived from
    its ``'text'`` column: ``'exc_count'`` (number of ``"!"``),
    ``'characters_nb'`` (``len`` of the text) and ``'emoji_count'``
    (``emojis.count`` of the text).

    clean_twitter_text should be run before this function for the best
    results.

    Args:
        tweets_data: Table with a ``'text'`` column; the vectorizers are
            pointed at a ``'text_dec'`` column.
        run_countvec: When true, return the result of
            ``apply_countvectorizer``.
        run_tfidf: When true (and ``run_countvec`` is false), return the
            result of ``apply_tfidf``.

    Returns:
        The vectorizer result when one was requested, otherwise
        ``tweets_data`` with the three count columns.
    """
    # Create feature for number of exclamation marks
    tweets_data['exc_count'] = tweets_data['text'].map(lambda x: x.count("!"))
    # Create a column for number of characters
    tweets_data['characters_nb'] = tweets_data.text.apply(len)
    # Add count for emojis
    tweets_data['emoji_count'] = tweets_data['text'].map(
        lambda x: emojis.count(x))

    # Add text features
    vocab_list = vocab.get_all_vocab()
    if run_countvec:
        return apply_countvectorizer(tweets_data,
                                     'text_dec',
                                     vocab_def=vocab_list)
    if run_tfidf:
        return apply_tfidf(tweets_data,
                           'text_dec',
                           vocab_def=vocab_list)

    # Neither vectorizer requested: this path used to fail with an
    # UnboundLocalError because no result variable had been assigned.
    return tweets_data
Пример #16
0
 def count_emojis(text: str):
     '''
     Return the result of ``emojis.count`` for ``text``.

     import emojis
     '''
     return emojis.count(text)
Пример #17
0
    # Ids of the tweets handled so far; used below to skip duplicates.
    id_list = []

    # range(1) yields only i == 0, so the else branch below never runs.
    for i in range(1):

        if i == 0:
            # Emoji shortcodes, converted to emoji characters by emojis.encode
            # and split into a list.
            emoji_list = ":smile: :joy: :heart: :disappointed: :sob: :sparkles: :relieved: :wink: :cry: :angry:"
            keywords = emojis.encode(emoji_list).split()
            #keywords = ["the","be","to","of","and","a","in","that","have","I"]
        else:
            keywords = [
                "good", "happy", "free", "great", "easy", "bad", "alone",
                "upset", "sad", "lost"
            ]
        # NOTE(review): `keywords` is not used in the lines visible here; the
        # search query below is hard-coded -- confirm against the rest of the loop.
        #for tweet in tweets:
        for tweet in Cursor(api.search, q=["trump"], lang="en").items(10):
            # Keep only tweets that contain an emoji and were not seen before.
            if (emojis.count(tweet.text) != 0) and (tweet.id not in id_list):

                # basic information about the tweet
                my_id = tweet.id
                id_list.append(tweet.id)
                date = tweet.created_at
                author = tweet.author.screen_name
                # many tweets don't have location information, so fall back
                # from the tweet's place to the author's / user's location
                if tweet.place != None:
                    location = tweet.place.name
                elif tweet.author.location != None:
                    location = tweet.author.location
                else:
                    location = tweet.user.location
                likes = tweet.favorite_count
                retweets = tweet.retweet_count
Пример #18
0
def has_permissions(user: discord.Member) -> bool:
    """Tell whether ``user`` holds at least one of the privileged roles.

    The privileged roles are ``Constants.PEPEHACK_ROLE`` and
    ``Constants.BARTENDER_ROLE``; membership is tested against
    ``user.roles``.
    """
    privileged_roles = (Constants.PEPEHACK_ROLE, Constants.BARTENDER_ROLE)
    for role in privileged_roles:
        if role in user.roles:
            return True
    return False


# Picks one of the two variants, male or female, depending on whether the
# user has the corresponding Discord role (Constants.FEMALE_ROLE).
def gender(user, male, female):
    if Constants.FEMALE_ROLE in user.roles:
        return female
    return male


# Returns the emote named emote_name, formatted as a string for a Discord message.
# The server's own emojis (Constants.GUILD.emojis) are searched first; if none
# matches, the `:emote_name:` shortcode is tried through the `emojis` package.
# If neither yields an emoji, an empty string is returned.
def emote(emote_name: str) -> str:
    if (guild_emote := discord.utils.get(Constants.GUILD.emojis, name=emote_name)):
        return str(guild_emote)
    # Encode the shortcode once and reuse the result for the check and the return.
    encoded = emojis.encode(f':{emote_name}:')
    if emojis.count(encoded) > 0:
        return encoded
    return ''


# Returns the user's id as an int, given the user's mention (a ping written with @).
# Raises ValueError if the remaining text cannot be parsed as an integer.
def get_id(user_mention: str) -> int:
    # Delete every mention-markup character (<, >, @, !, &) wherever it occurs.
    markup_free = user_mention.translate(str.maketrans('', '', '<>@!&'))
    return int(markup_free)
Пример #19
0
def is_emoji(s):
    """Return the number of emojis found in ``s``, or 0 if counting fails.

    The result is the integer from ``emojis.count``, so it is truthy exactly
    when ``s`` contains at least one emoji.
    """
    try:
        return emojis.count(s)
    except Exception:
        # Best effort: input that cannot be counted is treated as "no emoji".
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return 0
                    else:
                        location = tweet.user.location
                    likes = tweet.favorite_count
                    retweets = tweet.retweet_count
                    
                    #use TextBlob to do sentiment analysis
                    clean_tweet = ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet.text).split())
                    analysis = TextBlob(clean_tweet)
                    polarity = analysis.polarity
                    subjectivity = analysis.subjectivity


                    #use emojis to do emoji process
                    emoji_all = emojis.get(tweet.text)
                    emoji_see = emojis.decode(" ".join(emoji_all))
                    emoji_num = emojis.count(tweet.text)

                    #previous tweet text data, use unicode to encode, also can .decode("unicode_escape") 
                    tweet = tweet.text.encode("unicode_escape")

                    s = pd.Series({'id':my_id,'date':date,'author':author,'location':location,'likes':likes,'retweets':retweets,'polarity':polarity,'subjectivity':subjectivity,'emoji_all':emoji_all,'emoji_see':emoji_see,'emoji_num':emoji_num,'tweet':tweet})
                    df = df.append(s, ignore_index=True)

            print(df.shape[0])
            print(i)
            time.sleep(3)



    
    df.to_excel('TweetData_emoji.xlsx', sheet_name='Sheet1')  # Excel is better, beecause we can see the emoji
Пример #21
0
# Try out the features of the Emojis emoji package

import emojis

print(emojis.count('Python is fun 👍'))

# Read the Sogou input method's emoji samples and verify that its emoji codes are indeed Unicode, e.g. U+0001F601

from collections import defaultdict

# Copy the Sogou sample into sogouw.txt with every space turned into a
# newline.  Raw strings keep the backslash of the Windows-style paths literal.
# The output file is opened first, as before, so it is created or truncated
# before the input is read.
with open(r'data\sogouw.txt', 'w', encoding='utf-8-sig') as file:
    with open(r'data\sogou.txt', 'r', encoding='utf-8-sig') as f:
        for line in f:
            # Write the whole converted line.  The former character-by-character
            # loop never reset its index, so every line after the first lost
            # its leading characters.
            file.write(line.replace(' ', '\n'))
frequencies = defaultdict(int)
with open(r'data\sogouw.txt', 'r', encoding='utf-8-sig') as f:
    for line in f:
        line = line.replace('\n', '')
        # Escaped form of the line, e.g. a `\U0001f601`-style sequence.
        a = line.encode('unicode-escape').decode('utf-8')
        print(line, a)
        # Strip the escape prefixes and the byte-order mark, leaving hex digits.
        a = a.replace('\\u', '').replace('\\U', '').replace('feff', '')
        # Compute the decimal value of the Sogou Unicode code to delimit the range
        try:
            b = int(a, 16)
        except ValueError:
            # Not a single hexadecimal number: show the offending text.
            print(a)
Пример #22
0
def EmojiCount(tweet):
    """Return the number of distinct emojis in ``tweet``.

    The text is first passed through ``emojis.encode`` and the result is
    counted with ``unique=True``.
    """
    return emojis.count(emojis.encode(tweet), unique=True)
def emoticon_number_avg(author):
    """Return the average number of emojis per tweet for one author.

    Args:
        author: Mapping that provides a sized ``'tweets'`` collection.

    Returns:
        The sum of ``emojis.count`` over all tweets divided by the number of
        tweets, or ``0.0`` when there are no tweets.
    """
    tweets = author['tweets']
    tweet_total = len(tweets)
    # An author without tweets used to trigger a ZeroDivisionError.
    if tweet_total == 0:
        return 0.0
    emoticon_number = sum(emojis.count(tweet) for tweet in tweets)
    return emoticon_number / tweet_total