Exemplo n.º 1
0
def test_emoji_lis():
    """Check that ``emoji.emoji_lis`` reports each emoji with its location."""
    # A single emoji at the end of a sentence.
    single = emoji.emoji_lis("Hi, I am fine. 😁")
    assert single == [{'location': 15, 'emoji': '😁'}]

    # Text without any emoji yields an empty list.
    assert emoji.emoji_lis("Hi") == []

    # A two-code-point flag followed directly by another emoji.
    expected_pair = [
        {'emoji': '🇫🇷', 'location': 6},
        {'emoji': '👌', 'location': 8},
    ]
    assert emoji.emoji_lis("Hello 🇫🇷👌") == expected_pair
def extract_emoji(text):
    """Return the emoji found in *text*, in the order ``e.emoji_lis`` lists them."""
    return [found['emoji'] for found in e.emoji_lis(text)]
def join_emoji(bigram):
    """Return *bigram* as a string, closing the gaps when it contains emoji.

    Args:
        bigram: Either a string or a tuple of strings.  A tuple (including
            tuple subclasses such as named tuples) is first joined with
            single spaces.

    Returns:
        The bigram text with every space removed when ``emoji.emoji_lis``
        finds at least one emoji in it; otherwise the text unchanged.
    """
    # isinstance() also accepts tuple subclasses, which an exact type
    # comparison would pass on unjoined to emoji_lis.
    if isinstance(bigram, tuple):
        bigram = ' '.join(bigram)
    if emoji.emoji_lis(bigram):
        bigram = bigram.replace(' ', '')
    return bigram
Exemplo n.º 4
0
 def remove_emoji_by_polarity(self, string, p_rng=(-1, 1)):
     """Remove every emoji whose polarity lies inside *p_rng*.

     Args:
         string: Text to clean.
         p_rng: Two-item sequence ``(low, high)``; an emoji is removed when
             ``low <= polarity <= high``.  Defaults to ``(-1, 1)``.

     Returns:
         The text without the selected emoji.  Whitespace directly after
         each removed emoji and at the end of the result is stripped.
     """
     low, high = p_rng[0], p_rng[1]
     # Walk from the last emoji to the first so that cutting text out never
     # invalidates the locations of the emoji still to be processed.
     for emoji_data in reversed(emoji_lis(string)):
         # Read the fields by key instead of relying on dict value order.
         start = emoji_data['location']
         symbol = emoji_data['emoji']
         _polarity = self.get_df_by_emoji(symbol)['polarity'].iloc[0]
         if low <= _polarity <= high:
             # An emoji may span several code points (flags, ZWJ sequences),
             # so skip its full length rather than a single character.
             string = string[:start] + string[start + len(symbol):].lstrip()
     return string.rstrip()
Exemplo n.º 5
0
    def get_emoji(text: str, client: Client) -> [str, None]:
        """Return the first standard emoji in *text*, else try a custom one.

        When ``emoji_lis`` finds nothing, the lookup is delegated to
        ``ParsingUtils.get_custom_emoji`` and its result is returned as is.
        """
        found = emoji_lis(text, "en")
        if not found:
            return ParsingUtils.get_custom_emoji(text, client)
        return found[0]["emoji"]
Exemplo n.º 6
0
    def handle_data(self, text):
        """Append *text* to ``self.paragraph``, giving emoji their own runs.

        The text is split into alternating plain-text and emoji spans using
        the locations reported by ``emoji.emoji_lis``.  Emoji spans are added
        with the ``self.fonts["emoji"]`` font; plain spans are added in bold
        with ``self.font``.  Every run is set to 10 pt.

        Args:
            text: The string to add to the current paragraph.
        """
        # Fall back to the "normal" font when none has been selected yet
        # (the attribute is either None or was never set).
        try:
            if self.font is None:
                self.font = self.fonts["normal"]
        except AttributeError:
            self.font = self.fonts["normal"]

        # Build (start, end, is_emoji) spans.  The end of an emoji span is
        # derived from the length of the emoji itself, because an emoji can
        # consist of several code points (e.g. flags) and must stay in a
        # single run to render correctly.
        segments = []
        cursor = 0
        for found in emoji.emoji_lis(text):
            start = found["location"]
            end = start + len(found["emoji"])
            if start > cursor:
                # plain text between the previous emoji and this one
                segments.append((cursor, start, False))
            segments.append((start, end, True))
            cursor = end
        # Trailing plain text; also covers text without any emoji, which is
        # still added as one (possibly empty) run.
        if cursor < len(text) or not segments:
            segments.append((cursor, len(text), False))

        # Add all segments to the document.
        for first, last, is_emoji in segments:
            run = self.paragraph.add_run(text[first:last])
            run.font.size = Pt(10)
            if is_emoji:
                run.font.name = self.fonts["emoji"]
            else:
                run.font.name = self.font
                run.bold = True
Exemplo n.º 7
0
def get_unicode_emoji_count_from_string(content):
    """Count the standard emoji in *content*.

    Returns:
        A dict mapping ``(name, emoji)`` tuples to the number of times the
        emoji was reported by ``emoji_lis``; the name comes from
        ``codepoint_to_name``.
    """
    tally = {}
    for entry in emoji_lis(content):
        symbol = entry["emoji"]
        key = (codepoint_to_name(symbol), symbol)
        if key in tally:
            tally[key] += 1
        else:
            tally[key] = 1
    return tally
Exemplo n.º 8
0
def get_emojis(string):
    """Collect the standard and custom emoji that appear in *string*.

    Returns:
        A list holding first every emoji reported by ``emoji.emoji_lis`` and
        then, for each ``<:name:id>`` / ``<a:name:id>`` tag, its ``:name:``
        part.
    """
    custom_pattern = r'<(a?)(:[A-Za-z0-9_]+:)([0-9]+)>'
    found = [item['emoji'] for item in emoji.emoji_lis(string)]
    # findall() yields one tuple of groups per tag; index 1 is ":name:".
    found.extend(groups[1] for groups in re.findall(custom_pattern, string))
    return found
Exemplo n.º 9
0
def test_emoji_lis():
    """Check emoji locations, skipping exact positions on UCS-2 builds."""
    first_hit = emoji.emoji_lis('Hi, I am 👌 test')[0]
    assert first_hit['location'] == 9
    assert emoji.emoji_lis('Hi') == []

    # skip the remaining checks on python with UCS-2, where the string
    # length/positions are different
    if len('Hello 🇫🇷👌') >= 10:
        return

    assert emoji.emoji_lis('Hi, I am fine. 😁') == [
        {'location': 15, 'emoji': '😁'},
    ]
    assert emoji.emoji_lis('Hello 🇫🇷👌') == [
        {'emoji': '🇫🇷', 'location': 6},
        {'emoji': '👌', 'location': 8},
    ]
Exemplo n.º 10
0
def emoji_parser(doc):
    """Replace non-ASCII characters in *doc* with ``?`` while keeping emoji.

    Args:
        doc: The text to process.

    Returns:
        The text in which every non-ASCII character that is not part of an
        emoji reported by ``emoji.emoji_lis`` has been replaced by ``?``.
    """
    emoji_list = emoji.emoji_lis(doc)
    # The "replace" handler substitutes one "?" per unencodable character,
    # so positions in the result line up with positions in the input.
    chars = list(doc.encode("ascii", "replace").decode('utf-8'))
    # Restore emoji from last to first: an emoji may span several code
    # points, and collapsing its "?" run into one list item would otherwise
    # shift the locations of the emoji that follow it.
    for em in reversed(emoji_list):
        start = em['location']
        chars[start:start + len(em['emoji'])] = [em['emoji']]
    return ''.join(chars)
Exemplo n.º 11
0
def clean_tweet(text):
    """Normalise the text of a tweet and split it into word tokens.

    Parameters:
        text (string): The input string (tweet) which is to be preprocessed

    Returns:
        tuple: ``(words, hashtagwords)`` where ``words`` is the list of
        tokens of the processed tweet (without the URL / MENTION / ORDINAL
        placeholders) and ``hashtagwords`` is the list of hashtag matches
        with the ``#`` sign removed.
    """
    cleaned = text.lower()

    # swap URLs for a placeholder token
    url_pattern = (r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|"
                   r"(?:%[0-9a-fA-F][0-9a-fA-F]))+")
    cleaned = re.sub(url_pattern, 'URL', cleaned)

    # swap @ mentions for a placeholder token
    cleaned = re.sub(r"(?:\s|^)@[\w_-]+", ' MENTION', cleaned)

    # spell out ampersands and drop literal backslash-n sequences
    cleaned = cleaned.replace("&", ' and ')
    cleaned = cleaned.replace(r"\n", "")

    # remember the hashtag words, then drop the # sign everywhere
    hashtagwords = [
        tag.replace("#", "")
        for tag in re.findall(r'(?:\s|^)#[\w_-]+', cleaned)
    ]
    cleaned = cleaned.replace("#", "")

    # swap ordinals for a placeholder token
    cleaned = re.sub(r"[0-9]+(?:st|nd|rd|th)", 'ORDINAL', cleaned)

    # pad each emoji with spaces, then convert emoji to their text names
    for found in emoji.emoji_lis(cleaned):
        symbol = found['emoji']
        cleaned = cleaned.replace(symbol, " " + symbol + " ")
    cleaned = emoji.demojize(cleaned)

    # drop underscores/apostrophes and isolate sentence punctuation
    for dropped in ("_", "'", "’"):
        cleaned = cleaned.replace(dropped, '')
    for mark in (".", ",", "?", "!"):
        cleaned = cleaned.replace(mark, " " + mark + " ")

    # collapse every run of other characters (digits etc.) into one space
    cleaned = re.sub(r"[^A-Za-z'’,.?_!]+", ' ', cleaned)

    placeholders = ["MENTION", "ORDINAL", "URL"]
    words = [token for token in cleaned.split() if token not in placeholders]
    return words, hashtagwords
Exemplo n.º 12
0
def count_emoji(text):
    """Count each emoji in *text* and write the tally to a CSV file.

    The counts are written to ``most_used_emoji.csv`` in the current working
    directory with the columns ``Emoji`` and ``count``, sorted by descending
    count.  Nothing is returned.

    Args:
        text: The text whose emoji are counted.
    """
    tally = Counter(found['emoji'] for found in emoji.emoji_lis(text))
    # Naming the columns up front keeps them present even when the text
    # holds no emoji, so sorting by 'count' no longer raises KeyError and a
    # header-only file is written instead.
    df_from_counter = pd.DataFrame(list(tally.items()),
                                   columns=['Emoji', 'count'])
    df_from_counter = df_from_counter.sort_values('count', ascending=False)
    df_from_counter.to_csv('most_used_emoji.csv')
Exemplo n.º 13
0
    async def post(self, request: Request, data: dict) -> Response:
        """Register a device from a POST request and return its webhook data.

        *data* is completed in place (webhook id, optional cloudhook URL and
        secret, user id, device name), handed to a new config flow, and the
        connection details are sent back as JSON with ``HTTP_CREATED``.
        """
        hass = request.app["hass"]
        webhook_id = secrets.token_hex()
        cloud = hass.components.cloud

        if cloud.async_active_subscription():
            cloudhook_url = await cloud.async_create_cloudhook(webhook_id)
            data[CONF_CLOUDHOOK_URL] = cloudhook_url

        data[CONF_WEBHOOK_ID] = webhook_id

        if data[ATTR_SUPPORTS_ENCRYPTION] and supports_encryption():
            data[CONF_SECRET] = secrets.token_hex(SecretBox.KEY_SIZE)

        data[CONF_USER_ID] = request["hass_user"].id

        # Keep the device name when it produces a non-empty slug; otherwise
        # derive a name from the first emoji, or fall back to the device id.
        device_name = data[ATTR_DEVICE_NAME]
        if not slugify(device_name, separator=""):
            if emoji.emoji_count(device_name):
                first_emoji = emoji.emoji_lis(device_name)[0]["emoji"]
                described = emoji.demojize(first_emoji)
                data[ATTR_DEVICE_NAME] = described.replace(":", "")
            else:
                data[ATTR_DEVICE_NAME] = data[ATTR_DEVICE_ID]

        flow = hass.config_entries.flow.async_init(
            DOMAIN, data=data, context={"source": "registration"}
        )
        await hass.async_create_task(flow)

        # The remote UI URL is optional; report None when cloud is missing.
        try:
            remote_ui_url = cloud.async_remote_ui_url()
        except cloud.CloudNotAvailable:
            remote_ui_url = None

        payload = {
            CONF_CLOUDHOOK_URL: data.get(CONF_CLOUDHOOK_URL),
            CONF_REMOTE_UI_URL: remote_ui_url,
            CONF_SECRET: data.get(CONF_SECRET),
            CONF_WEBHOOK_ID: data[CONF_WEBHOOK_ID],
        }
        return self.json(payload, status_code=HTTP_CREATED)
Exemplo n.º 14
0
def _emoji_description_extractor(text: str):
    """
    Return the description of every emoji used in the given text.

    Each description is the ``emoji.demojize`` name with underscores turned
    into spaces, colons removed and surrounding whitespace stripped.
    """
    descriptions = []
    for entry in emoji.emoji_lis(text):
        label = emoji.demojize(entry.get("emoji"))
        label = label.replace("_", " ").replace(":", "")
        descriptions.append(str.strip(label))
    return descriptions
Exemplo n.º 15
0
def extract_emoji_terms(text):
    """
    Return the distinct emoji names found in *text* as one string.

    Each emoji is converted with ``emoji.demojize``; colons are removed and
    underscores become spaces.  Duplicates are collapsed through a set, so
    the order of the space-separated terms is not defined.
    """
    terms = set()
    for found in emoji.emoji_lis(text):
        name = emoji.demojize(found['emoji'])
        terms.add(name.replace(':', '').replace('_', ' '))

    return ' '.join(terms)
Exemplo n.º 16
0
def tag_emoji_subject(line):
    """Return the categories of the emoji found in *line*.

    Every emoji is looked up in ``emoji_Class`` by its character; the
    ``category`` of each matching entry is collected.  When no entry
    matches an emoji, a marker line is printed instead.
    """
    categories = []
    for found in emoji.emoji_lis(line):
        # Round-trip through the alias table to get the emoji character.
        table = emoji.unicode_codes.EMOJI_UNICODE
        ucode = table[emoji.demojize(found['emoji'])]
        matched = [
            entry['category'] for entry in emoji_Class
            if entry['char'] == ucode
        ]
        if matched:
            categories.extend(matched)
        else:
            print("f**k")
    return categories
Exemplo n.º 17
0
 def __init__(self, line: str):
     """Parse one exported chat line into sender, content, date and emoji.

     The date is read from fixed columns (``dd?mm?yyyy`` at 0-10) and the
     sender starts at column 19.  A line without a ``:`` outside quoted
     text is treated as a group event.  Any parsing error is printed and
     leaves the remaining fields at their defaults.
     """
     self.sender = None
     self.content = ""
     self.date = None
     self.emojis = list()
     self.group_event = False
     try:
         year, month, day = int(line[6:10]), int(line[3:5]), int(line[0:2])
         self.date = datetime.date(year, month, day)
         pattern = " \".[^\"]*\""
         if ":" in re.sub(pattern, "", line[19:]):
             # regular message: "<sender>: <content>"
             offset = line[19:].index(":") + 19
             self.sender = line[19:offset]
             self.content = line[offset + 2:]
         else:
             # group event line: "<sender> <event text>"
             self.group_event = True
             # TODO: check the format of the removal, addition and leave
             # events.
             event_keywords = (
                 "removeu",
                 "adicionou",
                 "saiu",
                 "mudou o nome de",
                 "mudou a imagem",
                 "apagou a imagem",
             )
             for keyword in event_keywords:
                 if keyword not in line:
                     continue
                 offset = line.index(keyword) - 1
                 self.sender = line[19:offset]
                 if keyword == "saiu":
                     self.content = "saiu"
                 else:
                     self.content = line[offset + 1:]
                 break
         if emoji.emoji_count(self.content) > 0:
             self.emojis = emoji.emoji_lis(line)
     except Exception as e:
         print(e)
     if self.sender == "Você":
         self.sender = MY_WHATSAPP_USERNAME
Exemplo n.º 18
0
def emoji_count(tweet):
    """Count, per user, how many emoji of *tweet* are among that user's
    most common emoji.

    The per-user emoji are read from ``most_common_5_emojis.json`` in the
    current working directory.

    Args:
        tweet: The tweet text to inspect.

    Returns:
        A list of 10 integers; item ``u`` is the number of emoji occurrences
        in the tweet that belong to user ``u``'s most common emoji.
    """
    with open('most_common_5_emojis.json', 'r') as fp:
        most_common_emojis = json.load(fp=fp)

    # JSON object keys are always strings, so turn them back into list
    # indices.  Each stored entry may be an [emoji, count] pair (the shape
    # produced by Counter.most_common) or a bare emoji; keep only the emoji.
    favourites = {}
    for user, entries in most_common_emojis.items():
        favourites[int(user)] = {
            entry[0] if isinstance(entry, (list, tuple)) else entry
            for entry in entries
        }

    emoji_count_list = [0] * 10
    for emoji_dict in emoji.emoji_lis(tweet):
        current_emoji = emoji_dict['emoji']
        for user, symbols in favourites.items():
            if current_emoji in symbols:
                emoji_count_list[user] += 1

    return emoji_count_list
Exemplo n.º 19
0
    def emoji_to_types(string):
        """
        Group the emoji found in a string of text by their label.

        Input:  string with some (if any) emojis
        Output: dictionary mapping each label to the list of discovered
                emoji that carry it.
        """
        grouped = dict()
        for found in emoji.emoji_lis(string):
            symbol = found['emoji']
            alias = emoji.UNICODE_EMOJI_ALIAS[symbol]
            # look the alias up in the module-level frame to get its label
            label = df[df.EMOJI_ALIAS_UNICODE == alias].VEC_TO_CAT.item()
            grouped.setdefault(label, []).append(symbol)
        return grouped
Exemplo n.º 20
0
    def parse_option(cls, opt_line: str) -> VoteOption:
        """Build a ``VoteOption`` from a line that starts with an emoji.

        The line is first matched against ``cls.emoji_regex``; when that
        fails, a unicode emoji at position 0 is accepted instead.

        Raises:
            cls.NotEmojiError: if the line starts with neither kind of emoji.
        """
        matches = cls.emoji_regex.match(opt_line)
        is_unicode = matches is None
        if not is_unicode:
            opt_emoji = matches.group(1)
            opt_message = matches.group(2).strip()
        else:
            # it is not a disnake emoji, try unicode
            emojis = emoji.emoji_lis(opt_line)
            if not emojis or emojis[0]['location'] != 0:
                raise cls.NotEmojiError(opt_line)
            opt_emoji = emojis[0]['emoji']
            opt_message = opt_line[len(opt_emoji):].strip()

        return cls.VoteOption(opt_emoji, is_unicode, opt_message, 0)
Exemplo n.º 21
0
 def __init__(self, line: str, user_typed: bool):
     """Parse one exported chat line into sender, content, date and emoji.

     Only lines starting with ``[`` are parsed: the date is read from fixed
     columns (1-11) and the sender starts at column 22.  A line without a
     ``:`` outside quoted text is treated as a group event.  Any parsing
     error is printed and leaves the remaining fields at their defaults.
     """
     self.user_typed = user_typed
     self.sender = None
     self.content = ""
     self.date = None
     self.emojis = list()
     self.group_event = False
     try:
         if line[0] == "[":
             year, month, day = (int(line[7:11]), int(line[4:6]),
                                 int(line[1:3]))
             self.date = datetime.date(year, month, day)
             pattern = " \".[^\"]*\""
             if ":" in re.sub(pattern, "", line[22:]):
                 # regular message: "<sender>: <content>"
                 offset = line[22:].index(":") + 22
                 self.sender = line[22:offset]
                 self.content = line[offset + 2:]
             else:
                 # group event line: "<sender> <event text>"
                 self.group_event = True
                 event_keywords = (
                     "removeu",
                     "adicionou",
                     "saiu",
                     "mudou o nome do grupo para",
                     "mudou a imagem",
                 )
                 for keyword in event_keywords:
                     if keyword not in line:
                         continue
                     offset = line.index(keyword) - 1
                     self.sender = line[22:offset]
                     if keyword == "saiu":
                         self.content = "saiu"
                     else:
                         self.content = line[offset + 1:]
                     break
             if emoji.emoji_count(self.content) > 0:
                 self.emojis = emoji.emoji_lis(line)
     except Exception as e:
         print(e)
     if self.sender == "Você":
         self.sender = MY_WHATSAPP_USERNAME
Exemplo n.º 22
0
    def find_roles(self, embed: discord.Embed, guild: discord.Guild):
        """Map the first emoji of each embed field to the role it mentions.

        A field contributes an entry only when it contains an emoji, its
        value starts with a ``<@&id>`` role mention, and *guild* knows a
        role with that id.
        """
        mapping = {}
        for field in embed.fields:
            found = emoji.emoji_lis(field.value)
            if found:
                reaction = found[0]["emoji"]
                mention = re.match(r"<@&(.+)>", field.value)
                if mention is not None:
                    role = guild.get_role(int(mention.group(1)))
                    if role is not None:
                        mapping[reaction] = role
        return mapping
Exemplo n.º 23
0
 async def fetch_content(self, ctx, *, args=None):
     """Send *args* back with mentions, markdown and emoji escaped.

     Args:
         ctx: The invocation context; its ``send`` coroutine is used for
             the reply and ``ctx.message.mentions`` for mention escaping.
         args: The text to escape.  When it is missing or empty, a short
             prompt is sent instead and nothing else happens.
     """
     if not args:
         await ctx.send("please send actual text")
         # Stop here: there is no text to escape or to send back.
         return

     args = discord.utils.escape_mentions(args)
     args = discord.utils.escape_markdown(args,
                                          as_needed=False,
                                          ignore_links=False)
     for x in ctx.message.mentions:
         args = args.replace(x.mention, "\\" + x.mention)
     # str.replace() rewrites every occurrence at once, so each distinct
     # emoji / match must be handled only once or it would collect one
     # extra backslash per repetition.  dict.fromkeys() de-duplicates while
     # keeping the original order.
     for x in dict.fromkeys(d["emoji"] for d in emoji.emoji_lis(args)):
         args = args.replace(x, "\\" + x)
     for x in dict.fromkeys(re.findall(r":\w*:\d*", args)):
         args = args.replace(x, "\\" + x)
     await ctx.send(f"{args}")
Exemplo n.º 24
0
def on_buttonpress():
    """Rebuild the emoji bar chart when the text input is not empty.

    Collects the emoji found in the ``'emoji'`` fields of the nested
    structure ``t``, counts them, keeps the first ten rows of the count
    table and pushes them into ``source_emoji`` before configuring the
    plot ``p``.  Does nothing when ``text_input.value`` is empty.
    """
    if text_input.value != "":
        tweets = get_tweets()
        allemojis = []
        ef = []
        # NOTE(review): `t` is not assigned in this function, while the
        # `tweets` fetched above is never used — presumably `t` is a
        # module-level name or was meant to be `tweets`; confirm.
        for i in range(len(t)):
            rr = len(t[i])
            for j in range(rr):
                # join the 'emoji' field of the item into one string and
                # keep the emoji_lis result when it is not empty
                allemojis = "".join(t[i][j]['emoji'])
                emoji_list = emoji.emoji_lis(allemojis)
                if emoji_list != []:
                    ef.append(emoji_list)
                # flatten everything collected so far into a list of emoji;
                # this and the count table below are recomputed for every
                # item, only the last result is used after the loops
                em = []
                for k in range(len(ef)):
                    for l in range(len(ef[k])):
                        em.append(ef[k][l]['emoji'])
                emoji_series = pd.Series(em)
                emojis = pd.DataFrame(
                    emoji_series.value_counts()).reset_index().rename(
                        columns={
                            'index': 'emoji',
                            0: 'Count'
                        })
                # NOTE(review): range(1, len(emojis)) has one element fewer
                # than the frame has rows, so the last row gets no rank —
                # looks like an off-by-one; confirm.
                emojis['Rank'] = pd.Series(range(1, len(emojis)))
        # NOTE(review): `emojis` is only bound inside the loops above, so
        # this line assumes at least one item was processed — confirm.
        emojis = emojis.head(10)
        emojis['Rank'] = emojis['Rank'].apply(lambda x: int(x))
        source_emoji.data = dict(emoji=emojis['emoji'],
                                 Count=emojis['Count'],
                                 Rank=emojis['Rank'])
        # draw each emoji as a text label at its (Rank, Count) position
        labels = LabelSet(x="Rank",
                          y="Count",
                          text="emoji",
                          level='glyph',
                          render_mode='canvas',
                          source=source_emoji,
                          x_offset=-12,
                          text_font_size="12pt")
        p.vbar(x="Rank", top="Count", width=0.95, source=source_emoji)
        p.xaxis.minor_tick_line_color = None  # turn off x-axis minor ticks
        p.yaxis.minor_tick_line_color = None  # turn off y-axis minor ticks
        p.y_range.start = 0
        p.x_range.start = 0
        p.xaxis[0].ticker.desired_num_ticks = 10
        p.add_layout(labels)
    else:
        pass
Exemplo n.º 25
0
 async def poll(self, ctx, *, a: str):
     """Post *a* as a poll and react to it with every emoji it contains.

     Args:
         ctx: The invocation context; the poll is sent through
             ``ctx.send`` and attributed to ``ctx.message.author``.
         a: The poll text.  Unicode emoji and ``<...>`` custom emoji tags
             found in it are added to the poll message as reactions.
     """
     author_id = ctx.message.author.id
     # Round-trip the emoji_lis() result through its text form: demojize
     # turns each emoji into a ':name:' alias, the aliases are pulled out
     # with a regex, and emojize converts them back into emoji characters.
     emoji_init_string = str(emoji.emoji_lis(a))
     disc_emoji_sep = re.findall(r"':([^:']*):'",
                                 emoji.demojize(emoji_init_string))
     disc_emoji_string = emoji.emojize(
         str([':' + demoji + ':' for demoji in disc_emoji_sep]))
     disc_emoji = re.findall(r"'([^']*)'", disc_emoji_string)
     # Custom emoji appear as '<...>' tags in the raw message text.
     custom_emojis = re.findall(r'<([^>]*)>', a)
     cemojilist = ['<' + cemoji + '>' for cemoji in custom_emojis]
     all_emojis = disc_emoji + cemojilist
     poll_message = await ctx.send("**Poll from** <@" + str(author_id) +
                                   ">**!!**\n"
                                   "" + a)
     for i in all_emojis:
         try:
             await poll_message.add_reaction(i)
         except Exception:
             # Best effort: skip reactions that cannot be added, but let
             # task cancellation and interpreter exit propagate (a bare
             # except would swallow those as well).
             print("Emoji " + i + " not found")
Exemplo n.º 26
0
async def process_message(message, add_count_fn):
    """Report every emoji used in *message* through *add_count_fn*.

    Custom emoji tags and standard emoji in the content are reported with
    ``add_count_fn(date, channel, name)``; reactions are reported with
    their count as a fourth argument.
    """
    message_date = date_to_string(message.created_at)
    channel = str(message.channel)

    # custom emoji: "<:name:id>" -> "name"
    custom_names = [
        tag.split(':')[1].replace('>', '')
        for tag in re.findall(r'<:\w*:\d*>', message.content)
    ]
    for name in custom_names:
        add_count_fn(message_date, channel, name)

    # standard emoji in the text, reported by their name without delimiters
    for found in emoji_lis(message.content):
        name = demojize(found["emoji"], delimiters=("", ""))
        add_count_fn(message_date, channel, name)

    # reactions: a string is a standard emoji, anything else has a .name
    for react in message.reactions:
        reacted = react.emoji
        if isinstance(reacted, str):
            name = demojize(reacted, delimiters=("", ""))
        else:
            name = reacted.name
        add_count_fn(message_date, channel, name, react.count)
Exemplo n.º 27
0
def create_top_k_emoji_json(k: int):
    """Write each user's *k* most common emoji to a JSON file.

    Reads ``./train_full`` as CSV (columns ``user`` and ``tweets``), counts
    the emoji per user for users 0-9 and dumps the ``most_common(k)`` lists
    to ``most_common_{k}_emojis.json``.
    """
    df = pd.read_csv(r'./train_full')
    emoji_counter = {user_id: {} for user_id in range(10)}
    for tweet, user in zip(df['tweets'], df['user']):
        for found in emoji.emoji_lis(tweet):
            symbol = found['emoji']
            per_user = emoji_counter[user]
            per_user[symbol] = per_user.get(symbol, 0) + 1
    top_k_emojis = {}
    for user_id in range(10):
        top_k_emojis[user_id] = Counter(emoji_counter[user_id]).most_common(k)
    with open(f'most_common_{k}_emojis.json', 'w') as fp:
        json.dump(top_k_emojis, fp)
def tweet_preprocessor(tweets):
    """Clean a list of tweets with the tweet-preprocessor and emoji libraries.

    Removed from every tweet:
        - #Hashtags
        - @Mentions
        - Emojis
        - URLs
        - Standalone numbers

    All letters are transformed into lower case.

    Args:
        tweets: list containing all tweets

    Returns:
        clean_tweets: lower-cased preprocessed list of tweets

    Sources:
        https://pypi.org/project/tweet-preprocessor/
        https://github.com/carpedm20/emoji/

    """
    # Global library options: the tweet elements that tp.clean() handles.
    tp.set_options(tp.OPT.URL, tp.OPT.MENTION, tp.OPT.NUMBER, tp.OPT.HASHTAG)

    clean_tweets = []
    for raw in tweets:
        # strip every emoji found in the original tweet
        stripped = raw
        for found in emoji.emoji_lis(raw):
            stripped = stripped.replace(found['emoji'], '')

        # clean the remaining elements and lower-case the result
        clean_tweets.append(tp.clean(stripped).lower())

    return clean_tweets
Exemplo n.º 29
0
def parse_emojis(text):
    """Return the ``emoji.demojize`` name of every emoji found in *text*."""
    names = []
    for found in emoji.emoji_lis(text):
        names.append(emoji.demojize(found['emoji']))
    return names
Exemplo n.º 30
0
 def get_emotes(emotes: str) -> str:
     """Return only the emoji contained in *emotes*, joined into one string.

     Raises:
         SyntaxError: if more than 24 emoji are found.
     """
     from emoji import emoji_lis
     found = emoji_lis(emotes)
     if len(found) > 24:
         raise SyntaxError("You cant put more than 24 emotes in a message.")
     return "".join(item["emoji"] for item in found)