Пример #1
0
def train_emoji_vectors():
    """Write a text vector model for the emoji names in ``emoji_df.csv``.

    Reads ``data/emojis/emoji_df.csv`` and takes the second column of every
    row whose value contains neither ":" nor ",".  Each value is stripped of
    everything except ASCII letters, digits, whitespace and hyphens,
    lower-cased, and its whitespace-separated words are joined with "-" to
    form a code.  The codes are handed to ``getemojis.get_emojis`` and then
    looked up one by one with ``get_vector``; only lookups that return a
    NumPy array are kept.

    The result is written to ``data/emojis/emoji_vector_model.txt`` as a
    header line ``"<count> 300"`` followed by one ``"<code> <v1> <v2> ..."``
    line per kept code.

    NOTE(review): the header hard-codes 300 as the second field, presumably
    the vector dimensionality -- confirm it matches what ``get_vector``
    returns.
    """
    codes = []
    with open("data/emojis/emoji_df.csv",
              mode="r",
              encoding="utf-8",
              newline="") as file:
        for row in reader(file):
            # Blank or truncated rows have no second column to read.
            if len(row) < 2:
                continue
            name = row[1]
            if ":" in name or "," in name:
                continue
            clean = re.sub(r"[^a-zA-Z0-9\s-]+", "", name)
            code = "-".join(clean.lower().split())
            # A name made only of stripped characters leaves no usable code
            # and would produce a malformed model line.
            if code:
                codes.append(code)

    getemojis.get_emojis(codes)

    emoji_vectors = {}
    for code in codes:
        # Look each code up once and keep that result instead of asking
        # get_vector for the same code a second time.
        vector = get_vector(code)
        if isinstance(vector, np.ndarray):
            emoji_vectors[code] = vector

    data = [f"{len(emoji_vectors)} 300"]
    for code, vector in emoji_vectors.items():
        numbers = " ".join(str(number) for number in np.nditer(vector))
        data.append(f"{code} {numbers}")

    with open("data/emojis/emoji_vector_model.txt", "w",
              encoding="utf-8") as file:
        file.write("\n".join(data))
Пример #2
0
 def post(self):
     """Send the PNG image for the emoji named in the request's JSON body.

     Reads the ``'emoji'`` key from the JSON payload.  If
     ``data/emojis/<emoji>.png`` does not exist yet,
     ``getemojis.get_emojis`` is called with the emoji name first; the
     file is then returned with ``send_file`` as ``image/png``.

     Raises:
         KeyError: if the JSON body has no ``'emoji'`` key.
     """
     settings = request.get_json()
     emoji = settings['emoji']
     # NOTE(review): ``emoji`` comes straight from the request and is
     # interpolated into a filesystem path; a value containing path
     # separators or ".." could address files outside data/emojis.
     # Validate it against the known emoji codes before shipping.
     filename = f"data/emojis/{emoji}.png"
     # Test the image file itself.  os.path.dirname() of this path is just
     # "data/emojis", which exists whether or not this particular emoji
     # has been fetched, so checking it would never trigger the fetch.
     if not os.path.exists(filename):
         # NOTE(review): presumably this writes the PNG at ``filename`` --
         # confirm that get_emojis accepts a single name here.
         getemojis.get_emojis(emoji)
     return send_file(filename, mimetype="image/png")
Пример #3
0
def analyze_users(num_of_tweets=2000, force_new_tweets=False, emojis=False):
    """Analyze every handle listed in ``data/handles.json``.

    For each handle a ``ChiUser`` is created and its stored data loaded.
    Tweets are downloaded through ``twitterhandler.get_tweets`` when
    ``force_new_tweets`` is set, when loading the stored data fails, or when
    the loaded user has no tweets.  The user's words are then extracted, the
    matching statistical model is updated, and emojis are optionally
    collected.  All users are finally passed to ``Metrics.analyze_users``.

    Args:
        num_of_tweets: Number of tweets to request when downloading; no
            download happens when this is not positive.
        force_new_tweets: Download fresh tweets for every user, even when
            stored data is available.
        emojis: Also call ``get_emojis`` for every user.
    """
    users = []
    with open("data/handles.json", "r") as file:
        people = json.load(file)
    for name in people:
        person = ChiUser(name)
        tweets = None

        # Decide per user whether tweets must be fetched.  The decision is
        # kept in a local so that one user with missing data does not force
        # a download for every user processed after them.
        fetch_tweets = (not person.load_user_data()
                        or len(person.tweets) <= 0
                        or force_new_tweets)

        if fetch_tweets and num_of_tweets > 0:
            print(f"Grabbing tweets for {person.username}")
            tweets = twitterhandler.get_tweets(person.username,
                                               num_of_tweets)

        # Do the analysis
        print(f"Analyzing tweets for {person.username}")
        person.find_words(tweets)
        # NOTE(review): ``person`` is always built as a ChiUser above, so
        # the other branches only matter if that constructor is swapped.
        if isinstance(person, ChiUser):
            print(f"Working statistical analysis for {person.username}")
            chisquaredmodel.add_user(person)
            chisquaredmodel.calculate_user(person)
        elif isinstance(person, BayesUser):
            print(f"Working statistical analysis for {person.username}")
            bayesmodel.add_user(person)
            bayesmodel.calculate_user(person)
        else:
            person.add_interests(person.words)

        if emojis:
            print(f"Getting emojis for {person.username}")
            get_emojis(person)
        users.append(person)
    Metrics.analyze_users(users)
Пример #4
0
from getemojis import get_emojis
import re
import json
import codecs

# Load the hand-written Lua fragments that surround the generated table.
with open("./.header.lua", "r") as file:
    header = file.read()
with open("./.source.lua", "r") as file:
    source = file.read()

# Render the mapping returned by get_emojis() as a Lua table literal:
# {["key"]="value",["key"]="value",...}
entries = (f'["{key}"]="{val}"' for key, val in get_emojis().items())
encoded = "{" + ",".join(entries) + "}"
middle = "local codes = " + encoded

# Assemble header, generated table and source, separated by blank lines.
# "surrogatepass" lets lone surrogate code points be encoded instead of
# raising an error.
with codecs.open("emoji.lua", "w+", "utf-8", errors="surrogatepass") as file:
    file.write("\n\n".join((header, middle, source)))