def plot_polarity_hourly_en(user):
    """Plot every tweet's polarity against its hour, with the hourly mean overlaid.

    The tweets returned by ``get_tweets(user)`` are scored with
    ``get_polarity_en`` (scaled by 100) and scattered against the value of
    ``get_hours`` applied to the ``"time"`` column. A red line shows the mean
    polarity per hour; hours without tweets are drawn with a mean of 0.

    As a side effect, ``"polarity"`` and ``"hour"`` columns are added to the
    DataFrame returned by ``get_tweets``.

    Args:
        user: Object exposing ``username``; also passed to ``get_tweets``.

    Returns:
        The path of the saved PNG image.
    """
    # NOTE(review): other helpers in this file build paths as
    # f"{URL_ROOT}static/..." (no slash after URL_ROOT) - confirm which form
    # URL_ROOT expects.
    path = f"{URL_ROOT}/static/images/{user.username}_polarity_hourly.png"
    if os.path.isfile(path):
        os.remove(path)

    df = get_tweets(user)
    df["polarity"] = df["tweet"].apply(lambda text: get_polarity_en(text) * 100)
    df["hour"] = df["time"].apply(get_hours)

    hours = np.arange(0, 24, 1)
    sums = [0] * 24
    counts = [0] * 24
    # Assumes str(time) starts with a two-digit hour (e.g. "HH:MM:SS") - confirm.
    # The hour bucket is derived the same way for both the sum and the count.
    for stamp, score in zip(df["time"], df["polarity"]):
        hour = int(str(stamp)[0:2])
        sums[hour] += score
        counts[hour] += 1

    # Divide by at least 1 so hours without tweets yield a mean of 0 instead
    # of a zero-division.
    means = np.array(sums) / np.maximum(np.array(counts), 1)

    try:
        plt.scatter(df["hour"], df["polarity"], alpha=0.50)
        plt.plot(hours, means, color="red")
        plt.title("Distribution of polarity score within 24h scale")
        plt.xticks(hours)
        plt.yticks(np.arange(-100, 110, 10))
        plt.xlabel("Hours (00:00 - 23:00)")
        plt.ylabel("Average polarity score")
        plt.savefig(path)
    finally:
        # Always reset the shared pyplot figure, even if plotting/saving fails,
        # so the next plot does not inherit stale artists.
        plt.clf()
    return path
def get_subjectivity_score_tr(user):
    """Return the subjectivity of a user's Turkish tweets as an integer score.

    All tweets are merged into one string, translated in a single request
    (presumably to English, the translator's default target - confirm) and
    scored with TextBlob. The subjectivity value is multiplied by 100 and
    truncated to an int.

    NOTE(review): a later function in this file is also named
    ``get_subjectivity_score_tr``; at import time that later definition
    rebinds the name, so this version is shadowed.
    """
    translator = Translator()
    merged_text = tweets_to_string(get_tweets(user))
    translated = translator.translate(merged_text)
    sentiment = TextBlob(translated.text).sentiment
    return int(sentiment.subjectivity * 100)
def get_gender_pred_tr(user):
    """Predict the gender split for a user from their Turkish tweets.

    The Turkish model only distinguishes male and female. Column 0 of the
    model's ``predict_proba`` output is read as the male probability; the
    female score is the remainder to 100.

    Args:
        user: Object passed to ``get_tweets``.

    Returns:
        ``{"male": int, "female": int}`` with both scores summing to 100.
    """
    model_path = f"{URL_ROOT}models/tr/gender_model_tr.pkl"
    # NOTE: unpickling executes code embedded in the file; the model file
    # must come from a trusted source.
    with open(model_path, "rb") as model_file:
        model = pickle.load(model_file)

    # Assumes tweets_to_string returns one string, so the Series holds a
    # single sample and predict_proba yields a single row - confirm.
    tweets = tweets_to_string(get_tweets(user))
    gender_pred = model.predict_proba(pd.Series(tweets))

    # Index the scalar directly: calling int() on a 1-element array is
    # deprecated in recent NumPy releases.
    male_score = int(gender_pred[0, 0] * 100)
    female_score = 100 - male_score
    return {"male": male_score, "female": female_score}
def get_gender_pred_en(user):
    """Predict the male/female/brand split for a user from their English tweets.

    Column 0 of the model's ``predict_proba`` output is read as the male
    probability and column 2 as the brand probability; the female score is
    whatever remains to 100 after both are truncated to ints.

    Args:
        user: Object passed to ``get_tweets``.

    Returns:
        ``{"male": int, "female": int, "brand": int}`` summing to 100.
    """
    model_path = f"{URL_ROOT}models/en/gender_model_en.pkl"
    # NOTE: unpickling executes code embedded in the file; the model file
    # must come from a trusted source.
    with open(model_path, "rb") as model_file:
        model = pickle.load(model_file)

    # Assumes tweets_to_string returns one string, so the Series holds a
    # single sample and predict_proba yields a single row - confirm.
    tweets = tweets_to_string(get_tweets(user))
    gender_pred = model.predict_proba(pd.Series(tweets))

    # Index the scalars directly: calling int() on a 1-element array is
    # deprecated in recent NumPy releases.
    male_score = int(gender_pred[0, 0] * 100)
    brand_score = int(gender_pred[0, 2] * 100)
    female_score = 100 - male_score - brand_score
    return {"male": male_score, "female": female_score, "brand": brand_score}
def get_subjectivity_score_tr(user):
    """Return the subjectivity score of a user's Turkish tweets.

    The merged tweet text is cut into chunks of 15000 characters, the chunks
    are translated in one batch call, each translation is scored with
    TextBlob (subjectivity * 100, truncated to int) and the per-chunk scores
    are averaged with equal weight.

    TextBlob subjectivity lies in 0..1, so the score is in 0..100
    (0 = factual end, 100 = subjective end).

    Args:
        user: Object passed to ``get_tweets``.

    Returns:
        The averaged integer score, or 0 when there is no tweet text.
    """
    # Presumably sized to stay below the translation service's request
    # limit - confirm.
    chunk_size = 15000

    tweets_text = tweets_to_string(get_tweets(user))
    text_list = [
        tweets_text[start:start + chunk_size]
        for start in range(0, len(tweets_text), chunk_size)
    ]
    if not text_list:
        # No text means no chunks; avoid dividing by zero below.
        return 0

    tr = Translator()
    translations = tr.translate(text_list)
    score = sum(
        int(TextBlob(translation.text).sentiment.subjectivity * 100)
        for translation in translations
    )
    return int(score / len(text_list))
def plot_polarity_date_en(user):
    """Save a bar chart of per-tweet polarity against tweet date.

    Each tweet returned by ``get_tweets(user)`` is scored with
    ``get_polarity_en`` and scaled by 100. Only the last and first entries
    of the ``"date"`` column are used as x tick labels.

    Args:
        user: Object exposing ``username``; also passed to ``get_tweets``.

    Returns:
        The path of the saved PNG image.

    Raises:
        IndexError: If the tweet DataFrame is empty (no first/last date).
    """
    path = f"{URL_ROOT}/static/images/{user.username}_polarity_date.png"
    if os.path.isfile(path):
        os.remove(path)

    df = get_tweets(user)
    polarity = df["tweet"].apply(lambda text: get_polarity_en(text) * 100)
    dates = df["date"]

    try:
        # Matplotlib pairs x and height by position, not by pandas index, so
        # the scores must be reversed together with the dates; reversing only
        # the dates would draw every bar at the wrong date.
        plt.bar(dates.iloc[::-1], polarity.iloc[::-1])
        plt.title("Polarity score for the last 500 tweets")
        plt.xticks([dates.iloc[-1], dates.iloc[0]])
        plt.yticks(np.arange(-100, 110, 10))
        plt.xlabel("Date")
        plt.ylabel("Polarity score")
        plt.savefig(path)
    finally:
        # Always reset the shared pyplot figure, even if plotting/saving fails.
        plt.clf()
    return path
def plot_subjectivity_date_en(user):
    """Save a bar chart of per-tweet subjectivity against tweet date.

    Each tweet returned by ``get_tweets(user)`` is scored with
    ``get_subjectivity_en`` and scaled by 100. Only the last and first
    entries of the ``"date"`` column are used as (vertical) x tick labels.

    Args:
        user: Object exposing ``username``; also passed to ``get_tweets``.

    Returns:
        The path of the saved PNG image.

    Raises:
        IndexError: If the tweet DataFrame is empty (no first/last date).
    """
    path = f"{URL_ROOT}/static/images/{user.username}_subjectivity_date.png"
    if os.path.isfile(path):
        os.remove(path)

    df = get_tweets(user)
    subjectivity = df["tweet"].apply(lambda text: get_subjectivity_en(text) * 100)
    dates = df["date"]

    try:
        # Matplotlib pairs x and height by position, not by pandas index, so
        # the scores must be reversed together with the dates; reversing only
        # the dates would draw every bar at the wrong date.
        plt.bar(dates.iloc[::-1], subjectivity.iloc[::-1])
        plt.title("Subjectivity score for the last 500 tweets")
        plt.xticks([dates.iloc[-1], dates.iloc[0]], rotation=90)
        plt.yticks(np.arange(0, 110, 10))
        plt.xlabel("Date")
        plt.ylabel("Subjectivity score")
        plt.tight_layout(h_pad=9, w_pad=8)
        plt.savefig(path)
    finally:
        # Always reset the shared pyplot figure, even if plotting/saving fails.
        plt.clf()
    return path
def plot_tweet_frequency_hours(user):
    """Save a bar chart of how many of the user's tweets fall in each hour.

    The hour of a tweet is taken from the first two characters of the string
    form of its ``"time"`` value. Returns the path of the saved PNG image.
    """
    path = f"{URL_ROOT}/static/images/{user.username}_frequency_hourly.png"
    # Drop any previous render of this chart before drawing a new one.
    if os.path.isfile(path):
        os.remove(path)

    tweets = get_tweets(user)
    hour_axis = np.arange(24)

    # One bucket per hour of the day, filled by a single pass over the times.
    per_hour = [0 for _ in range(24)]
    for stamp in list(tweets["time"]):
        bucket = int(str(stamp)[:2])
        per_hour[bucket] = per_hour[bucket] + 1

    plt.bar(hour_axis, per_hour)
    plt.title("Tweeting frequency within 24h scale (for the last 500 tweets)")
    plt.xticks(hour_axis)
    plt.xlabel("Hours (00:00 to 23:00)")
    plt.ylabel("Number of tweets")
    plt.savefig(path)
    plt.clf()
    return path
def plot_tweet_frequency_date(user):
    """Save a line plot of the cumulative tweet count over calendar days.

    The x axis spans every day from the date of the last row to the date of
    the first row of the tweet DataFrame (rows are presumably ordered newest
    first - confirm). For each day the plotted value is the number of tweets
    dated on or before that day.

    Args:
        user: Object exposing ``username``; also passed to ``get_tweets``.

    Returns:
        The path of the saved PNG image.

    Raises:
        IndexError: If the tweet DataFrame is empty (no first/last date).
    """
    path = f"{URL_ROOT}/static/images/{user.username}_frequency_date.png"
    if os.path.isfile(path):
        os.remove(path)

    df = get_tweets(user)
    first_date = pd.to_datetime(df["date"].iloc[-1])
    last_date = pd.to_datetime(df["date"].iloc[0])
    date_range = pd.date_range(first_date, last_date).tolist()

    # Parse the date column once instead of re-parsing it for every day in
    # the range.
    tweet_dates = pd.to_datetime(df["date"])
    daily_tweet_count = [int((tweet_dates <= day).sum()) for day in date_range]

    try:
        plt.plot(date_range, daily_tweet_count)
        plt.title("Tweeting frequency for the last 500 tweets")
        plt.xlabel("Date")
        plt.ylabel("Total number of tweets")
        plt.savefig(path)
    finally:
        # Always reset the shared pyplot figure, even if plotting/saving fails.
        plt.clf()
    return path
def create_wordclouds(user):
    """Render word clouds for a user's tweets and for their mentions.

    Stop words are the WordCloud defaults plus the Turkish list loaded from
    the stopwords file and a handful of extra colloquial Turkish words.
    Existing image files are removed before the new ones are written.

    Returns:
        ``{"tweet_wordcloud": <path>, "ment_wordcloud": <path>}``.
    """
    path_tw = f"{URL_ROOT}static/images/{user.username}_tw_wordcloud.png"
    path_ment = f"{URL_ROOT}static/images/{user.username}_ment_wordcloud.png"
    stopword_file = f"{URL_ROOT}models/datasets/stopwords-tr.txt"

    extra_turkish = ["bi", "var", "yok", "sadece", "bence", "sence", "bi",
                     "evet", "hayır", "peki", "tamam", "başka", "aynı",
                     "lazım", "yav", "lan", "la", "olm"]
    turkish_stopwords = txt_to_list(stopword_file) + extra_turkish
    combined_stopwords = list(STOPWORDS) + turkish_stopwords

    # Clear out images left over from a previous run.
    for stale_image in (path_tw, path_ment):
        if os.path.isfile(stale_image):
            os.remove(stale_image)

    tweet_text = tweets_to_string(get_tweets(user))
    mention_text = tweets_to_string(get_mentioned(user))

    # Same recipe for both clouds: build, generate from text, write to disk.
    for text, target in ((tweet_text, path_tw), (mention_text, path_ment)):
        cloud = WordCloud(stopwords=set(combined_stopwords))
        cloud.generate(text)
        cloud.to_file(target)

    return {"tweet_wordcloud": path_tw, "ment_wordcloud": path_ment}
def get_subjectivity_score_en(user):
    """Return the subjectivity score of a user's English tweets.

    All tweets are merged into one string and scored with TextBlob; the
    subjectivity is multiplied by 100 and truncated to an int
    (0 is the factual end, 100 is the subjective end).
    """
    merged_text = tweets_to_string(get_tweets(user))
    subjectivity = TextBlob(merged_text).sentiment.subjectivity
    return int(subjectivity * 100)
def get_polarity_score_en(user):
    """Return the polarity score of a user's English tweets.

    All tweets are merged into one string and scored with TextBlob; the
    polarity is multiplied by 100 and truncated to an int
    (-100 is the negative end, 100 is the positive end).
    """
    merged_text = tweets_to_string(get_tweets(user))
    polarity = TextBlob(merged_text).sentiment.polarity
    return int(polarity * 100)