def graph(self, data, output_folder, parent_folder):
    """Save a horizontal bar chart of the ten most used hashtags, split by sender.

    Args:
        data: Input handed to ``util.group_words_by_sender``; the result must
            expose the ``type``, ``word`` and ``n_w`` columns plus the sender
            column named by ``config.SENDER_COLUMN_NAME``.
        output_folder: Directory the PNG is written into.
        parent_folder: Not used by this grapher.
    """
    title = "Most frequent hashtags"

    grouped = util.group_words_by_sender(data)
    hashtags = grouped[grouped['type'] == 'hashtag']

    # Rank hashtags by their total count over all senders and keep the top ten.
    totals = hashtags.groupby('word').n_w.sum()
    top_ten = totals.sort_values(ascending=False).head(10).index

    sns.set(style="darkgrid")
    axes = sns.barplot(
        data=hashtags,
        x=hashtags['n_w'],
        y=hashtags['word'],
        hue=hashtags[config.SENDER_COLUMN_NAME],
        order=top_ten,
        orient="h",
        palette=config.PALETTE,
    )

    plt.suptitle(title, y=1)
    axes.set(xlabel='', ylabel='')
    # Anchor the legend outside the axes so it never covers the bars.
    axes.legend(bbox_to_anchor=(1.04, 1), loc="upper left")

    figure = axes.get_figure()
    figure.savefig(
        "{}/{}.png".format(output_folder, slugify(title)),
        bbox_inches='tight',
        pad_inches=config.PAD_INCHES,
    )
    # Clear the figure so the next grapher starts from a blank canvas.
    figure.clf()
def graph(self, data, output_folder, parent_folder):
    """Save a bar chart of how often each sender's first name is said, by sender.

    The first whitespace-separated token of every sender name (lower-cased)
    is looked up in the ``word`` column, and the ``n_w`` counts are summed per
    (word, sender) pair.

    Args:
        data: Input handed to ``util.group_words_by_sender``; the result must
            expose the ``word`` and ``n_w`` columns plus the sender column
            named by ``config.SENDER_COLUMN_NAME``.
        output_folder: Directory the PNG is written into.
        parent_folder: Not used by this grapher.
    """
    data = util.group_words_by_sender(data)

    names = data[config.SENDER_COLUMN_NAME].unique().tolist()
    # A set removes duplicates when two senders share a first name; blank or
    # non-string sender values have no first token and are skipped.
    first_names = sorted({
        name.split()[0].lower()
        for name in names
        if isinstance(name, str) and name.split()
    })

    to_plot = (
        data[data['word'].isin(first_names)]
        .groupby(['word', config.SENDER_COLUMN_NAME], as_index=False)[['n_w']]
        .sum()
    )

    sns.set(style="darkgrid")
    plot = sns.barplot(
        x=to_plot['word'],
        y=to_plot['n_w'],
        hue=to_plot[config.SENDER_COLUMN_NAME],
        data=to_plot,
        palette=config.PALETTE,
        # Pin the category order so the tick labels set below always line up
        # with the bars, even when a name was never said in the chat.
        order=first_names,
    )

    TITLE = "Names said in chat"
    plt.suptitle(TITLE, y=1)
    plot.set(
        xlabel='',
        ylabel='',
        xticklabels=["\"{}\"".format(name) for name in first_names],
    )
    plot.legend(bbox_to_anchor=(1.04, 1), loc="upper left")

    figure = plot.get_figure()
    figure.savefig(
        "{}/{}.png".format(output_folder, slugify(TITLE)),
        bbox_inches='tight',
        pad_inches=config.PAD_INCHES,
    )
    # Clear the figure so the next grapher starts from a blank canvas.
    figure.clf()
def graph(self, data, output_folder, parent_folder):
    """Save a bar chart of the ten most used emoji, split by sender.

    The ranked emoji are also printed to stdout.

    Args:
        data: Input handed to ``util.group_words_by_sender``; the result must
            expose the ``type``, ``word`` and ``n_w`` columns plus the sender
            column named by ``config.SENDER_COLUMN_NAME``.
        output_folder: Directory the PNG is written into.
        parent_folder: Not used by this grapher.
    """
    title = "Most frequent emoji"

    grouped = util.group_words_by_sender(data)
    emoji_rows = grouped[grouped['type'] == 'emoji']

    # Rank emoji by their total count over all senders and keep the top ten.
    totals = emoji_rows.groupby('word')[['n_w']].sum()
    top_ten = totals.sort_values('n_w', ascending=False).head(10).index

    sns.set(style="darkgrid")
    axes = sns.barplot(
        data=emoji_rows,
        x=emoji_rows['word'],
        y=emoji_rows['n_w'],
        hue=emoji_rows[config.SENDER_COLUMN_NAME],
        order=top_ten,
        palette=config.PALETTE,
    )

    util.add_custom_fonts()
    tick_labels = axes.get_xticklabels()
    for label in tick_labels:
        # Render the tick text with the emoji font, enlarged for legibility.
        label.set_family('EmojiOne')
        label.set_fontsize(20)

    print("Your top emojis:")
    ranked = (
        "{}. {}".format(rank, label.get_text())
        for rank, label in enumerate(tick_labels, start=1)
    )
    print(" ".join(ranked))

    plt.suptitle(title, y=1)
    axes.set(xlabel='', ylabel='')
    axes.legend(bbox_to_anchor=(1.04, 1), loc="upper left")

    figure = axes.get_figure()
    figure.savefig(
        "{}/{}.png".format(output_folder, slugify(title)),
        bbox_inches='tight',
        pad_inches=config.PAD_INCHES,
    )
    # Clear the figure so the next grapher starts from a blank canvas.
    figure.clf()
def graph(self, data, output_folder, parent_folder):
    """Save a grid of per-sender bar charts of words ranked by ``tf_idf``.

    One subplot is drawn per sender, showing the first ten rows for that
    sender with ``tf_idf`` on the x axis.

    Args:
        data: Input handed to ``util.group_words_by_sender`` (called with
            ``get_tfidf=True``); the result must expose the ``type``,
            ``word`` and ``tf_idf`` columns plus the sender column named by
            ``config.SENDER_COLUMN_NAME``.
        output_folder: Directory the PNG is written into.
        parent_folder: Not used by this grapher.

    Raises:
        ValueError: If ``self.type`` is ``None``.
    """
    if self.type is None:
        raise ValueError("Grapher type must be set to a string")

    data = util.group_words_by_sender(data, get_tfidf=True)
    # Keep only real words longer than a single character.
    data = data[data['word'].str.len() > 1]
    data = data[data['type'] == 'word']

    senders = data[config.SENDER_COLUMN_NAME].unique().tolist()
    rows, cols = util.get_rows_cols(len(senders))

    # squeeze=False keeps ``ax`` two-dimensional even for a 1x1 grid.
    fig, ax = plt.subplots(
        figsize=(cols * 2, rows * 3),
        ncols=cols,
        nrows=rows,
        squeeze=False,
    )
    fig.subplots_adjust(
        left=0.2,
        bottom=0.1,
        right=2,
        top=0.9,
        wspace=0.5,
        hspace=1.1,
    )

    for i, sender in enumerate(senders):
        # Fill the grid row by row.
        row, col = divmod(i, cols)
        axis = ax[row][col]
        axis.set_title(sender, y=1)

        to_plot = data[data[config.SENDER_COLUMN_NAME] == sender]
        # NOTE(review): takes the first ten rows as returned by
        # util.group_words_by_sender -- presumably already ordered by
        # descending tf_idf; confirm against that helper.
        to_plot = to_plot.head(10)[[config.SENDER_COLUMN_NAME, 'word', 'tf_idf']]

        plot = sns.barplot(
            y=to_plot['word'],
            x=to_plot['tf_idf'],
            data=to_plot,
            palette=config.PALETTE,
            orient="h",
            ax=axis,
        )
        plot.set(ylabel="", xlabel="Distinctiveness Score")

    TITLE = "Our Most Distinguishing {}".format(self.type)
    fig.suptitle(TITLE, y=1.09, fontsize=20)
    fig.savefig(
        "{}/{}.png".format(output_folder, slugify(TITLE)),
        bbox_inches='tight',
        pad_inches=config.PAD_INCHES,
    )
    # Clear the figure so the next grapher starts from a blank canvas.
    fig.clf()
def graph(self, data, output_folder, parent_folder):
    """Save a horizontal bar chart of the ten most common words, split by sender.

    Contractions, words of four characters or fewer, words listed in
    ``word_lists/common.txt`` and all-digit tokens are excluded before
    ranking.

    Args:
        data: Input handed to ``util.group_words_by_sender``; the result must
            expose the ``type``, ``word`` and ``n_w`` columns plus the sender
            column named by ``config.SENDER_COLUMN_NAME``.
        output_folder: Directory the PNG is written into.
        parent_folder: Not used by this grapher.
    """
    data = util.group_words_by_sender(data)

    # words only
    data = data[data['type'] == 'word']
    # ignore contractions
    data = data[~data.word.str.contains("'", na=False)]
    # ignore words of 4 characters or fewer
    data = data[data['word'].str.len() > 4]

    # filter out most common words; the path is relative to the current
    # working directory
    with open("word_lists/common.txt", encoding="utf-8") as f:
        common = {line.lower().strip() for line in f}

    to_plot = data.groupby(
        [config.SENDER_COLUMN_NAME, 'word'], as_index=False)[['n_w']].sum()
    to_plot = to_plot[~to_plot.word.isin(common)]

    # ignore numbers: the previous check only matched the one-character
    # strings "0".."9", which the length filter above had already removed,
    # so it never dropped anything. Match any all-digit token instead.
    to_plot = to_plot[~to_plot.word.str.isdigit()]

    sns.set(style="darkgrid")
    plot = sns.barplot(
        y=to_plot['word'],
        x=to_plot['n_w'],
        hue=to_plot[config.SENDER_COLUMN_NAME],
        data=to_plot,
        palette=config.PALETTE,
        orient="h",
        # Rank words by their total count over all senders; keep the top ten.
        order=to_plot.groupby('word').n_w.sum().sort_values(
            ascending=False).head(10).index,
    )

    TITLE = "Most common words"
    plt.suptitle(TITLE, y=1)
    plot.set(xlabel='', ylabel='')
    plot.legend(bbox_to_anchor=(1.04, 1), loc="upper left")

    figure = plot.get_figure()
    figure.savefig(
        "{}/{}.png".format(output_folder, slugify(TITLE)),
        bbox_inches='tight',
        pad_inches=config.PAD_INCHES,
    )
    # Clear the figure so the next grapher starts from a blank canvas.
    figure.clf()