def graph(self, data, output_folder, parent_folder):
    """Draw one horizontal bar chart per term and save the grid as a PNG.

    Each subplot shows the first ten rows for a term (after filtering) with
    the word on the y axis and its ``tf_idf`` value on the x axis.

    Args:
        data: Input handed to ``util.group_words_by_term``. The result is
            indexed by its ``word``, ``type``, ``term`` and ``tf_idf``
            columns, so it is presumably a pandas DataFrame -- confirm
            against the helper.
        output_folder: Directory the PNG is written to. The file name is
            the slugified figure title.
        parent_folder: Not used by this method; kept so the signature stays
            compatible with callers.

    Raises:
        ValueError: If ``self.type`` is ``None``.
    """
    if self.type is None:
        raise ValueError("Grapher type must be set to a string")

    data = util.group_words_by_term(data, get_tfidf=True)
    # Keep only entries longer than one character that are tagged as words.
    data = data[data['word'].str.len() > 1]
    data = data[data['type'] == 'word']

    terms = sorted(data.term.unique().tolist())
    rows, cols = util.get_rows_cols(len(terms))

    # squeeze=False keeps ``axes`` two-dimensional even for a 1x1 grid.
    fig, axes = plt.subplots(
        figsize=(cols * 2, rows * 3),
        ncols=cols,
        nrows=rows,
        squeeze=False
    )
    try:
        fig.subplots_adjust(
            left=0.2,
            bottom=0.1,
            right=2,
            top=0.9,
            wspace=0.5,
            hspace=1.1
        )

        for i, term in enumerate(terms):
            # Fill the grid row by row.
            row, col = divmod(i, cols)
            axis = axes[row][col]
            axis.set_title(term, y=1)

            # NOTE(review): head(10) takes the first ten rows; this is the
            # "top ten" only if the helper returns rows sorted by tf_idf.
            to_plot = data[data['term'] == term]
            to_plot = to_plot.head(10)[['term', 'word', 'tf_idf']]

            plot = sns.barplot(
                y=to_plot['word'],
                x=to_plot['tf_idf'],
                data=to_plot,
                palette=config.PALETTE,
                orient="h",
                ax=axis
            )
            plot.set(ylabel="", xlabel="Distinctiveness Score")

        title = "Each Term's Most Distinguishing {}".format(self.type)
        fig.suptitle(title, y=1.09, fontsize=20)
        fig.savefig(
            "{}/{}.png".format(output_folder, slugify(title)),
            bbox_inches='tight',
            pad_inches=config.PAD_INCHES
        )
    finally:
        # Clearing a figure leaves it registered with pyplot; closing it
        # releases it, including when plotting or saving fails.
        plt.close(fig)
def graph(self, data, output_folder, parent_folder):
    """Draw one horizontal bar chart per sender and save the grid as a PNG.

    Each subplot shows the first ten rows for a sender (after filtering)
    with the word on the y axis and its ``tf_idf`` value on the x axis.

    Args:
        data: Input handed to ``util.group_words_by_sender``. The result is
            indexed by its ``word``, ``type``, ``tf_idf`` and
            ``config.SENDER_COLUMN_NAME`` columns, so it is presumably a
            pandas DataFrame -- confirm against the helper.
        output_folder: Directory the PNG is written to. The file name is
            the slugified figure title.
        parent_folder: Not used by this method; kept so the signature stays
            compatible with callers.
    """
    sender_column = config.SENDER_COLUMN_NAME

    data = util.group_words_by_sender(data, get_tfidf=True)
    # Keep only entries longer than one character that are tagged as words.
    data = data[data['word'].str.len() > 1]
    data = data[data['type'] == 'word']

    # Senders keep the order in which unique() yields them (not sorted).
    senders = data[sender_column].unique().tolist()
    rows, cols = util.get_rows_cols(len(senders))

    # squeeze=False keeps ``axes`` two-dimensional even for a 1x1 grid.
    fig, axes = plt.subplots(
        figsize=(cols * 2, rows * 3),
        ncols=cols,
        nrows=rows,
        squeeze=False
    )
    try:
        fig.subplots_adjust(
            left=0.2,
            bottom=0.1,
            right=2,
            top=0.9,
            wspace=0.5,
            hspace=1.1
        )

        for i, sender in enumerate(senders):
            # Fill the grid row by row.
            row, col = divmod(i, cols)
            axis = axes[row][col]
            axis.set_title(sender, y=1)

            # NOTE(review): head(10) takes the first ten rows; this is the
            # "top ten" only if the helper returns rows sorted by tf_idf.
            to_plot = data[data[sender_column] == sender]
            to_plot = to_plot.head(10)[[sender_column, 'word', 'tf_idf']]

            plot = sns.barplot(
                y=to_plot['word'],
                x=to_plot['tf_idf'],
                data=to_plot,
                palette=config.PALETTE,
                orient="h",
                ax=axis
            )
            plot.set(ylabel="", xlabel="Distinctiveness Score")

        title = "Our Most Distinguishing Words"
        fig.suptitle(title, y=1.09, fontsize=20)
        fig.savefig(
            "{}/{}.png".format(output_folder, slugify(title)),
            bbox_inches='tight',
            pad_inches=config.PAD_INCHES
        )
    finally:
        # Clearing a figure leaves it registered with pyplot; closing it
        # releases it, including when plotting or saving fails.
        plt.close(fig)