def draw_weight(self, sentence):
    """Draw a histogram of one attention weight per token of ``sentence``.

    The attention data is obtained from
    ``self.__get_attention(self.model, sentence)``. For the token at
    position ``i`` the plotted value is ``attn_data["attn"][11][11][i][i]``,
    i.e. the diagonal entry for that token.

    Args:
        sentence: Input forwarded unchanged to ``__get_attention``.

    Returns:
        Whatever ``GraphVisualizer.draw_histogram`` returns for the single
        histogram series and the layout metadata built here.
    """
    attn_data = self.__get_attention(self.model, sentence)
    gv = GraphVisualizer()

    tokens = attn_data["text"]

    # Prefix every token with its position so repeated tokens stay
    # distinguishable on the x axis.
    x_data = [
        "(" + str(token_index) + ")" + token
        for token_index, token in enumerate(tokens)
    ]
    # NOTE(review): the fixed 11/11 indices presumably select the last layer
    # and last head of a 12-layer, 12-head model -- confirm against
    # __get_attention. The lookup stays inside the comprehension so that an
    # empty token list never touches attn_data["attn"].
    z_data = [
        attn_data["attn"][11][11][token_index][token_index]
        for token_index in range(len(tokens))
    ]

    data_meta_list = [
        {
            "graph_type": "histogram",
            "data_name": "Y",
            "x_data": x_data,
            "y_data": z_data,
            "y_axis": "y2",
        }
    ]
    graph_meta = {
        "title": "BERT NER WEIGHT",
        "x_tickangle": -45,
        "y1_tickangle": 0,
        "y2_tickangle": 0,
        "x_name": "TOKEN",
        "y1_name": "WEIGHT",
        "y2_name": "WEIGHT",
    }
    return gv.draw_histogram(data_meta_list, graph_meta)
def get_plotly_graph(self, max_words=100):
    """Draw term frequency and TF-IDF for the first ``max_words`` words.

    Words come from ``self.get_word_list()``; the scores are the second
    element of each pair yielded by ``self.get_tf_list()`` and
    ``self.get_tfidf_list()``. Each of the three sequences is truncated to
    ``max_words`` entries independently.

    Args:
        max_words: Maximum number of entries plotted (default 100).

    Returns:
        Whatever ``GraphVisualizer.draw_histogram`` returns for a TF
        histogram series (axis ``y1``) and a TF-IDF scatter series
        (axis ``y2``).
    """
    visualizer = GraphVisualizer()

    words = self.get_word_list()[:max_words]
    tf_scores = [tf for _, tf in self.get_tf_list()][:max_words]
    tfidf_scores = [tfidf for _, tfidf in self.get_tfidf_list()][:max_words]

    series = [
        {
            "graph_type": "histogram",
            "data_name": "TF",
            "x_data": words,
            "y_data": tf_scores,
            "y_axis": "y1",
        },
        {
            "graph_type": "scatter",
            "data_name": "TF-IDF",
            "x_data": words,
            "y_data": tfidf_scores,
            "y_axis": "y2",
        },
    ]
    layout = {
        "title": "단어빈도 및 TF-IDF (TF & TF-IDF)",
        "x_tickangle": -45,
        "y1_tickangle": 0,
        "y2_tickangle": 0,
        "x_name": "단어 (WORD)",
        "y1_name": "빈도 (TF)",
        "y2_name": "TF-IDF",
    }
    return visualizer.draw_histogram(series, layout)
def get_inertia_transition_graph(self, inertia_list):
    """Draw a line graph of inertia values against cluster count.

    Prints a notice that this method will be replaced by
    ``.get_kmeans_inertia_transition_graph()`` on every call.

    Args:
        inertia_list: Sequence of inertia values; the value at position
            ``k`` (0-based) is plotted at x = ``k + 1``.

    Returns:
        Whatever ``GraphVisualizer.draw_line_graph`` returns for the single
        scatter series built here.
    """
    print(
        ".get_inertia_transition_graph() Will be replaced by .get_kmeans_inertia_transition_graph()"
    )

    visualizer = GraphVisualizer()
    visualizer.set_plotly()

    # x axis is 1-based: 1, 2, ..., len(inertia_list).
    cluster_counts = list(range(1, len(inertia_list) + 1))

    series = [
        {
            "graph_type": "scatter",
            "data_name": "Y",
            "x_data": cluster_counts,
            "y_data": inertia_list,
            "y_axis": "y1",
        }
    ]
    layout = {
        "title": "K-Means Clutering Inertia Transition Graph",
        "x_tickangle": 0,
        "y1_tickangle": 0,
        "y2_tickangle": 0,
        "x_name": "NUMBER of CLUSTER",
        "y1_name": "INERTIA",
        "y2_name": "Y2",
    }
    return visualizer.draw_line_graph(series, layout)
def get_cluster_graph(self, df_result, label):
    """Draw a scatter plot with one series per distinct value of ``label``.

    For every distinct value in ``df_result[label]`` (in order of first
    appearance) the matching rows supply the ``x``/``y`` coordinates and the
    point labels, which are the ``content`` values shortened to 30
    characters plus ``"..."`` when longer.

    Args:
        df_result: Table indexed by column name and boolean mask; it must
            provide the ``label`` column plus ``"x"``, ``"y"`` and
            ``"content"``. Presumably a pandas DataFrame -- confirm against
            the caller.
        label: Name of the column holding the cluster assignment; also
            appended to the graph title.

    Returns:
        Whatever ``GraphVisualizer.draw_scatter`` returns.
    """
    gv = GraphVisualizer()
    gv.set_plotly()

    data_meta_list = []
    for cluster in OrderedDict.fromkeys(df_result[label]):
        # Filter once by the requested column. The previous code selected
        # the hover labels via the hard-coded ``predict`` column, so for any
        # other ``label`` they described different rows than the points.
        cluster_rows = df_result[df_result[label] == cluster]

        content_label_list = [
            content[:30] + "..." if len(content) > 30 else content
            for content in cluster_rows["content"]
        ]

        data_meta_list.append(
            {
                "data_name": cluster,
                "x_data": cluster_rows["x"],
                "y_data": cluster_rows["y"],
                "label": content_label_list,
            }
        )

    graph_meta = {
        "title": "Cluter Graph - " + label,
        "x_name": "TSNE X",
        "y_name": "TSNE Y",
    }
    return gv.draw_scatter(data_meta_list, graph_meta)
def draw_weight(self, sentence):
    """Draw a histogram of the per-token weights returned by ``get_weight``.

    Args:
        sentence: Input forwarded unchanged to ``self.get_weight``.

    Returns:
        Whatever ``GraphVisualizer.draw_histogram`` returns for the single
        histogram series and the layout metadata built here.
    """
    gv = GraphVisualizer()
    gv.set_plotly()

    weight_result = self.get_weight(sentence)
    if isinstance(weight_result, dict):
        # get_weight in this file returns
        # {"token_list": ..., "weight_list": ...}. Tuple-unpacking a dict
        # binds its *keys*, which would plot the characters of the string
        # "token_list" instead of the tokens.
        token_list = weight_result["token_list"]
        weight_list = weight_result["weight_list"]
    else:
        # Still accept a (token_list, weight_list) pair in case another
        # get_weight implementation returns one.
        token_list, weight_list = weight_result

    # Prefix every token with its position so repeated tokens stay
    # distinguishable on the x axis.
    x_data = [
        "(" + str(token_index) + ")" + token
        for token_index, token in enumerate(token_list)
    ]
    z_data = list(weight_list)

    data_meta_list = [
        {
            "graph_type": "histogram",
            "data_name": "Y",
            "x_data": x_data,
            "y_data": z_data,
            "y_axis": "y2",
        }
    ]
    graph_meta = {
        "title": "NER WEIGHT",
        "x_tickangle": -45,
        "y1_tickangle": 0,
        "y2_tickangle": 0,
        "x_name": "TOKEN",
        "y1_name": "WEIGHT",
        "y2_name": "WEIGHT",
    }
    return gv.draw_histogram(data_meta_list, graph_meta)
def get_kmeans_graph(self, df_result, label):
    """Draw a scatter plot with one series per distinct value of ``label``.

    Distinct values of ``df_result[label]`` are visited in order of first
    appearance. Each series takes its ``x``/``y`` data from the rows where
    the column equals that value, and uses the value itself as both the
    series name and the point label.

    Args:
        df_result: Table indexed by column name and boolean mask; it must
            provide the ``label`` column plus ``"x"`` and ``"y"``.
            Presumably a pandas DataFrame -- confirm against the caller.
        label: Name of the column holding the cluster assignment; also
            appended to the graph title.

    Returns:
        Whatever ``GraphVisualizer.draw_scatter`` returns.
    """
    visualizer = GraphVisualizer()
    visualizer.set_plotly()

    cluster_ids = list(OrderedDict.fromkeys(df_result[label]))

    series = []
    for cluster_id in cluster_ids:
        members = df_result[df_result[label] == cluster_id]
        series.append(
            {
                "data_name": cluster_id,
                "x_data": members["x"],
                "y_data": members["y"],
                "label": cluster_id,
            }
        )

    layout = {
        "title": "K-Means Clutering Graph - " + label,
        "x_name": "TSNE X",
        "y_name": "TSNE Y",
    }
    return visualizer.draw_scatter(series, layout)
def get_weight(self, sentence):
    """Return the tokens of ``sentence`` with one attention weight each.

    The attention data is obtained from
    ``self.__get_attention(self.model, sentence)``. The weight for the
    token at position ``i`` is ``attn_data["attn"][11][11][i][i]``.

    Args:
        sentence: Input forwarded unchanged to ``__get_attention``.

    Returns:
        dict: ``{"token_list": attn_data["text"], "weight_list": [...]}``
        where ``weight_list`` has one entry per token.
    """
    attn_data = self.__get_attention(self.model, sentence)
    token_list = attn_data["text"]

    # NOTE(review): the fixed 11/11 indices presumably select the last layer
    # and last head of a 12-layer, 12-head model -- confirm against
    # __get_attention. The lookup stays inside the comprehension so that an
    # empty token list never touches attn_data["attn"].
    weight_list = [
        attn_data["attn"][11][11][token_index][token_index]
        for token_index in range(len(token_list))
    ]
    return {"token_list": token_list, "weight_list": weight_list}
def get_co_matrix_graph(self, max_words):
    """Draw a co-occurrence matrix for the first ``max_words`` word pairs.

    ``self.result_list`` is read as a sequence of ``(co_word, freq)`` items
    where ``co_word[0]`` and ``co_word[1]`` are the two words. Axis labels
    are the distinct words in order of first appearance (first word of a
    pair before its second word). Only the cell
    ``z_data[index(first)][index(second)]`` is set for each pair; all other
    cells stay 0, and a pair that appears again overwrites its cell.

    Args:
        max_words: Number of leading entries of ``self.result_list`` to use.

    Returns:
        Whatever ``GraphVisualizer.draw_matrix`` returns.
    """
    gv = GraphVisualizer()
    gv.set_plotly()

    top_pairs = self.result_list[:max_words]

    # Map each word to its axis position so lookups are O(1) instead of the
    # repeated list scans (``in`` / ``.index``) that made this quadratic.
    # Assumes the words are hashable (e.g. strings).
    x_data = []
    position = {}
    for co_word, _freq in top_pairs:
        for word in (co_word[0], co_word[1]):
            if word not in position:
                position[word] = len(x_data)
                x_data.append(word)

    # Both axes share the same label list, as in the original.
    y_data = x_data

    size = len(x_data)
    z_data = [[0] * size for _ in range(size)]
    for co_word, freq in top_pairs:
        z_data[position[co_word[0]]][position[co_word[1]]] = freq

    data_meta = {
        "colorbar_title": "동시출현빈도 (CO-WORD FREQUENCY)",
        "x_data": x_data,
        "y_data": y_data,
        "z_data": z_data,
    }
    graph_meta = {
        "title": "동시출현빈도 매트릭스 (CO-WORD MATRIX)",
        "height": 1000,
        "width": 1000,
        "y_tickangle": -45,
        "y_name": "Y",
        "x_tickangle": -45,
        "x_name": "X",
    }
    return gv.draw_matrix(data_meta, graph_meta)
def draw_sentence_tree(self, sentence, label_list, edge_list):
    """Delegate sentence-tree drawing to a plotly-configured GraphVisualizer.

    Args:
        sentence: Forwarded unchanged to the visualizer.
        label_list: Forwarded unchanged to the visualizer.
        edge_list: Forwarded unchanged to the visualizer.

    Returns:
        Whatever ``GraphVisualizer.draw_sentence_tree`` returns.
    """
    visualizer = GraphVisualizer()
    visualizer.set_plotly()
    tree_graph = visualizer.draw_sentence_tree(sentence, label_list, edge_list)
    return tree_graph