示例#1
0
 def draw_weight(self, sentence):
     """Draw a histogram of one attention weight per token of ``sentence``.

     The tokens (``"text"``) and the attention values (``"attn"``) are read
     from the result of ``self.__get_attention(self.model, sentence)``. The
     bar for token ``i`` is ``attn[11][11][i][i]``.

     Args:
         sentence: Input forwarded unchanged to ``self.__get_attention``.

     Returns:
         The value returned by ``GraphVisualizer.draw_histogram``.
     """
     attn_data = self.__get_attention(self.model, sentence)
     gv = GraphVisualizer()
     x_data = []
     z_data = []
     # NOTE(review): the fixed indices 11/11 presumably pick one specific
     # layer and head of the attention output -- confirm against
     # __get_attention before changing them.
     for token_index, token in enumerate(attn_data["text"]):
         # Prefix the position so repeated tokens stay distinct on the axis.
         x_data.append("(" + str(token_index) + ")" + token)
         z_data.append(attn_data["attn"][11][11][token_index][token_index])
     data_meta_list = [
         {
             "graph_type": "histogram",
             "data_name": "Y",
             "x_data": x_data,
             "y_data": z_data,
             "y_axis": "y2",
         }
     ]
     graph_meta = {
         "title": "BERT NER WEIGHT",
         "x_tickangle": -45,
         "y1_tickangle": 0,
         "y2_tickangle": 0,
         "x_name": "TOKEN",
         "y1_name": "WEIGHT",
         "y2_name": "WEIGHT",
     }
     return gv.draw_histogram(data_meta_list, graph_meta)
示例#2
0
 def get_plotly_graph(self, max_words=100):
     """Plot TF as a histogram and TF-IDF as a scatter over the same words.

     Args:
         max_words: Upper bound on how many entries of the word, TF and
             TF-IDF lists are plotted (each list is sliced to this length).

     Returns:
         The value returned by ``GraphVisualizer.draw_histogram``.
     """
     visualizer = GraphVisualizer()
     words = self.get_word_list()[:max_words]
     tf_scores = [tf for _, tf in self.get_tf_list()][:max_words]
     tfidf_scores = [tfidf for _, tfidf in self.get_tfidf_list()][:max_words]

     # TF goes on the first y axis, TF-IDF on the second.
     tf_series = {
         "graph_type": "histogram",
         "data_name": "TF",
         "x_data": words,
         "y_data": tf_scores,
         "y_axis": "y1",
     }
     tfidf_series = {
         "graph_type": "scatter",
         "data_name": "TF-IDF",
         "x_data": words,
         "y_data": tfidf_scores,
         "y_axis": "y2",
     }
     layout = {
         "title": "단어빈도 및 TF-IDF (TF & TF-IDF)",
         "x_tickangle": -45,
         "y1_tickangle": 0,
         "y2_tickangle": 0,
         "x_name": "단어 (WORD)",
         "y1_name": "빈도 (TF)",
         "y2_name": "TF-IDF",
     }
     return visualizer.draw_histogram([tf_series, tfidf_series], layout)
示例#3
0
 def get_inertia_transition_graph(self, inertia_list):
     """Draw a line graph of inertia values against a 1-based position.

     Prints a notice that this method will be replaced by
     ``.get_kmeans_inertia_transition_graph()`` before drawing.

     Args:
         inertia_list: Sequence of inertia values; the n-th value is plotted
             at x = n (starting from 1).

     Returns:
         The value returned by ``GraphVisualizer.draw_line_graph``.
     """
     print(
         ".get_inertia_transition_graph() Will be replaced by .get_kmeans_inertia_transition_graph()"
     )
     visualizer = GraphVisualizer()
     visualizer.set_plotly()

     cluster_counts = list(range(1, len(inertia_list) + 1))
     series = [
         {
             "graph_type": "scatter",
             "data_name": "Y",
             "x_data": cluster_counts,
             "y_data": inertia_list,
             "y_axis": "y1",
         }
     ]
     layout = {
         "title": "K-Means Clutering Inertia Transition Graph",
         "x_tickangle": 0,
         "y1_tickangle": 0,
         "y2_tickangle": 0,
         "x_name": "NUMBER of CLUSTER",
         "y1_name": "INERTIA",
         "y2_name": "Y2",
     }
     return visualizer.draw_line_graph(series, layout)
示例#4
0
    def get_cluster_graph(self, df_result, label):
        """Draw a scatter plot with one series per distinct value of ``label``.

        For every distinct value in ``df_result[label]`` (in first-seen
        order) the matching rows supply the ``"x"`` and ``"y"`` coordinates
        and a per-point label taken from ``"content"``, shortened to 30
        characters plus ``"..."`` when longer.

        Args:
            df_result: Table indexed by column name and boolean mask; it
                must provide the columns ``label``, ``"x"``, ``"y"`` and
                ``"content"``.
            label: Name of the column whose values define the series.

        Returns:
            The value returned by ``GraphVisualizer.draw_scatter``.
        """
        gv = GraphVisualizer()
        gv.set_plotly()
        data_meta_list = []
        for group_value in OrderedDict.fromkeys(df_result[label]):
            # Select the rows once and use the same subset for coordinates
            # and labels, so each label belongs to the point it is drawn on
            # (filtering on a fixed "predict" column would mismatch them
            # whenever ``label`` names a different column).
            group_rows = df_result[df_result[label] == group_value]
            content_label_list = [
                content[:30] + "..." if len(content) > 30 else content
                for content in group_rows["content"]
            ]
            data_meta_list.append({
                "data_name": group_value,
                "x_data": group_rows["x"],
                "y_data": group_rows["y"],
                "label": content_label_list,
            })
        graph_meta = {
            "title": "Cluter Graph - " + label,
            "x_name": "TSNE X",
            "y_name": "TSNE Y"
        }
        return gv.draw_scatter(data_meta_list, graph_meta)
 def draw_weight(self, sentence):
     """Draw a histogram of the per-token weights of ``sentence``.

     Tokens and weights are obtained from ``self.get_weight(sentence)``.
     Both result shapes are accepted: a mapping with the keys
     ``"token_list"`` and ``"weight_list"``, or a ``(token_list,
     weight_list)`` pair.

     Args:
         sentence: Input forwarded unchanged to ``self.get_weight``.

     Returns:
         The value returned by ``GraphVisualizer.draw_histogram``.
     """
     gv = GraphVisualizer()
     gv.set_plotly()
     weights = self.get_weight(sentence)
     if isinstance(weights, dict):
         # Unpacking a dict would yield its keys, not its values, so read
         # the two lists by name.
         token_list = weights["token_list"]
         weight_list = weights["weight_list"]
     else:
         token_list, weight_list = weights
     # Prefix the position so repeated tokens stay distinct on the axis.
     x_data = [
         "(" + str(token_index) + ")" + token
         for token_index, token in enumerate(token_list)
     ]
     z_data = list(weight_list)
     data_meta_list = [
         {
             "graph_type": "histogram",
             "data_name": "Y",
             "x_data": x_data,
             "y_data": z_data,
             "y_axis": "y2",
         }
     ]
     graph_meta = {
         "title": "NER WEIGHT",
         "x_tickangle": -45,
         "y1_tickangle": 0,
         "y2_tickangle": 0,
         "x_name": "TOKEN",
         "y1_name": "WEIGHT",
         "y2_name": "WEIGHT",
     }
     return gv.draw_histogram(data_meta_list, graph_meta)
示例#6
0
 def get_kmeans_graph(self, df_result, label):
     """Draw a scatter plot with one series per distinct value of ``label``.

     Args:
         df_result: Table indexed by column name and boolean mask; it must
             provide the columns ``label``, ``"x"`` and ``"y"``.
         label: Name of the column whose values define the series; it is
             also appended to the graph title.

     Returns:
         The value returned by ``GraphVisualizer.draw_scatter``.
     """
     visualizer = GraphVisualizer()
     visualizer.set_plotly()

     series = []
     # OrderedDict.fromkeys keeps each distinct value once, in first-seen order.
     for cluster_id in OrderedDict.fromkeys(df_result[label]):
         cluster_rows = df_result[df_result[label] == cluster_id]
         series.append({
             "data_name": cluster_id,
             "x_data": cluster_rows["x"],
             "y_data": cluster_rows["y"],
             "label": cluster_id,
         })

     layout = {
         "title": "K-Means Clutering Graph - " + label,
         "x_name": "TSNE X",
         "y_name": "TSNE Y",
     }
     return visualizer.draw_scatter(series, layout)
示例#7
0
 def get_weight(self, sentence):
     """Return the tokens of ``sentence`` with one attention weight each.

     The tokens (``"text"``) and the attention values (``"attn"``) are read
     from the result of ``self.__get_attention(self.model, sentence)``. The
     weight for token ``i`` is ``attn[11][11][i][i]``.

     Args:
         sentence: Input forwarded unchanged to ``self.__get_attention``.

     Returns:
         A dict with ``"token_list"`` (the tokens) and ``"weight_list"``
         (one weight per token, in the same order).
     """
     attn_data = self.__get_attention(self.model, sentence)
     token_list = attn_data["text"]
     # NOTE(review): the fixed indices 11/11 presumably pick one specific
     # layer and head of the attention output -- confirm against
     # __get_attention before changing them.
     weight_list = [
         attn_data["attn"][11][11][token_index][token_index]
         for token_index in range(len(token_list))
     ]
     return {"token_list": token_list, "weight_list": weight_list}
示例#8
0
 def get_co_matrix_graph(self, max_words):
     """Draw a co-occurrence frequency matrix for the leading word pairs.

     The first ``max_words`` entries of ``self.result_list`` are used. Each
     entry is unpacked as ``(co_word, freq)``, where ``co_word[0]`` and
     ``co_word[1]`` are the two words of the pair. Both axes share the same
     word list, ordered by first appearance, and the cell at (first word,
     second word) holds ``freq``; all other cells are 0.

     Args:
         max_words: Number of leading entries of ``self.result_list`` to
             include.

     Returns:
         The value returned by ``GraphVisualizer.draw_matrix``.
     """
     gv = GraphVisualizer()
     gv.set_plotly()
     top_pairs = self.result_list[:max_words]

     # Map every word to its axis position once, instead of rescanning the
     # axis list for each membership test and index lookup.
     x_data = []
     word_index = {}
     for co_word, _ in top_pairs:
         for word in (co_word[0], co_word[1]):
             if word not in word_index:
                 word_index[word] = len(x_data)
                 x_data.append(word)

     y_data = x_data
     z_data = [[0] * len(x_data) for _ in y_data]
     for co_word, freq in top_pairs:
         z_data[word_index[co_word[0]]][word_index[co_word[1]]] = freq

     data_meta = {
         "colorbar_title": "동시출현빈도 (CO-WORD FREQUENCY)",
         "x_data": x_data,
         "y_data": y_data,
         "z_data": z_data
     }
     graph_meta = {
         "title": "동시출현빈도 매트릭스 (CO-WORD MATRIX)",
         "height": 1000,
         "width": 1000,
         "y_tickangle": -45,
         "y_name": "Y",
         "x_tickangle": -45,
         "x_name": "X",
     }
     return gv.draw_matrix(data_meta, graph_meta)
示例#9
0
 def draw_sentence_tree(self, sentence, label_list, edge_list):
     """Delegate drawing of a sentence tree to ``GraphVisualizer``.

     Args:
         sentence: Forwarded unchanged to the visualizer.
         label_list: Forwarded unchanged to the visualizer.
         edge_list: Forwarded unchanged to the visualizer.

     Returns:
         The value returned by ``GraphVisualizer.draw_sentence_tree``.
     """
     visualizer = GraphVisualizer()
     visualizer.set_plotly()
     tree_graph = visualizer.draw_sentence_tree(sentence, label_list, edge_list)
     return tree_graph