Пример #1
0
    def summarize(self, text, n):
        text = text.replace('\n', '')
        text = text.replace('\r', '')
        text = utils.as_text(text)
        tokens = utils.cut_sentences(text)
        sentences, sents = utils.cut_filter_words(tokens, self.__stop_words, self.__use_stopword)

        graph = self.create_graph(sents)
        scores = utils.weight_map_rank(graph, self.__max_iter, self.__tol)
        sent_selected = nlargest(n, zip(scores, count()))
        sent_index = []
        for i in range(n):
            sent_index.append(sent_selected[i][1])
        return [sentences[i] for i in sent_index]
Пример #2
0
    def keywords(self, text, n):
        text = text.replace('\n', '')
        text = text.replace('\r', '')
        text = utils.as_text(text)
        tokens = utils.cut_sentences(text)
        sentences, sents = utils.psegcut_filter_words(tokens,
                                                      self.__stop_words,
                                                      self.__use_stopword)

        word_index, index_word, words_number = self.build_vocab(sents)
        graph = self.create_graph(sents, words_number,
                                  word_index, window=self.__window)
        scores = utils.weight_map_rank(graph, max_iter=self.__max_iter,
                                       tol=self.__tol)
        sent_selected = nlargest(n, zip(scores, count()))
        sent_index = []
        for i in range(n):
            sent_index.append(sent_selected[i][1])
        return [index_word[i] for i in sent_index]