Python LSA.filter_singular_values示例

编程语言: Python

命名空间/包名称: lsa

类/类型: LSA

方法/功能: filter_singular_values

hotexamples.com的示例: 1

Python LSA.filter_singular_values - 已找到1个示例。以下是从开源项目中提取的、评价最高的 lsa.LSA.filter_singular_values 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

LSA(11)

document_topics(1)

filter_singular_values(1)

fit(1)

folding_in(1)

get_salience_scores(1)

get_top_sent_indices(1)

plot(1)

start(1)

topic_labels(1)

train(1)

transform_plot(1)

unigram_smoothing(1)

word_topics(1)

示例#1

显示文件

    def summarize(self, text, n_sents=3):
        """Build an extractive summary of ``text`` from its top-ranked sentences.

        The checks run in a fixed order: non-empty text, supported language,
        minimum document length. Only then is the text preprocessed and ranked
        with the strategy selected by ``self.method`` ("LSA" or "TEXT_RANK").

        Args:
            text: Document text to summarize.
            n_sents: Number of sentences passed to the ranking step
                (defaults to 3).

        Returns:
            A dict with the keys ``"summary"`` and ``"message"`` on success;
            a plain string describing the problem when a precondition fails
            or ``self.method`` is not recognised; ``None`` when any exception
            is raised (the exception is logged, not propagated).
        """
        try:
            # Guard clauses, evaluated in the same order as the checks they replace.
            if not text:
                return "required textual content"
            if self.lang_code not in self.valid_langs:
                return "language not supported"
            if not (Utility.get_doc_length(text) > self.n_words):
                return "required at least {} words".format(self.n_words)

            # Original sentences plus their normalized counterparts.
            sentences, normalized = self.p.text_preprocessing(text)
            # Document-term matrix and the term-document matrix built from it.
            dt_matrix = self.generate_doc_term_matrix(normalized)
            td_matrix = self.generate_term_doc_matrix(dt_matrix)

            if self.method == "LSA":
                lsa = LSA(self.k, td_matrix)
                term_topic_matrix = lsa.u
                sigma = lsa.s
                topic_doc_matrix = lsa.vt
                # Drop singular values below the configured threshold.
                sigma = lsa.filter_singular_values(sigma, self.sv_threshold)
                # Salience scores from the kept singular values and the
                # topic-document matrix.
                scores = lsa.get_salience_scores(sigma, topic_doc_matrix)
                chosen = lsa.get_top_sent_indices(scores, n_sents)
            elif self.method == "TEXT_RANK":
                ranker = TextRank(dt_matrix, td_matrix)
                # Similarity graph built from the ranker's similarity matrix.
                graph = ranker.get_similarity_graph(ranker.similiarity_matrix)
                # Rank all sentences on the graph, then pick the top ones.
                ranking = ranker.rank_sentences(graph)
                chosen = ranker.get_top_sentence_indices(ranking, n_sents)
            else:
                return "no method found"

            # Both strategies assemble the summary the same way.
            summary = self.generate_summary(sentences, chosen)

            # Whitespace cleanup for readability.
            summary = Utility.remove_trailing_whitespaces(
                Utility.remove_multiple_whitespaces(summary))

            return {"summary": summary, "message": "successful"}
        except Exception:
            logging.error("exception occured", exc_info=True)
            return None