def summarize(self, text, n_sents=3): """ Summarize a given text and get top sentences """ try: prediction = dict() if text: if self.lang_code in self.valid_langs: if Utility.get_doc_length(text) > self.n_words: # generate sentences, normalized sentences from text sents, norm_sents = self.p.text_preprocessing(text) # generate doc-term-matrix, term-doc-matrix dt_matrix = self.generate_doc_term_matrix(norm_sents) td_matrix = self.generate_term_doc_matrix(dt_matrix) if self.method == "LSA": lsa = LSA(self.k, td_matrix) term_topic_matrix, singular_values, topic_doc_matrix = lsa.u, lsa.s, lsa.vt # remove singular values below given treshold singular_values = lsa.filter_singular_values( singular_values, self.sv_threshold) # get salience scores from top singular values & topic document matrix salience_scores = lsa.get_salience_scores( singular_values, topic_doc_matrix) # get the top sentence indices for summarization top_sentence_indices = lsa.get_top_sent_indices( salience_scores, n_sents) summary = self.generate_summary( sents, top_sentence_indices) elif self.method == "TEXT_RANK": tr = TextRank(dt_matrix, td_matrix) # build similarity graph similarity_matrix = tr.similiarity_matrix similarity_graph = tr.get_similarity_graph( similarity_matrix) # compute pagerank scores for all sentences ranked_sents = tr.rank_sentences(similarity_graph) # get the top sentence indices for summarization top_sentence_indices = tr.get_top_sentence_indices( ranked_sents, n_sents) summary = self.generate_summary( sents, top_sentence_indices) else: return "no method found" # apply cleaning for readability summary = Utility.remove_multiple_whitespaces(summary) summary = Utility.remove_trailing_whitespaces(summary) prediction["summary"] = summary prediction["message"] = "successful" else: return "required at least {} words".format( self.n_words) else: return "language not supported".format() else: return "required textual content" return prediction except Exception: logging.error("exception occured", exc_info=True)