def summarize4(sents, docs=None):
    """Build a short summary string from the top-ranked sentences of ``sents``.

    NOTE(review): a second ``summarize4`` with the same body is defined
    directly after this one and shadows it once the module is loaded --
    consider removing one of the two.

    Args:
        sents: Indexable sequence of sentence strings.
        docs: Optional pre-tokenized form of ``sents``. When falsy (``None``
            or empty) it is rebuilt by calling ``Tokenize`` on every
            sentence and materializing each result as a list.

    Returns:
        The sentences selected by ``TextRank.top_index(3)``, taken in
        ascending index order, joined with ``u"。 "``, with every CR and LF
        character removed and a trailing ``u"。"`` appended.
    """
    if not docs:
        docs = [list(Tokenize(sentence)) for sentence in sents]

    ranker = TextRank(bm25_weights(docs))
    ranker.solve()

    # Sorting the chosen indices keeps the picked sentences in input order.
    chosen = [sents[position] for position in sorted(ranker.top_index(3))]

    summary = u"。 ".join(chosen)
    summary = summary.replace("\r", "")
    summary = summary.replace("\n", "")
    return summary + u"。"
def summarize4(sents, docs=None):
    """Summarize ``sents`` by concatenating its highest-ranked sentences.

    NOTE(review): this definition repeats the ``summarize4`` defined
    immediately before it and replaces that earlier binding -- confirm the
    duplication is intentional.

    Args:
        sents: Indexable sequence of sentence strings.
        docs: Optional token lists, one per sentence. Any falsy value
            (``None`` or an empty sequence) causes the token lists to be
            recomputed from ``sents`` with ``Tokenize``.

    Returns:
        A single string: the sentences whose indices come from
        ``TextRank.top_index(3)``, ordered by ascending index, joined with
        ``u'。 '``, stripped of CR/LF characters, and ending in ``u'。'``.
    """
    token_lists = docs
    if not token_lists:
        token_lists = [list(Tokenize(sent)) for sent in sents]

    similarity = bm25_weights(token_lists)
    text_rank = TextRank(similarity)
    text_rank.solve()

    # Ascending index order means the summary follows the source order.
    ordered_indices = sorted(text_rank.top_index(3))
    body = u'。 '.join(sents[idx] for idx in ordered_indices)
    return body.replace('\r', '').replace('\n', '') + u'。'
class Order:
    """Wrap a piece of text and expose sentence, keyword and summary helpers.

    Sentences are obtained with ``get_sentences``; keywords are ranked with
    ``KeywordRank`` and summary sentences with ``TextRank``, both fed with
    the segmented, stop-word-filtered sentences.
    """

    def __init__(self, text, seg=None, tagger=None):
        """Store ``text`` and resolve the segmenter and tagger.

        Args:
            text: The raw text to analyse.
            seg: Segmenter exposing ``seg(sentence)``. When ``None`` one is
                obtained from ``get_seg()``.
            tagger: Tagger object. When ``None`` one is obtained from
                ``get_tagger()``. It is stored but not used by the methods
                of this class.
        """
        self.text = text
        self.tagger = tagger if tagger is not None else self.get_tagger()
        self.seg = seg if seg is not None else self.get_seg()
        # Optional merger used by get_keywords(merge=True). Nothing in this
        # class assigns it, so callers have to set it themselves.
        self.words_merge = None

    def get_keywords(self, limit=5, merge=False):
        """Rank the words of the text and return the top entries.

        The ``KeywordRank`` instance is kept on ``self.keywordrank``.

        Args:
            limit: Forwarded to ``KeywordRank.top_index``.
            merge: When true, the ranked keywords are handed to
                ``self.words_merge`` and the merged result is returned.

        Returns:
            A list of the items produced by ``top_index(limit)``; with
            ``merge=True``, whatever
            ``self.words_merge.merge(self.text, result).merge()`` returns.

        Raises:
            AttributeError: If ``merge`` is true while ``words_merge`` is
                still ``None``.
        """
        if merge and self.words_merge is None:
            # Fail before the ranking work with an actionable message rather
            # than with an opaque NoneType attribute error afterwards.
            raise AttributeError(
                "words_merge is not set; assign a merger to 'words_merge' "
                "before calling get_keywords(merge=True)"
            )

        doc = self._tokenize_sentences(self.get_sentences())
        self.keywordrank = KeywordRank(doc)
        self.keywordrank.solve()
        result = list(self.keywordrank.top_index(limit))
        if merge:
            wm = self.words_merge.merge(self.text, result)
            return wm.merge()
        return result

    def get_summaries(self, limit=5):
        """Return the top-ranked sentences of the text.

        The ``TextRank`` instance is kept on ``self.textrank``.

        Args:
            limit: Forwarded to ``TextRank.top_index``.

        Returns:
            A list of sentences, in the order in which ``top_index(limit)``
            yields their indices (the indices are not re-sorted).
        """
        sentences = self.get_sentences()
        self.textrank = TextRank(self._tokenize_sentences(sentences))
        self.textrank.solve()
        return [sentences[index] for index in self.textrank.top_index(limit)]

    def get_sentences(self):
        """Split ``self.text`` into stripped, non-empty sentences.

        The text is first split on CR/LF characters, then each non-blank
        line is split on the delimiter characters ``,。?!;``.

        Returns:
            A list of sentence strings in their order of appearance.
        """
        line_break_re = re.compile('[\r\n]')
        delimiter_re = re.compile('[,。?!;]')
        sentences = []
        for line in line_break_re.split(self.text):
            line = line.strip()
            if not line:
                continue
            pieces = (piece.strip() for piece in delimiter_re.split(line))
            # Drop the empty fragments left by leading, trailing or
            # consecutive delimiters.
            sentences.extend(piece for piece in pieces if piece)
        return sentences

    def _tokenize_sentences(self, sentences):
        """Segment each sentence and drop stop words; one word list each."""
        return [
            self.filter_stop(list(self.seg.seg(sentence)))
            for sentence in sentences
        ]

    def get_seg(self, fname='seg.pickle'):
        """Create a ``Seg`` and load it from ``fname``.

        NOTE(review): the default file name suggests a pickle; if
        ``Seg.load`` unpickles, only point it at trusted files -- confirm.
        """
        seg = Seg()
        seg.load(fname)
        return seg

    def get_tagger(self, fname='tag.pickle'):
        """Create a ``Tag`` and load it from ``fname``.

        NOTE(review): the default file name suggests a pickle; if
        ``Tag.load`` unpickles, only point it at trusted files -- confirm.
        """
        tagger = Tag()
        tagger.load(fname)
        return tagger

    def filter_stop(self, words):
        """Return a new list with every word found in ``stop_words`` removed."""
        return [word for word in words if word not in stop_words]