def summarize(document, all=True): doc = Document(document) sentences, offset = (doc.all_sentences() if all else doc.filtered_sentences()) # Ranker ranker = TextRank(sentences) ranker.rank() scores = ranker.scores # Selector summary = [] sum_len = 0 for x in range(num): idx = scores[x][0] + offset sent = doc[idx].sentence if sum_len + len(sent.split(' ')) > MAXLEN: break summary.append((sent, scores[x][1], doc.get_section_name(idx))) sum_len += len(sent.split(' ')) text = '' logit("\nP10-1024") logit("\nAll Sentences" if all else "\nFiltered Sentences") logit("Length of summary : " + str(sum_len)) for sent, score, section in summary: text += '\n' + "[" + section.encode('utf-8') + "] " + \ sent.encode('utf-8') #"[" + str(score) + "] " + sent.encode('utf-8') logit(text) # Printer # this has to be automated file = DIR['BASE'] + "data/Summary.txt" with open(file, 'w') as sfile: sfile.write('\n'.join([sent for sent, sc, sec in summary]).encode('utf-8')) # Evaluator guess_summary_list = [file] ref_summary_list = [[DIR['BASE'] + "data/P10-1024-Ref1.txt"]] recall, precision, F_measure = PythonROUGE(guess_summary_list, ref_summary_list, ngram_order=1) logit("Recall:{0} ; Precision:{1} ; F:{2}".format(recall, precision, F_measure))
def summarize(document, all=True): doc = Document(document) sentences, offset = (doc.all_sentences() if all else doc.filtered_sentences()) # Ranker ranker = TextRank(sentences) ranker.rank() scores = ranker.scores # Selector summary = [] sum_len = 0 for x in range(num): idx = scores[x][0] + offset sent = doc[idx].sentence if sum_len + len(sent.split(' ')) > MAXLEN: break summary.append((sent, scores[x][1], doc.get_section_name(idx))) sum_len += len(sent.split(' ')) text = '' logit("\nP10-1024") logit("\nAll Sentences" if all else "\nFiltered Sentences") logit("Length of summary : " + str(sum_len)) for sent, score, section in summary: text += '\n' + "[" + section.encode('utf-8') + "] " + \ sent.encode('utf-8') #"[" + str(score) + "] " + sent.encode('utf-8') logit(text) # Printer # this has to be automated file = DIR['BASE'] + "data/Summary.txt" with open(file, 'w') as sfile: sfile.write('\n'.join([sent for sent, sc, sec in summary]). encode('utf-8')) # Evaluator guess_summary_list = [file] ref_summary_list = [[DIR['BASE'] + "data/P10-1024-Ref1.txt"]] recall, precision, F_measure = PythonROUGE(guess_summary_list, ref_summary_list, ngram_order=1) logit("Recall:{0} ; Precision:{1} ; F:{2}".format(recall, precision, F_measure))