def handle_arguments():
    """Handle the command line arguments.

    Reads ``argv`` as ``[program, file, parameter, quantity]``:

    * ``file`` is passed to ``extractor.get_words`` / ``get_sentences``.
    * ``parameter`` must be ``-s`` (report on sentences) or ``-w`` (report
      on words); anything else prints the usage text.
    * ``quantity`` must be a non-negative integer no larger than the number
      of sentences (``-s``) or words (``-w``) extracted from the file.

    On success the scores and their sorted form are handed to
    ``print_popular`` together with ``quantity``. Every failure path prints
    a message and returns ``None`` without raising.
    """
    # Missing arguments used to surface as an IndexError on argv[3].
    if len(argv) < 4:
        print_usage()
        return

    # isdecimal() matches exactly the digit strings int() can parse.
    # isdigit() also accepts characters such as superscript digits, which
    # made the int() call below raise ValueError.
    if not argv[3].isdecimal():
        print("The quantity parameter must be an integer.")
        return

    path = argv[1]
    parameter = argv[2]
    quantity = int(argv[3])

    if parameter not in ('-s', '-w'):
        print_usage()
        return

    words = extractor.get_words(path)
    words_scores = get_word_scores(words)
    sentences = extractor.get_sentences(path)
    sentences_scores = get_sentence_scores_dict(sentences, words_scores)

    # NOTE(review): the bounds below use the raw word/sentence lists, while
    # print_popular receives the score mappings. If those mappings collapse
    # duplicates they may be shorter than the lists -- confirm.
    if parameter == '-s':
        if quantity > len(sentences):
            print(
                "Quantity specified is greater than the number of sentences.")
        else:
            print_popular(sentences_scores,
                          sort_dictionary(sentences_scores), quantity)
    else:
        if quantity > len(words):
            print("Quantity specified is greater than the number of words.")
        else:
            print_popular(words_scores, sort_dictionary(words_scores),
                          quantity)
def summarize(filename, num_of_sentences):
    """Print a summary of a file built from its top-scoring sentences.

    The summary contains the sentences selected by ``scoring.top_sentences``
    for the ``num_of_sentences``-th highest score, joined by single spaces.
    If ``num_of_sentences`` exceeds the number of sentences left after
    transition sentences are omitted, a message is printed instead.

    Nothing is returned; the result goes to standard output.
    """
    # Score every word, then every remaining (non-transition) sentence.
    words = extractor.get_words(filename)
    scores_by_word = scoring.get_word_scores(words)
    candidates = filter.omit_transition_sentences(
        extractor.get_sentences(filename))
    candidate_scores = scoring.get_sentence_scores_list(candidates,
                                                        scores_by_word)

    if num_of_sentences > len(candidates):
        print("The summary cannot be longer than the text.")
        return

    # Pick the x highest-scoring sentences, in chronological order.
    cutoff = scoring.x_highest_score(candidate_scores, num_of_sentences)
    chosen = scoring.top_sentences(candidates, candidate_scores, cutoff)

    # One space between sentences, none after the last.
    print(" ".join(chosen))
def summarize(filename, topics, input_words, num_of_sentences):
    """Summarize a file, producing one summary string per sentence group.

    The file's sentences (minus transition sentences) are split into groups
    by ``topic_sent(all_sentences, topics)``. Each group is scored,
    de-duplicated via ``additional_filter.remove_duplicates`` and reduced
    to at most ``num_of_sentences`` sentences.

    Args:
        filename: File handed to ``extractor.get_words`` / ``get_sentences``.
        topics: Passed through to ``topic_sent``.
        input_words: Heading labels, indexed in step with the groups; must
            have at least as many entries as there are groups.
        num_of_sentences: Maximum number of sentences per group; clamped to
            the group's size when the group is shorter.

    Returns:
        A list of strings, one per group, each of the form
        ``"<label>: \\n<sentence> <sentence> \\n"``. A group with nothing to
        show (empty group, or a non-positive ``num_of_sentences``) yields
        just the heading.
    """
    # Extract all the words and sentences and get their respective scores.
    all_words = extractor.get_words(filename)
    word_scores = scoring.get_word_scores(all_words)
    all_sentences = extractor.get_sentences(filename)
    all_sentences = filter.omit_transition_sentences(all_sentences)
    sentence_groups = topic_sent(all_sentences, topics)

    complete_summary = []
    for index, group in enumerate(sentence_groups):
        heading = input_words[index] + ": \n"

        group_scores = scoring.get_sentence_scores_list(group, word_scores)
        group, group_scores = additional_filter.remove_duplicates(
            group, group_scores)

        # A summary cannot be longer than the (de-duplicated) group.
        limit = min(num_of_sentences, len(group))

        # With limit == 0 the old slice [-0:] selected *every* sentence
        # rather than none, so handle "nothing to show" explicitly.
        if limit <= 0:
            complete_summary.append(heading + '\n')
            continue

        # Get x sentences with the highest scores, in chronological order.
        threshold = scoring.x_highest_score(group_scores, limit)
        top_sentences = scoring.top_sentences(group, group_scores, threshold)

        # Keep at most `limit` sentences (presumably top_sentences can
        # return extras when scores tie at the threshold -- confirm).
        top_sentences = top_sentences[-limit:]

        body = "".join(sentence + " " for sentence in top_sentences)
        complete_summary.append(heading + body + '\n')

    return complete_summary
for phrase in transition_phrases: if lower.startswith(phrase): return True return False def omit_transition_sentences(sentences): transition_phrases = get_transition_phrases() result = [] for sentence in sentences: if not is_transition_phrase(transition_phrases, sentence): result.append(sentence) return result if __name__ == "__main__": if len(argv) == 2: transition_phrases = get_transition_phrases() sentences = extractor.get_sentences(argv[1]) count = 0 for sentence in sentences: for phrase in transition_phrases: lower = sentence.lower() if lower.startswith(phrase): print("Omitted: " + sentence) count += 1 break print("Omitted", count, "sentence(s).") else: print_usage()