def __init__(self):
    """
    ROUGE based summariser. This compares each sentence in the paper to the abstract to see which
    ones make the best summaries for the abstract. It is assumed that these sentences will then
    also be good highlights for the paper.
    """
    self.summary_length = 10
    self.r = Rouge()
    self.preprocessor = AbstractNetPreprocessor()
    self.computation_graph = graph()
    self.features_input = self.computation_graph["features_input"]
    self.prediction_probs = self.computation_graph["prediction_probs"]
    self.similarity_threshold = 0.75

    # Weighting constant used to combine the two classifiers in summarise(). Its original
    # value is not shown in this fragment; 0.0 (i.e. a plain average) is an assumed default.
    self.C = 0.0
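# A minimal usage sketch (hedged: the enclosing class name is not visible in this
# fragment, so "Summariser" below is a placeholder for whatever class defines
# __init__ and summarise):
#
#     summariser = Summariser()
#     summariser.summarise("some_paper.txt")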
# Create the feed_dict
feed_dict = {
    sentence_input: batch_inputs,
    abstract_input: batch_abstracts,
    features_input: batch_features,
    labels: batch_labels,
    seq_lens: lens,
    keep_prob: 1
}

# Run the SummariserNet graph to get the raw prediction probabilities
raw_probs_summnet = sess.run(raw_predictions, feed_dict=feed_dict)

# Probability of the positive class, i.e. of each sentence being a good summary sentence
prob_pos_summnet = raw_probs_summnet[:, 1]

tf.reset_default_graph()

features_graph = features_mlp.graph()
features_prediction_probs = features_graph["prediction_probs"]
sentence_input = features_graph["features_input"]
labels = features_graph["labels"]
loss = features_graph["loss"]
predictions = features_graph["prediction_probs"]
pred_answers = features_graph["prediction_class"]
correct_answers = features_graph["correct_answers"]
accuracy = features_graph["accuracy"]

with tf.Session() as sess:

    # Initialise all variables
    sess.run(tf.global_variables_initializer())

    # Saving object
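# A standalone illustration (plain NumPy, independent of the graphs above) of the
# [:, 1] slice used for prob_pos_summnet: each classifier emits one
# (negative, positive) probability pair per sentence, and column 1 is the
# probability that the sentence belongs in the summary. The values below are
# made up for demonstration.
import numpy as np

example_probs = np.array([[0.9, 0.1],   # sentence 0: weak summary candidate
                          [0.2, 0.8]])  # sentence 1: strong summary candidate
print(example_probs[:, 1])              # -> [0.1 0.8]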
def summarise(self, filename):
    """
    Generates a summary of the paper.
    :param filename: the name of the file to summarise
    :return: a summary of the paper.
    """
    # Each item has form (sentence, sentence_vector, abstract_vector, features)
    paper = self.prepare_paper(filename)

    # ========> Code from here on is summariser specific <========

    # Stores sentences, the probability of them being good summaries and their position in the paper
    sentences_and_summary_probs = []

    # Summary according to features
    sentences_feat_summary_probs = []

    tf.reset_default_graph()

    computation_graph = lstm_classifier.graph()
    sentence_input = computation_graph["inputs"]
    seq_lens = computation_graph["sequence_lengths"]
    prediction_probs = computation_graph["prediction_probs"]
    keep_prob = computation_graph["keep_prob"]

    with tf.Session() as sess:

        # Initialise all variables
        sess.run(tf.global_variables_initializer())

        # Saving object
        saver = tf.train.Saver()

        # Restore the saved model
        saver.restore(sess, lstm_classifier.SAVE_PATH)

        # Number of sentences in the paper
        num_sents = len(paper)

        # ----> Create the matrix of sentences for the LSTM <----
        sentence_list = []
        for sent, sent_vec, abs_vec, feats in paper:
            # Truncate overly long sentences to the maximum length the LSTM accepts
            if len(sent) < MAX_SENT_LEN:
                sentence_list.append(sent)
            else:
                sentence_list.append(sent[0:MAX_SENT_LEN])

        # Get the matrix representation of the sentences
        sentence_matrix, sent_lens = sents2input(sentence_list, num_sents)

        # Create the feed_dict
        feed_dict = {
            sentence_input: sentence_matrix,
            seq_lens: sent_lens,
            keep_prob: 1
        }

        # Predict how good a summary each sentence is using the computation graph
        probs = sess.run(prediction_probs, feed_dict=feed_dict)

        # Store the sentences and probabilities in a list to be sorted
        for i in range(num_sents):
            sentence = paper[i][0]
            sentence_vec = paper[i][1]
            prob = probs[i][1]
            sentences_and_summary_probs.append((sentence, sentence_vec, prob, i))

    tf.reset_default_graph()

    features_graph = features_mlp.graph()
    features_classifier_input = features_graph["features_input"]
    features_prediction_probs = features_graph["prediction_probs"]

    with tf.Session() as sess:

        # Initialise all variables
        sess.run(tf.global_variables_initializer())

        # Saving object
        saver = tf.train.Saver()

        # ====> Run the second graph <====
        saver.restore(sess, features_mlp.SAVE_PATH)

        # ----> Create the matrix of features for the feature MLP <----
        feature_matrix = np.zeros((num_sents, NUM_FEATURES), dtype=np.float32)
        i = 0
        for _, _, _, feat in paper:
            feature_matrix[i, :] = feat
            i += 1

        # Predict how good a summary each sentence is using the computation graph
        probs = sess.run(features_prediction_probs,
                         feed_dict={features_classifier_input: feature_matrix})

        # Store the sentences and probabilities in a list to be sorted
        for i in range(num_sents):
            sentence = paper[i][0]
            sentence_vec = paper[i][1]
            prob = probs[i][1]
            sentences_feat_summary_probs.append((sentence, sentence_vec, prob, i))

    # ====> Combine the results <====

    # This list is now sorted by the probability of the sentence being a good summary sentence
    # sentences_and_summary_probs = [x for x in reversed(sorted(sentences_and_summary_probs, key=itemgetter(2)))]

    # Sort features list in probability order
    # sentences_feat_summary_probs = [x for x in reversed(sorted(sentences_feat_summary_probs, key=itemgetter(2)))]

    summary = []
    sents_already_added = set()

    # ====> Attempt Four <====
    final_sents_probs = []
    for item in zip(sentences_feat_summary_probs, sentences_and_summary_probs):
        prob_summNet = item[1][2] * (1 - self.C)
        prob_Features = item[0][2] * (1 + self.C)
        avg_prob = (prob_summNet +
                    prob_Features) / 2
        final_sents_probs.append((item[0][0], item[0][1], avg_prob, item[0][3]))

    final_sents_probs = [x for x in reversed(sorted(final_sents_probs, key=itemgetter(2)))]

    summary = final_sents_probs[0:self.summary_length]

    """
    # ====> Attempt Three <====
    # Take summary sentences from features
    summary = sentences_feat_summary_probs[0:self.summary_length]
    for item in summary:
        sents_already_added.add(item[3])

    # Add ones from summary net if it's sure of them and they aren't there already
    max_additional = 5
    count_additional = 0
    for item in sentences_and_summary_probs:
        if count_additional > max_additional:
            break
        if item[3] not in sents_already_added and item[2] > 0.95:
            summary.append(item)
            sents_already_added.add(item[3])
            count_additional += 1
    """

    """
    # ====> Attempt Two <====
    i = 0
    while len(summary) < self.summary_length:
        if i >= len(sentences_feat_summary_probs) and i >= len(sentences_and_summary_probs):
            break
        feats = sentences_feat_summary_probs[i]
        summNet = sentences_and_summary_probs[i]
        feats_prob = feats[2]
        summNet_prob = summNet[2]
        if feats_prob >= summNet_prob and feats[3] not in sents_already_added:
            summary.append(feats)
            sents_already_added.add(feats[3])
        elif summNet_prob > feats_prob and summNet[3] not in sents_already_added:
            summary.append(summNet)
            sents_already_added.add(summNet[3])
        i += 1
    """

    """
    # ====> Attempt One <====
    # True to select a summary sentence from summ_net, false to select from features
    summ_net = True
    for i in range(num_sents):
        if len(summary) >= self.summary_length \
                or len(sentences_and_summary_probs) <= 0 \
                or len(sentences_feat_summary_probs) <= 0:
            break

        added = False

        if summ_net:
            while not added:
                if len(sentences_and_summary_probs) <= 0:
                    break
                highest_prob = sentences_and_summary_probs.pop(0)
                if highest_prob[3] in sents_already_added or len(highest_prob[0]) < self.min_sent_len:
                    continue
                else:
                    summary.append(highest_prob)
                    sents_already_added.add(highest_prob[3])
                    added = True
            summ_net = False
        else:
            while not added:
                if len(sentences_feat_summary_probs) <= 0:
                    break
                highest_prob = sentences_feat_summary_probs.pop(0)
                if highest_prob[3] in sents_already_added or len(highest_prob[0]) < self.min_sent_len:
                    continue
                else:
                    summary.append(highest_prob)
                    sents_already_added.add(highest_prob[3])
                    added = True
            summ_net = True
    """

    # Order summary sentences according to the order they appear in the paper
    ordered_summary = sorted(summary, key=itemgetter(-1))

    # Print the summary
    summary = []
    for sentence, sentence_vec, prob, pos in ordered_summary:
        sentence = " ".join(sentence)
        summary.append((sentence, pos))

    useful_functions.write_summary(SUMMARY_WRITE_LOC, summary, filename.strip(".txt"))

    for sentence in summary:
        print(sentence)
        print()
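# A standalone sketch of the "Attempt Four" combination rule used above, assuming
# C is a small constant in [0, 1): the SummariserNet probability is scaled by
# (1 - C) and the feature-classifier probability by (1 + C) before averaging, so
# a larger C biases the ensemble towards the feature classifier. The helper name
# and the example numbers are illustrative only.
def combine_probs_sketch(prob_summnet, prob_features, C=0.1):
    """Biased average of the two classifiers' positive-class probabilities."""
    return (prob_summnet * (1 - C) + prob_features * (1 + C)) / 2

# With C = 0.1: (0.8 * 0.9 + 0.6 * 1.1) / 2 = (0.72 + 0.66) / 2 = 0.69
assert abs(combine_probs_sketch(0.8, 0.6) - 0.69) < 1e-9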