def calculate_features(self, sentence, bag_of_words, keyphrases, abstract, title, section, shorter=False):
        """
        Builds the feature vector for a single sentence.
        :param sentence: the sentence, as a list of word strings.
        :param bag_of_words: dict bag-of-words for the paper; keys are words, values are counts.
        :param keyphrases: the keyphrases of the paper.
        :param abstract: the paper's abstract as a list of strings.
        :param title: the title of the paper.
        :param section: name of the paper section this sentence came from.
        :param shorter: if True, return only the scalar features (no word vectors).
        :return: a tuple of features for the sentence.
        """
        # Scalar similarity / salience scores.
        rouge_vs_abstract = useful_functions.compute_rouge_abstract_score(sentence, abstract)
        tf_idf = useful_functions.calculate_tf_idf(sentence, self.global_paper_count, bag_of_words)
        doc_tf_idf = useful_functions.calculate_document_tf_idf(sentence, bag_of_words)
        kp_score = useful_functions.calculate_keyphrase_score(sentence, keyphrases)
        title_words = {x for x in title if x not in STOPWORDS}
        title_score = useful_functions.calculate_title_score(sentence, title_words)

        # Simple surface features.
        sent_len = len(sentence)
        numeric_count = sum(1 for word in sentence if useful_functions.is_number(word))

        # Map the section name onto its categorical code; first match wins.
        sec = OTHER
        for markers, label in (
                (("HIGHLIGHT",), HIGHLIGHT),
                (("ABSTRACT",), ABSTRACT),
                (("INTRODUCTION",), INTRODUCTION),
                (("RESULT", "DISCUSSION"), RESULT_DISCUSSION),
                (("CONCLUSION",), CONCLUSION),
                (("METHOD",), METHOD)):
            if any(marker in section for marker in markers):
                sec = label
                break

        if shorter:
            return rouge_vs_abstract, tf_idf, doc_tf_idf, kp_score, title_score, numeric_count, \
                   sent_len, sec

        # Word-embedding features for the first word and the first word pair;
        # fall back to zero vectors for very short sentences or OOV words.
        zeros = [0] * self.word2vec_feature_nums
        if sent_len > 2 and sentence[0] in self.vocab:
            first_word = self.word2vec[sentence[0]]
        else:
            first_word = zeros

        if sent_len > 2 and sentence[0] in self.vocab and sentence[1] in self.vocab:
            first_pair = np.concatenate((self.word2vec[sentence[0]], self.word2vec[sentence[1]]))
        else:
            first_pair = [0] * (self.word2vec_feature_nums * 2)

        return rouge_vs_abstract, tf_idf, doc_tf_idf, kp_score, title_score, numeric_count, \
               sent_len, sec, first_word, first_pair
# Example no. 2
    def summarise(self, filename):
        """
        Generates a summary of the paper.
        :param filename: the name of the file to summarise.
        :return: a summary of the paper as a list of (sentence, position) tuples;
                 the summary is also written to SUMMARY_WRITE_LOC and printed.
        """

        paper = self.prepare_paper(filename)

        bag_of_words = self.paper_bags_of_words[filename]
        # NOTE(review): looked up but not used below — kept for its KeyError
        # check that keyphrases exist for this paper; confirm before removing.
        paper_keyphrases = self.keyphrases[filename]

        # We don't want to make any predictions for the Abstract or Highlights
        # as these are already summaries.
        # BUG FIX: dict.iteritems() is Python 2 only; use items() (file already
        # uses Python 3 print() calls).
        sections_to_predict_for = []
        for section, text in paper.items():
            if section != "ABSTRACT" and section != "HIGHLIGHTS":
                sections_to_predict_for.append(text)

        # Sorts the sections according to the order in which they appear in the
        # paper (each item is (sentences, section_position_in_paper)).
        sorted_sections_to_predict_for = sorted(sections_to_predict_for, key=itemgetter(1))

        # Flatten into a list of sentences in paper order; each sentence is a
        # list of words.
        sentence_list = []
        for section_sentences, _position in sorted_sections_to_predict_for:
            sentence_list.extend(section_sentences)

        # Score each sentence; here the tf-idf score stands in as the
        # "summary-worthiness" prediction.
        predictions = []
        for sentence_text in sentence_list:
            tf_idf = useful_functions.calculate_tf_idf(sentence_text, self.global_paper_count, bag_of_words)
            predictions.append(tf_idf)

        # List of (sentence_text, sentence_index_in_paper, tf_idf_score).
        sentence_list_with_predictions = list(zip(sentence_list, range(len(sentence_list)), predictions))

        # Sort by score, highest first.
        sorted_predictions = sorted(sentence_list_with_predictions, key=itemgetter(-1), reverse=True)

        # Take the top few sentences to form the summary.
        summary_sents = sorted_predictions[0:self.summary_length]

        # Order summary sentences according to the order they appear in the paper.
        ordered_summary = sorted(summary_sents, key=itemgetter(1))

        # Build (sentence_string, position_in_paper) pairs.
        summary = []
        for words, sentence_position, _score in ordered_summary:
            summary.append((" ".join(words), sentence_position))

        # BUG FIX: filename.strip(".txt") strips the CHARACTERS '.', 't', 'x'
        # from both ends (e.g. "text.txt" -> "e"); remove the suffix instead.
        base_name = filename[:-len(".txt")] if filename.endswith(".txt") else filename
        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary, base_name)

        for sentence in summary:
            print(sentence)
            print()