示例#1
0
    def results_relevant_documents_to_one_query(self, query):
        """Rank the indexed documents against a single free-text query.

        The query is parsed and stemmed, reduced to a ``{term: value}``
        mapping (the first element of each term's details, used as its
        frequency) and handed to ``self.ranker`` together with the index.

        :param query: raw query text passed to ``Parse.parse_text``.
        :return: list of the keys of the ranker's result (presumably document
            identifiers -- confirm against the ranker), or ``[]`` when
            ``self.final_dic`` is empty.
        """
        print("results_relevant_documents_to_one_query")

        # Without an index there is nothing to rank. This early exit skips
        # both the "finish" message and the ranker reset.
        if not self.final_dic:
            return []

        parsed_query = Parse.parse_text(query)
        stemmed_query = Stemmer.stemming(parsed_query, self.stemm_mode)

        # Keep only the first detail of every term for the ranker.
        term_frequencies = {
            term: details[0] for term, details in stemmed_query.items()
        }

        ranking = self.ranker.rank(
            self.stemm_mode,
            self.all_document,
            self.final_dic,
            self.path_folder,
            term_frequencies,
            self.semantic_mode,
        )

        print("finish")
        self.ranker.reset_rank()
        return list(ranking.keys())
示例#2
0
    def results_relevant_documents(self, queries_dictionary):
        """Rank the indexed documents for every query in a batch.

        For each query the title terms keep the first element of their
        details (used as frequency). The description and filtered narrative
        terms that exist in ``self.final_dic`` are scored as
        ``weight_idf * idf + weight_df * frequency``, trimmed to the
        highest-scoring ones and merged into the title terms before the
        combined mapping is sent to ``self.ranker``.

        :param queries_dictionary: mapping of query number to an indexable
            value whose items 0, 1 and 2 are the title, description and
            narrative texts.
        :return: list of ``(query number, list of ranker result keys)``
            tuples, one per query; empty when ``self.final_dic`` is empty.
        """
        print("results_relevant_documents")
        ranked_queries = []

        # With an empty index no query is processed at all.
        pending_queries = queries_dictionary.items() if self.final_dic else ()

        for query_id, fields in pending_queries:
            title_text = fields[0]
            description_text = fields[1]
            narrative_text = fields[2]

            # Title terms.
            parsed_title = Parse.parse_text(title_text)
            title_terms = Stemmer.stemming(parsed_title, self.stemm_mode)

            # The narrative is filtered first, then parsed together with the
            # description as one text.
            filtered_narrative = Parse.parse_query_narrative(narrative_text)
            body_text = filtered_narrative + ' ' + description_text
            parsed_body = Parse.parse_text(body_text)
            body_terms = Stemmer.stemming(parsed_body, self.stemm_mode)

            # Score each body term that exists in the index.
            weighted_terms = {}
            for term, details in body_terms.items():
                occurrences = details[0]
                if term not in self.final_dic:
                    continue
                idf = self.final_dic[term][1]
                weighted_terms[term] = (
                    self.weight_idf * idf) + (self.weight_df * occurrences)

            # Keep only a fraction (1 / denominator, truncated) of the
            # scored terms: the highest-scoring ones.
            keep_count = int(len(weighted_terms) / self.denominator)
            top_scored = Counter(weighted_terms).most_common(keep_count)
            top_weighted_terms = dict(top_scored)

            # Start from the title terms, then add the selected body terms,
            # summing the values of terms present in both.
            query_terms = {
                term: details[0] for term, details in title_terms.items()
            }
            for term, weight in top_weighted_terms.items():
                query_terms[term] = (
                    query_terms[term] + weight
                    if term in query_terms else weight)

            ranking = self.ranker.rank(
                self.stemm_mode,
                self.all_document,
                self.final_dic,
                self.path_folder,
                query_terms,
                self.semantic_mode,
            )
            # Only the keys of the ranker's result are reported per query.
            ranked_queries.append((query_id, list(ranking.keys())))

        print("finish")
        self.ranker.reset_rank()
        return ranked_queries