Exemplo n.º 1
0
def search_and_rank_query(query, docs, k, stemming, output_path):
    """Run a single query through the parse / search / rank pipeline.

    The query has its stop words removed by the parser, is expanded through
    ``WordNet.expand_query`` and is then parsed into a pair of values
    (query terms and entities).  Those are handed to the searcher to collect
    candidate documents, which the searcher's ranker orders.

    :param query: the query to process
    :param docs: first argument forwarded to ``Searcher``
    :param k: forwarded to ``retrieve_top_k`` as the result limit
    :param stemming: forwarded to ``Parse``
    :param output_path: second argument forwarded to ``Searcher``
    :return: whatever ``ranker.retrieve_top_k`` returns for the ranked docs
    """
    parser = Parse(stemming)
    expander = WordNet()

    # Stop words are stripped first so that only meaningful terms get expanded.
    without_stopwords = parser.remove_stopwords(query)
    expanded_query = expander.expand_query(without_stopwords)
    terms, entities = parser.parse_query(expanded_query)

    engine = Searcher(docs, output_path)
    candidates = engine.relevant_docs_from_posting(terms, entities)
    ordered = engine.ranker.rank_relevant_docs(candidates)
    top_results = engine.ranker.retrieve_top_k(ordered, k)
    return top_results
Exemplo n.º 2
0
def search_and_rank_query(query, inverted_index, k, number_of_documents,
                          inverted_documents_dict, load_path):
    """Parse a query, look up matching documents, rank them and keep the top k.

    :param query: the query to process
    :param inverted_index: first argument forwarded to ``Searcher``
    :param k: forwarded to ``retrieve_top_k`` as the result limit
    :param number_of_documents: second argument forwarded to ``Searcher``
    :param inverted_documents_dict: forwarded to the ranker's
        ``rank_relevant_doc`` together with the candidates and the
        normalized query
    :param load_path: third argument forwarded to ``Searcher``
    :return: whatever ``ranker.retrieve_top_k`` returns for the ranked docs
    """
    parsed = Parse().parse_query(query)
    engine = Searcher(inverted_index, number_of_documents, load_path)

    # Candidate lookup happens before query normalisation, as in the
    # original call order.
    candidates = engine.relevant_docs_from_posting(parsed)
    query_vector = engine.normalized_query(parsed)

    ordered = engine.ranker.rank_relevant_doc(
        candidates, query_vector, inverted_documents_dict)
    return engine.ranker.retrieve_top_k(ordered, k)
Exemplo n.º 3
0
def search_and_rank_query(corpus_path, queries_list, inverted_index,
                          num_docs_to_retrieve, stemming, word2vec,
                          output_path):
    """Run every query in ``queries_list`` and collect the top results of each.

    :param corpus_path: forwarded to ``ConfigClass``
    :param queries_list: iterable of queries; each is parsed and searched
    :param inverted_index: forwarded to ``Searcher``
    :param num_docs_to_retrieve: forwarded to ``retrieve_top_k`` as the limit
    :param stemming: forwarded to ``Parse``, ``Searcher``, the posting lookup
        and the ranker
    :param word2vec: forwarded to ``Searcher`` and to the ranker
    :param output_path: forwarded to the posting lookup and to the ranker
    :return: ``defaultdict(list)`` mapping the zero-based position of each
        query in ``queries_list`` to its ``retrieve_top_k`` result
    """
    settings = ConfigClass(corpus_path)
    parser = Parse(stemming)
    results_by_query = defaultdict(list)

    for position, raw_query in enumerate(queries_list):
        parsed = parser.parse_query(raw_query)
        # A fresh Searcher is built for every query, as in the original.
        engine = Searcher(inverted_index, stemming, word2vec)
        candidates = engine.relevant_docs_from_posting(
            parsed, stemming, settings, output_path)
        ordered = engine.ranker.rank_relevant_doc(
            candidates, parsed, word2vec, stemming, output_path)
        results_by_query[position] = engine.ranker.retrieve_top_k(
            ordered, num_docs_to_retrieve)

    return results_by_query
Exemplo n.º 4
0
def search_and_rank_query(query, inverted_index, k, config):
    """
    Parse a query into tokens, search for relevant documents and rank them.

    Ranking is delegated to the searcher's ranker (described by the original
    author as tf-idf cosine similarity -- not verifiable from this function).

    :param query: string that contains a query
    :param inverted_index: the inverted index for the corpus
    :param k: number of documents to retrieve
    :param config: configuration object; ``config.toStem`` is passed to the
        parser, and the object itself is forwarded to the posting lookup and
        to the ranker
    :return: result of ``ranker.retrieve_top_k`` for the ranked documents
        and ``k``
    """
    p = Parse(config.toStem)
    query_as_list = p.parse_query(query)
    searcher = Searcher(inverted_index)
    relevant_docs = searcher.relevant_docs_from_posting(query_as_list, config)
    ranked_docs = searcher.ranker.rank_relevant_doc(relevant_docs, config,
                                                    query_as_list)
    return searcher.ranker.retrieve_top_k(ranked_docs, k)