示例#1
0
def search_and_rank_query(query, inverted_index, num_docs_to_retrieve):
    """Run a query and return the top-ranked documents.

    The query is tokenized with ``Parse.tokenSplit`` into a dict whose keys
    are used as the query terms. A posting entry is read for every term
    from the map-reduce store loaded from ``MapReduceData/``; the searcher
    then picks the relevant documents and its ranker orders them.
    Progress messages are printed to stdout along the way.

    Args:
        query: The query handed to ``Parse.tokenSplit``.
        inverted_index: Index handed to the ``Searcher`` constructor.
        num_docs_to_retrieve: Forwarded to the ranker's
            ``rank_relevant_doc`` and ``retrieve_top_k``.

    Returns:
        Whatever ``searcher.ranker.retrieve_top_k`` returns for the ranked
        documents.
    """
    separator = '-------------------------------------'

    # Tokenize the query; the dict keys become the list of query terms.
    tokenizer = Parse()
    query_tokens = {}
    tokenizer.tokenSplit(query, query_tokens)
    terms = list(query_tokens)
    engine = Searcher(inverted_index)

    print(separator)
    print('Start import mapReduce')
    store = MapReduce.import_map_reduce('MapReduceData/')
    print('Done importing mapReduce')

    print(separator)
    print('Start build posting file')
    # One store lookup per query term, keyed by the term itself.
    term_postings = {term: store.read_from(term) for term in terms}
    print('Done building posting file')

    print(separator)
    print('Get relevant Doc')
    candidates = engine.relevant_docs_from_posting(terms, term_postings)
    print('Done getting relevant Doc')

    print(separator)
    print('Start ranking docs')
    ranking = engine.ranker.rank_relevant_doc(
        candidates, query_tokens, term_postings, num_docs_to_retrieve)
    print('Done ranking docs')

    return engine.ranker.retrieve_top_k(ranking, num_docs_to_retrieve)
示例#2
0
def search_and_rank_query(query, inverted_index, num_docs_to_retrieve):
    """Run a query against the letter-partitioned stores and rank the hits.

    The query is tokenized with ``Parse.tokenSplit`` into a dict whose keys
    are used as the query terms. The terms are sorted case-insensitively,
    and each one is looked up (lower-cased) in the map-reduce store chosen
    by its first letter: ``a``-``g``, ``h``-``q``, ``r``-``z``, or a
    catch-all store for everything else. The searcher then picks the
    relevant documents and its ranker orders them. Progress messages are
    printed to stdout along the way.

    Args:
        query: The query handed to ``Parse.tokenSplit``.
        inverted_index: Index handed to the ``Searcher`` constructor.
        num_docs_to_retrieve: Forwarded to the ranker's
            ``rank_relevant_doc`` and ``retrieve_top_k``.

    Returns:
        Whatever ``searcher.ranker.retrieve_top_k`` returns for the ranked
        documents.
    """
    separator = '-------------------------------------'

    p = Parse()
    dictFromQuery = {}
    p.tokenSplit(query, dictFromQuery)
    # Case-insensitive ordering of the query terms (stable, like list.sort).
    query_as_list = sorted(dictFromQuery, key=str.lower)
    searcher = Searcher(inverted_index)

    # The loads sit between the two messages so the progress output
    # reflects when the import work actually happens.
    print(separator)
    print('Start import mapReduce')
    map_reduce_ag = MapReduce.import_map_reduce('MapReduceData/AG/')
    map_reduce_hq = MapReduce.import_map_reduce('MapReduceData/HQ/')
    map_reduce_rz = MapReduce.import_map_reduce('MapReduceData/Rz/')
    map_reduce_other = MapReduce.import_map_reduce('MapReduceData/Others/')
    # NOTE(review): this store is never read in this function. The load is
    # kept in case import_map_reduce has side effects - confirm and remove.
    map_reduce_doc = MapReduce.import_map_reduce('MapReduceData/Document/')
    print('Done importing mapReduce')

    print(separator)
    print('Start build posting file')
    posting = {}
    for term in query_as_list:
        # Slicing instead of indexing: an empty token yields '' and falls
        # through to the catch-all store rather than raising IndexError.
        lower_letter = term[:1].lower()
        if 'a' <= lower_letter <= 'g':
            current_map = map_reduce_ag
        elif 'h' <= lower_letter <= 'q':
            current_map = map_reduce_hq
        elif 'r' <= lower_letter <= 'z':
            current_map = map_reduce_rz
        else:
            current_map = map_reduce_other
        # Keyed by the original term, but looked up in lower case.
        posting[term] = current_map.read_from(term.lower())
    print('Done building posting file')

    print(separator)
    print('Get relevant Doc')
    relevant_docs = searcher.relevant_docs_from_posting(query_as_list, posting)
    print('Done getting relevant Doc')

    print(separator)
    print('Start ranking docs')
    ranked_docs = searcher.ranker.rank_relevant_doc(
        relevant_docs, dictFromQuery, posting, map_reduce_ag, map_reduce_hq,
        map_reduce_rz, map_reduce_other, num_docs_to_retrieve)
    print('Done ranking docs')
    return searcher.ranker.retrieve_top_k(ranked_docs, num_docs_to_retrieve)