예제 #1
0
 def test_cluster_terms_bow(self):
     return
     mysearcher = searcher("../data/index/inv",
                           "../data/index/docvec",
                           in_memory=True, score="bow", filter_type="cluster")
     while True:
         query = input("query input: ")
         if query == "q":
             break
         print(mysearcher.search(query))
예제 #2
0
 def test_high_idf_tf_idf(self):
     return
     mysearcher = searcher("../data/index/inv",
                           "../data/index/docvec",
                           in_memory=True, score="tf-idf", filter_type="high-idf")
     while True:
         query = input("query input: ")
         if query == "q":
             break
         print(mysearcher.search(query))
         break
예제 #3
0
 def test_heap_bow(self):
     return
     mysearcher = searcher("../data/index/inv",
                           "../data/index/docvec",
                           in_memory=True, score="bow")
     while True:
         query = input("query input: ")
         if query == "q":
             break
         print(mysearcher.search(query))
         break
예제 #4
0
 def test_new_searcher(self):
     """Interactive query loop over a searcher built with metadata and stopword paths.

     Reads queries from stdin and prints the result of each search, run with
     ``score="tf-idf"`` and ``filter_type="cluster"``. Entering ``q`` ends
     the loop. The method blocks on ``input()`` while waiting for a query.
     """
     index_args = (
         "../data/index/inv",
         "../data/index/docvec",
         "../data/index/meta.json",
         "../resources/stopwords/cn_stopwords.txt",
     )
     engine = searcher(*index_args, in_memory=True, cluster_load=1, tf_idf=1)
     # iter() with a sentinel stops as soon as input() returns exactly "q".
     for text in iter(lambda: input("query input: "), "q"):
         print(engine.search(text, score="tf-idf", filter_type="cluster"))
            for s in score:
                begin = time.time()
                result, words = my_searcher.search(query,
                                                   score=s,
                                                   filter_type=t)
                end = time.time() - begin
                res[t][s] = {'result': result, 'time': end}
        res['time'] = time.time() - begin
        query_result_file.write(json.dumps(res, ensure_ascii=False) + "\n")
    query_result_file.close()


if __name__ == "__main__":
    # Script entry point: read both query sets, build the searcher once,
    # then hand each query set to run_and_write with its output path.
    title_query = read_file("./title_query.txt")
    content_query = read_file("./content_query.txt")

    print("load searcher")
    begin = time.time()
    # NOTE: `my_searcher` must remain a module-level name; code elsewhere in
    # this file calls `my_searcher.search(...)` without receiving it as an
    # argument.
    my_searcher = searcher(
        "../data/index/inv",
        "../data/index/docvec",
        "../data/index/meta.json",
        "../resources/stopwords/cn_stopwords.txt",
        in_memory=True,
        proc_num=8,
        cluster_load=1,
        tf_idf=1,
    )
    elapsed = time.time() - begin
    print("load done, use: %fs" % elapsed)

    # Title queries run first, then content queries.
    for banner, out_path, batch in (
        ("run title query", "./title_query_result.json", title_query),
        ("run content query", "./content_query_result.json", content_query),
    ):
        print(banner)
        run_and_write(out_path, batch)