def run_reranking(reference_doc,
                  query,
                  labels_file,
                  add_remove_file,
                  beta="-"):
    """Score the documents for ``query`` and log where ``reference_doc`` lands.

    A feature file named ``features_<query>`` is generated, scored with
    ``run_model`` and converted into per-query lists. The position of
    ``reference_doc`` inside the list for ``query`` is then written to
    ``labels_file`` as one line of the form ``<query> 1 <beta> <position>``.

    Args:
        reference_doc: Entry looked up (via ``.index``) in the list for
            ``query``.
        query: Query identifier; used as the feature-file suffix, as the key
            into the generated lists and as the first output field.
        labels_file: Object exposing ``write``; receives the output line.
        add_remove_file: Passed through unchanged to ``create_features_file``.
        beta: String written as the third output field (``"-"`` by default).

    Raises:
        Whatever ``.index`` raises when ``reference_doc`` is absent
        (``ValueError`` if the per-query value is a built-in list).
    """
    feature_file = "features_" + query
    create_features_file("Features", params.path_to_index,
                         params.queries_xml, feature_file, add_remove_file, "")

    doc_names_by_index = create_index_to_doc_name_dict(feature_file)
    model_scores = run_model(feature_file)
    per_query_lists = create_lists(
        retrieve_scores(doc_names_by_index, model_scores))

    # NOTE(review): for a built-in list ``.index`` is never negative, so
    # ``abs`` looks like a no-op here; kept to leave behaviour untouched.
    position = abs(per_query_lists[query].index(reference_doc))

    fields = (query, "1", beta, str(position))
    labels_file.write(" ".join(fields) + "\n")
# Example #2
# 0
def run_chosen_model_for_stats(chosen_models, method, feature_file,
                               doc_name_index, base_features_file, beta):
    """Train the chosen SVM-rank model, score ``feature_file`` and return the
    ordered TREC file.

    Args:
        chosen_models: Mapping from ``method`` (or ``method + "_" + beta`` when
            ``beta`` is truthy) to the parameter handed to
            ``learn_svm_rank_model``.
        method: Method name; part of the lookup key and forwarded to the SVM
            handler and to ``create_trec_eval_file``.
        feature_file: Feature file scored by the learned model.
        doc_name_index: Forwarded to ``create_trec_eval_file``.
        base_features_file: Feature file the model is learned from.
        beta: Optional key suffix; a falsy value means "no suffix".

    Returns:
        The value returned by the evaluator's ``order_trec_file``.

    Raises:
        KeyError: If the derived key is missing from ``chosen_models``
            (assuming it is a plain dict).
    """
    model_key = (method + "_" + beta) if beta else method

    # Look the parameter up before any handler is created, so a missing key
    # fails before any other work is done.
    chosen_model_parameter = chosen_models[model_key]

    ranker = svm_handler()
    model_file = ranker.learn_svm_rank_model(base_features_file, method,
                                             chosen_model_parameter)

    # NOTE(review): ``eval`` must be a project-level name shadowing the
    # builtin here -- the builtin does not accept a list. Confirm the import.
    evaluator = eval(["map", "ndcg", "P.2", "P.5"])

    scores_file = ranker.run_svm_rank_model(feature_file, model_file, method)

    # NOTE(review): other call sites pass an index->doc-name mapping as the
    # first argument of ``retrieve_scores``; verify the one-argument form.
    results = retrieve_scores(scores_file)
    trec_file = create_trec_eval_file(doc_name_index, results, method)
    return evaluator.order_trec_file(trec_file)
# Example #3
# 0

# Per-epoch pipeline: for epochs 1..5 build an index from that round's
# documents, merge it, extract features, score them and write an ordered
# TREC evaluation file.
doc_texts = load_file(params.trec_text_file)
merged_index = ""

# Loop-invariant settings, assigned once up front.
features_dir = "Features"
queries_file = "/home/greg/auto_seo/data/queries.xml"

for index in range(1, 6):
    print("in epoch", index)
    epoch_name = str(index)  # the epoch number doubles as the run name

    doc_text_for_round = get_docs(doc_texts, round=index)
    trec_text_file = create_trectext_original(
        document_text=doc_text_for_round,
        summaries=[],
        run_name=epoch_name,
        avoid=[],
    )
    new_index = create_index(trec_text_file, epoch_name)

    # Remove the merged index left by the previous epoch (none on the first
    # pass, where ``merged_index`` is still the empty string).
    if merged_index:
        run_bash_command("rm -r " + merged_index)
    merged_index = merge_indices(
        new_index=new_index,
        run_name=epoch_name,
        new_index_name="merged_index",
    )

    feature_file = "features_" + epoch_name
    create_features_file_original(
        features_dir=features_dir,
        index_path=merged_index,
        new_features_file=feature_file,
        run_name=epoch_name,
        queries_file=queries_file,
    )

    index_doc_name = create_index_to_doc_name_dict(feature_file)
    scores_file = run_model(feature_file, epoch_name)
    results = retrieve_scores(index_doc_name, scores_file)
    trec_file = create_trec_eval_file(results, epoch_name)
    order_trec_file(trec_file)