Exemplo n.º 1
0
        doc_texts[doc] = a_doc_texts[doc]

# summaries={}
# print("starting summarization")
# for query in reference_docs:
#     print("in",query )
#     sys.stdout.flush()
#     reference_doc=reference_docs[query]
#     summaries[query] = create_multi_document_summarization(ranked_lists,query,queries[query],reference_doc,params.number_of_documents_above,doc_texts,index,token2id,dic,id2df)
# print("finished summarization")
# summary_file = open("summaries","wb")
# pickle.dump(summaries,summary_file)
# summary_file.close()

# reference_docs_list = list(reference_docs.values())
# create_trectext(doc_texts,reference_docs_list,summaries)
# index_path = create_index()
# print("merging indices")
# sys.stdout.flush()
# merge_indices(index_path)
# Build the features file against the merged index, score every document with
# the ranking model, and persist the resulting scores to disk.
create_features_file("Features",
                     "/lv_local/home/sgregory/auto_seo/new_merged_index",
                     "/lv_local/home/sgregory/auto_seo/data/queries.xml")
index_doc_name = create_index_to_doc_name_dict("features")
scores_file = run_model("features")

results = retrieve_scores(index_doc_name, scores_file)

# Context manager guarantees the handle is closed even if pickling raises;
# the original open/dump/close sequence leaked the file on error.
with open("scores_of_model", "wb") as results_file:
    pickle.dump(results, results_file)
    # NOTE(review): fragment — the enclosing function header is outside this
    # view; run_name, reference_docs, queries, doc_texts, new_texts, threshold
    # and params are presumably its parameters/locals. TODO confirm.
    # Phase 1: rewrite each query's reference document by sentence weaving.
    for query in reference_docs:
        print("in", query)
        sys.stdout.flush()  # make progress visible when stdout is redirected
        reference_doc = reference_docs[query]
        new_texts[reference_docs[query]] = create_new_document_by_weaving(
            doc_texts[reference_doc], queries[query], threshold)
    print("finished summarization")
    # Persist the rewritten texts, keyed by this run's name.
    summary_file = open("new_texts" + run_name, "wb")
    pickle.dump(new_texts, summary_file)
    summary_file.close()

    # Phase 2: rebuild the trectext collection, index it, and merge the new
    # index for this run.
    reference_docs_list = list(reference_docs.values())
    create_trectext(doc_texts, reference_docs_list, new_texts, run_name)
    index_path = create_index(run_name)
    print("merging indices")
    sys.stdout.flush()
    new_index_name = merge_indices(index_path, run_name)
    # Phase 3: extract features against the merged index and score them.
    features_dir = "Features" + run_name
    feature_file = "features"
    # The shared "features" file must be gone before a new one is produced;
    # presumably this serializes concurrent runs — TODO confirm.
    wait_for_feature_file_to_be_deleted(feature_file)
    create_features_file(features_dir, new_index_name, params.queries_xml,
                         run_name)
    move_feature_file(feature_file, run_name)
    index_doc_name = create_index_to_doc_name_dict(feature_file + run_name)
    scores_file = run_model(feature_file + run_name, run_name)
    results = retrieve_scores(index_doc_name, scores_file)
    # Persist the model scores for this run.
    results_file = open("scores_of_model" + run_name, "wb")
    pickle.dump(results, results_file)
    results_file.close()
    # Touch an empty marker file; presumably signals run completion to an
    # external watcher — TODO confirm.
    f = open("stop.stop" + run_name, 'w')
    f.close()
                # NOTE(review): fragment — the enclosing loops/function are
                # outside this view; reference_doc, new_sentence,
                # reference_sentence, run_name, sentence, query, r_index and
                # the open file handles come from the missing context.
                # Record the sentence replacement (doc @@@ new @@@ old) in the
                # file the feature-extraction step reads.
                add = open("/home/greg/auto_seo/scripts/add_remove_4_test",
                           'w',
                           encoding="utf8")
                add.write(reference_doc + "@@@" + new_sentence.rstrip() +
                          "@@@" + reference_sentence.rstrip() + "\n")
                sentence_data_file.write(run_name + "@@@" +
                                         new_sentence.rstrip() + "@@@" +
                                         reference_sentence.rstrip() + "\n")
                add.close()
                # Presumably gives an external process time to pick up the
                # file — TODO confirm why a fixed 1s sleep suffices.
                time.sleep(1)
                trec_text_file = create_trectext(doc_texts, summaries, "", [])
                features_dir = "Features_4"
                feature_file = "features_4_" + run_name
                create_features_file(
                    features_dir, params.path_to_index, params.queries_xml,
                    feature_file,
                    "/home/greg/auto_seo/scripts/add_remove_4_test", "")
                index_doc_name = create_index_to_doc_name_dict(feature_file)
                scores_file = run_model(feature_file)
                results = retrieve_scores(index_doc_name, scores_file)
                lists = create_lists(results)
                # Label: how far the reference doc sits inside the top 3
                # (0 when it ranks at position 3 or below).
                addition = max(3 - lists[query].index(reference_doc), 0)
                # NOTE(review): `query` is re-derived AFTER being used on the
                # line above — possible ordering bug; confirm which query the
                # label line should carry.
                query = sentence.split("-")[2]
                # qrels-style label line: qid 1 run-id grade tag
                labels_file.write(query + " 1 " + run_name + " " +
                                  str(addition) + " seo" + "\n")
                r_index += 1
                # NOTE(review): closing labels_file inside the loop means any
                # later iteration writes to a closed file — confirm intent.
                labels_file.close()
        index += 1

    sentence_data_file.close()