def _main(conf):
    """Train the joint QA predicate ranking model described by ``conf``.

    Creates a timestamped experiment directory under ``../results``, loads and
    (optionally) sub-samples the train/validation question sets, builds the
    model, trains it, and saves both the configuration and the final weights.

    Args:
        conf: experiment configuration object (task name, dataset, file paths,
            ``max_questions`` cap, embedding switches, ...).
    """
    conf.experiment_name = "{}_{}_{}".format(conf.task_name, conf.dataset,
                                             get_timestamp())
    conf.model_dirpath = os.path.join("../results", conf.experiment_name)

    print("Create model directory:", conf.model_dirpath)
    # exist_ok replaces the old try/except-print: an already-existing directory
    # is fine, while real errors (e.g. permissions) now fail fast instead of
    # surfacing later when the configuration is saved.
    os.makedirs(conf.model_dirpath, exist_ok=True)

    # Load question sets and cap their size so that huge files can be trained
    # on a random sub-sample.
    train_questions = load_questions(conf.training_file_path)
    valid_questions = load_questions(conf.validation_file_path)
    if conf.max_questions < len(train_questions):
        train_questions = random.sample(train_questions, conf.max_questions)
    if conf.max_questions < len(valid_questions):
        valid_questions = random.sample(valid_questions, conf.max_questions)
    print("#Train file instances: " + str(len(train_questions)))
    print("#Valid file instances: " + str(len(valid_questions)))

    res = load_resources(conf)
    print("Word Vocabulary", res.word_embeddings.vocabulary)
    print("Char Vocabulary", res.char_vocabulary)
    if conf.use_graph_embeddings == 1:
        print("Graph Vocabulary", res.graph_embeddings.vocabulary)
    print("Entity Vocabulary", res.entity_vocabulary)
    print("Relation Vocabulary", res.relation_vocabulary)

    print("Configuration:")
    print(conf.description())

    print("build model ...")
    ranking_model = models.simple_joint_qa_predicate_model(conf, res)
    ranking_model.summary()

    # Persist the configuration before training so the run is reproducible
    # even if training is interrupted.
    print("Save configuration ...")
    conf.save(os.path.join(conf.model_dirpath, "configuration.json"))

    train(train_questions, valid_questions, ranking_model, conf, res)

    print("Save weights ...")
    ranking_model.save_weights(
        os.path.join(conf.model_dirpath, "answer_ranking_model.hdf5"),
        overwrite=True)
    print("Done!")
def stage2(players):
    """Play the second stage, eliminating players until only three remain.

    The first question goes to ``players[0]``; afterwards each question is
    directed at a freshly selected player. When the prepared question pool is
    exhausted, ``ask_question`` is invoked without a question. Returns the
    surviving ``players`` list.
    """
    print("Lets begin the second stage!")
    pool = load_questions(25, stage=2)
    active_player = players[0]
    asked_first = False
    next_index = 0

    while len(players) > 3:
        if asked_first:
            active_player = select_player(players, select=True)
        else:
            asked_first = True
        try:
            correct = ask_question(active_player, pool[next_index])
        except IndexError:
            # Ran past the prepared pool — fall back to a question-less ask.
            print("The pool of questions has ended")
            correct = ask_question(active_player)
        check_current_player_chances(players, active_player, correct,
                                     stage="stage2")
        next_index += 1

    print("The players going to the FINAL STAGE are:")
    for player in players:
        print(player)
    return players
def main(questions_filepath, output_filepath):
    """Export questions as labelled lines: ``<LABEL_PREFIX><predicate> <text>``.

    Reads questions from ``questions_filepath`` and writes one fastText-style
    training line per question to ``output_filepath``.
    """
    questions = load_questions(questions_filepath)
    with open(output_filepath, "w") as fout:
        for question in questions:
            text = question["text"]
            predicate = question["predicate"]
            fout.write(f"{LABEL_PREFIX}{predicate} {text}\n")
def print_questions(docs_json, num_questions=200):
    """Print questions, their gold passage, and both tokenized forms.

    For each of the first ``num_questions`` questions in ``test.tsv``, prints
    the question text, the referenced passage from ``docs_json``, and the
    ``process_and_tokenize_string`` output for both. Debug/inspection helper.

    Args:
        docs_json: mapping of document-id strings to passage-id-keyed dicts.
        num_questions: how many questions to print (was hard-coded to 200;
            now a parameter with the same default for backward compatibility).
    """
    qs = load_questions('test.tsv')
    for i in range(num_questions):
        question = qs.questions[i]
        # Hoist the nested lookup — it was repeated twice in the original.
        passage_text = docs_json[str(question.document_id)][str(
            question.passages[0])]
        print(" ")
        print(i)
        print(question.question)
        print(passage_text)
        print(process_and_tokenize_string(question.question))
        print(process_and_tokenize_string(passage_text))
def main(questions_filepath, experiment_dir):
    """Predict answers with a trained joint QA model and write summaries.

    Loads the saved configuration from ``experiment_dir``, optionally switches
    to the NER-processed question file, restores the best model weights, runs
    prediction, and writes the predicted answers plus a recall@N summary into
    the model directory.

    Args:
        questions_filepath: path to the test questions (overridden when
            ``conf.use_ner`` is set).
        experiment_dir: directory containing ``configuration.json`` and the
            saved weights.
    """
    # os.path.join instead of string concatenation for portability.
    conf_filepath = os.path.join(experiment_dir, "configuration.json")
    conf = Configuration.load(conf_filepath)

    if conf.use_ner:
        print("Using NER questions!")
        questions_filepath = "../res/test_after_ner.txt"
        # take top 7 candidate entities
        conf.test_top_candidates = 7

    print("Configuration:")
    print(conf.description())

    # Cap the evaluation set with a random sub-sample when it is too large.
    questions = load_questions(questions_filepath)
    if conf.max_questions < len(questions):
        questions = random.sample(questions, conf.max_questions)
    print("#Questions:", len(questions))

    res = load_resources(conf)
    print("Word Vocabulary", res.word_embeddings.vocabulary)
    print("Char Vocabulary", res.char_vocabulary)
    print("Entity Vocabulary", res.entity_vocabulary)
    print("Relation Vocabulary", res.relation_vocabulary)

    print("build model ...")
    ranking_model = models.simple_joint_qa(conf, res)
    ranking_model.load_weights(
        os.path.join(conf.model_dirpath, "best_answer_ranking_model.hdf5"))
    ranking_model.summary()

    print('Starting predicting ...\n')
    output_filepath = os.path.join(conf.model_dirpath,
                                   "predicted_answers.txt")
    predicted_data_batches = predict(questions, ranking_model, conf, res)

    print('Writing predictions to the model path')
    write_prediction(predicted_data_batches, output_filepath, conf)

    print('Writing prediction summary to the model path')
    recall_output_filepath = os.path.join(conf.model_dirpath,
                                          "prediction_summary.txt")
    recall_values = recall_at_n(predicted_data_batches, [1, 3, 5, 10, 20, 50],
                                conf)
    write_prediction_summary(recall_values, recall_output_filepath, conf)
def final_stage(players):
    """Run the final stage of the quiz game.

    Alternates between a 'first come, first served' phase and a targeted
    phase that starts once some player reaches 30 points
    (``thirty_points_reached``). In the targeted phase a player answering a
    question on their own post ("own question") earns bonus handling via
    ``add_points(current_player, own_question=True)``. Exits the process as
    soon as ``check_winner`` reports a winner; otherwise declares the winner
    after all questions are used.

    NOTE(review): the original source was whitespace-mangled; the statement
    nesting below (placement of ``own_question = False`` and
    ``present_scores``) is the most plausible reconstruction — confirm
    against version control before relying on it.
    """
    print("Lets begin the final stage!")
    prepare_players_for_final(players)
    own_question = False
    thirty_points_reached = False
    max_num_of_questions = 40
    questions3 = load_questions(max_num_of_questions, stage=2)
    for n in range(max_num_of_questions):
        question = questions3[n]
        # the stage when 'first come first served'
        if not thirty_points_reached:
            correct = ask_question(question=question)
            current_player = select_player(players, select=True)
            check_current_player_chances(players, current_player, correct,
                                         stage="final_stage")
            if correct:
                add_points(current_player, own_question)
                # switch to the targeted phase once anyone hits 30 points
                thirty_points_reached = check_30_points(players)
        else:
            # getting previously answering player for the sake of own question
            previous_player = current_player
            current_player = select_player(players, select=True)
            # own question: same post as the previous answerer
            if current_player.post == previous_player.post and thirty_points_reached:
                own_question = True
            correct = ask_question(current_player, question=question)
            check_current_player_chances(players, current_player, correct,
                                         stage="final_stage")
            if correct:
                add_points(current_player, own_question)
            # reset the own-question bonus flag for the next round
            own_question = False
            # after the wrong the decision we come back to the stage when 'first come first served'
            if current_player == previous_player and not correct:
                thirty_points_reached = False
        present_scores(players)
        if check_winner(players):
            exit()
    # checking winner after the end of questions
    check_winner(players, end=True)
def export_to_file(docs_json, document_indexer, passage_indexer):
    """Run document+passage retrieval for every test question and dump
    the top-5 passage answers per question to ``data/answers.json``.

    Refuses to run when the module-level ``reindex_documents`` flag is set,
    since exporting against a half-built index would be meaningless.

    Args:
        docs_json: mapping of document-id strings to their passages.
        document_indexer: index over whole documents.
        passage_indexer: index rebuilt per-question over the top documents.
    """
    if reindex_documents:
        print('reindexing_documents is on.. refusing to export to file')
        return
    data = []
    start = 0
    qs = load_questions('test.tsv')
    document_indexer.index(docs_json, reindex_documents)
    question_count = len(qs.questions)
    for i in range(start, start + question_count):
        query = qs.questions[i]
        answers = []
        response = dict()
        response['id'] = query.qid
        response['answers'] = answers
        try:
            print('Processing question: ' + str(query.qid) + " (" + str(i) +
                  ")")
            top_docs = document_indexer.execute_query(query.question)
            # Restrict the passage index to the two best-matching documents.
            sliced_docs = {
                top_doc.doc.get_id(): docs_json[str(top_doc.doc.get_id())]
                for top_doc in top_docs[0:2]
            }
            passage_indexer.index(sliced_docs, False)
            top_passages = passage_indexer.execute_query(query.question)
            for j in range(5):
                answer = {
                    'answer': str(top_passages[j].passage.get_doc_id()) + ':' +
                              str(top_passages[j].passage.get_id()),
                    'score': str(top_passages[j].get_score())
                }
                answers.append(answer)
            data.append(response)
        except Exception as e:
            # BUG FIX: the original printed a bare "error", discarding the
            # exception and the question it occurred on.
            print("error processing question", query.qid, ":", e)
    # BUG FIX: 'data\\answers.json' was a Windows-only path literal; build it
    # portably instead.
    with open(os.path.join('data', 'answers.json'), 'w',
              encoding='utf-8') as outfile:
        json.dump(data, outfile, ensure_ascii=False, indent=2)
def main(questions_filepath, conf):
    """Predict predicates with a trained joint QA predicate model.

    Restores the best saved weights from ``conf.model_dirpath``, runs
    prediction over the (optionally sub-sampled) question set, and writes
    both the predictions and a recall@N summary into the model directory.

    Args:
        questions_filepath: path to the question file to evaluate.
        conf: loaded experiment configuration.
    """
    print("Configuration:")
    print(conf.description())

    # Cap the evaluation set with a random sub-sample when it is too large.
    questions = load_questions(questions_filepath)
    if conf.max_questions < len(questions):
        questions = random.sample(questions, conf.max_questions)
    print("#Questions:", len(questions))

    res = load_resources(conf)
    print("Word Vocabulary", res.word_embeddings.vocabulary)
    print("Char Vocabulary", res.char_vocabulary)
    print("Entity Vocabulary", res.entity_vocabulary)
    print("Relation Vocabulary", res.relation_vocabulary)

    print("build model ...")
    ranking_model = models.simple_joint_qa_predicate_model(conf, res)
    ranking_model.load_weights(
        os.path.join(conf.model_dirpath, "best_answer_ranking_model.hdf5"))
    ranking_model.summary()

    print('Starting predicting ...\n')
    output_filepath = os.path.join(conf.model_dirpath,
                                   "predicate_predicted_answers.txt")
    recall_output_filepath = os.path.join(conf.model_dirpath,
                                          "predicate_prediction_summary.txt")
    predicted_data = predict(questions, ranking_model, conf, res)

    recall_ranges = [1, 3, 5, 10, 20, 50]
    print('Computing recall values and writing predictions to the model path')
    compute_recall_and_write_predictions(predicted_data, output_filepath,
                                         recall_output_filepath,
                                         recall_ranges, conf, res)
def score_documents_retrieval(docs_json, document_indexer, passage_indexer,
                              start=70, question_count=20):
    """Print expected vs. retrieved passages for a slice of test questions.

    For each question in ``[start, start + question_count)``, queries the
    document index, rebuilds the passage index over the top 3 documents, and
    prints the expected document/passages next to the top-10 retrieved
    passages for manual inspection.

    Args:
        docs_json: mapping of document-id strings to their passages.
        document_indexer: pre-built index over whole documents.
        passage_indexer: index rebuilt per-question over the top documents.
        start: index of the first question to evaluate (was hard-coded to 70).
        question_count: how many questions to evaluate (was hard-coded to 20).
    """
    # Removed an unused local counter from the original.
    qs = load_questions('test.tsv')
    document_indexer.index(None, False)
    for i in range(start, start + question_count):
        query = qs.questions[i]
        top_docs = document_indexer.execute_query(query.question)
        # Restrict the passage index to the three best-matching documents.
        sliced_docs = {
            top_doc.doc.get_id(): docs_json[str(top_doc.doc.get_id())]
            for top_doc in top_docs[0:3]
        }
        passage_indexer.index(sliced_docs, True)
        top_passages = passage_indexer.execute_query(query.question)
        print('\nExpected: document: ' + str(qs.questions[i].document_id) +
              " Passages: " + str(qs.questions[i].passages))
        print('Results:\n' + str(top_passages[0:10]))
def stage1(players):
    """Play the first stage and return the players who advance.

    Gives each player (as chosen by ``select_player``) up to two turns across
    ``(len(players) + 1) * 2`` rounds; skipped rounds occur when
    ``select_player`` yields no player. Falls back to a question-less
    ``ask_question`` once the prepared pool is exhausted.
    """
    print("Let's begin the first stage")
    pool = load_questions(len(players), stage=1)
    num_of_players = len(players)
    total_rounds = (num_of_players + 1) * 2

    for round_no in range(total_rounds):
        active_player = select_player(players, num_of_players, round_no)
        if not active_player:
            continue
        try:
            correct = ask_question(active_player, pool[round_no])
        except IndexError:
            # Ran past the prepared pool — fall back to a question-less ask.
            print("The pool of questions has ended")
            correct = ask_question(active_player)
        check_current_player_chances(players, active_player, correct,
                                     stage="stage1")
        check_winner(players)

    print("The players going to the SECOND STAGE are:")
    for player in players:
        print(player)
    return players