def DemoConsole(graph, utility, sess, model_dir, dat):
  """Interactive console demo: ask questions against a chosen custom table."""
  i = 0
  print("Listening to incoming questions...")
  while True:
    question_id = 'iac-' + str(i)
    table_key = input("> What table do you want? \n")
    table_key = "csv/custom-csv/" + table_key + ".csv"
    while True:
      tokens = input("> ")
      print("\n")
      if tokens == 'new':
        # start over with a new table
        break
      print("Question:", tokens, "Table:", table_key)
      example = dat.load_example(question_id, tokens, table_key)
      data = [example]
      data_utils.construct_vocab(data, utility, True)
      final_data = data_utils.complete_wiki_processing(data, utility, 'demo')
      answer = get_prediction(sess, final_data, graph, utility)
      final_answer = ''
      certainty = answer[2]
      if answer[1] == 'scalar':
        final_answer = str(answer[0][0])
        debugging = str(answer[0][1])
      else:
        print(answer)
        a = answer[0][0][0]
        row = a[1][0]
        col = a[2]
        # number columns occupy indices 0-14; word columns are offset by 15
        if col < 15:
          list_answer = dat.custom_tables[table_key].number_columns[col][row]
        else:
          list_answer = dat.custom_tables[table_key].word_columns[col - 15][row]
        if isinstance(list_answer, float):
          final_answer = str(list_answer)
        else:
          for l in list_answer:
            final_answer += " " + str(l)
      print("\n")
      if certainty < FLAGS.certainty_threshold:
        print("> I do not know the answer to your question, "
              "although I would say...\n")
      print("> " + final_answer + "\n")
      i += 1
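# `FLAGS` is referenced above (FLAGS.certainty_threshold, FLAGS.data_dir) but
# defined elsewhere in the module. A minimal sketch of how it is presumably
# wired up with tf.app.flags; the default values here are illustrative
# assumptions, not the original settings:
#
#   import tensorflow as tf
#   tf.app.flags.DEFINE_string("data_dir", "",
#                              "Path to the WikiTableQuestions data.")
#   tf.app.flags.DEFINE_float("certainty_threshold", 0.5,
#                             "Minimum confidence for reporting an answer.")
#   FLAGS = tf.app.flags.FLAGS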
def main(args):
  utility = Utility()
  train_name = "random-split-1-train.examples"
  dev_name = "random-split-1-dev.examples"
  test_name = "pristine-unseen-tables.examples"
  # load data
  dat = wiki_data.WikiQuestionGenerator(train_name, dev_name, test_name,
                                        FLAGS.data_dir)
  train_data, dev_data, test_data = dat.load()
  utility.words = []
  utility.word_ids = {}
  utility.reverse_word_ids = {}
  # construct vocabulary
  data_utils.construct_vocab(train_data, utility)
  data_utils.construct_vocab(dev_data, utility, True)
  data_utils.construct_vocab(test_data, utility, True)
  data_utils.add_special_words(utility)
  data_utils.perform_word_cutoff(utility)
  # convert data to int format and pad the inputs
  train_data = data_utils.complete_wiki_processing(train_data, utility, True)
  dev_data = data_utils.complete_wiki_processing(dev_data, utility, False)
  test_data = data_utils.complete_wiki_processing(test_data, utility, False)
  print("# train examples", len(train_data))
  print("# dev examples", len(dev_data))
  print("# test examples", len(test_data))
  print("running open source")
  # construct TF graph and train or evaluate
  master(train_data, dev_data, utility)
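# Entry-point sketch (an assumption; the original file presumably wires this
# up at module level). tf.app.run parses the flags and then calls main(argv):
#
#   if __name__ == "__main__":
#     tf.app.run(main=main)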
def Test(graph, utility, batch_size, sess, model_dir, dat, file_name):
  """Evaluate the model on a file of custom questions and print accuracy."""
  ids, questions, table_keys, answers = wiki_data.load_custom_questions(
      file_name)
  data = []
  for i in range(len(questions)):
    example = dat.load_example(ids[i], questions[i], table_keys[i])
    data.append(example)
  data_utils.construct_vocab(data, utility, True)
  final_data = data_utils.complete_wiki_processing(data, utility, 'demo')
  predictions = evaluate_custom(sess, final_data, answers, batch_size, graph,
                                table_keys[0], dat)
  total = len(predictions)
  correct = 0.0
  for i in range(total):
    if predictions[i] == answers[i]:
      correct += 1
    else:
      # print the mismatches for debugging
      print(questions[i], predictions[i], answers[i])
  accuracy = (correct / total) * 100
  print("Total test cases:", total)
  print("Correct answers:", correct)
  print("Accuracy:", accuracy)
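# Example invocation of Test (a sketch). `graph`, `sess`, and `dat` must
# already be built and restored by the surrounding training code, and the
# question file must be in whatever format wiki_data.load_custom_questions
# expects; the batch size, model_dir, and file name below are illustrative
# assumptions:
#
#   Test(graph, utility, 20, sess, model_dir, dat,
#        "custom-questions.examples")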
def init_data(
    data_dir,
    preserve_vocab=False,
    split_filenames={
        'train': 'random-split-1-train.examples',
        'dev': 'random-split-1-dev.examples',
        'test': 'pristine-unseen-tables.examples'
    },
    annotated_filenames={
        'train': 'training.annotated',
        'test': 'pristine-unseen-tables.annotated'
    }):
  """Load WikiTableQuestions data.

  preserve_vocab is used when perturbed data is loaded, in which case special
  words are given hard-coded ids to match those of the unperturbed-data case.
  """
  utility = Utility()
  train_name = split_filenames['train']
  dev_name = split_filenames['dev']
  test_name = split_filenames['test']
  # load data
  dat = wiki_data.WikiQuestionGenerator(train_name, dev_name, test_name,
                                        data_dir)
  train_data, dev_data, test_data = dat.load(annotated_filenames)
  utility.words = []
  utility.word_ids = {}
  utility.reverse_word_ids = {}
  # construct vocabulary
  data_utils.construct_vocab(train_data, utility)
  data_utils.construct_vocab(dev_data, utility, True)
  data_utils.construct_vocab(test_data, utility, True)
  data_utils.add_special_words(utility)
  # set absolute word ids for the special words
  if preserve_vocab:
    print("hardcoded ids for special words")

    def pin_word_id(token, target_id):
      """Swap `token`'s id with the word currently assigned `target_id`."""
      displaced_word = utility.reverse_word_ids[target_id]
      old_id = utility.word_ids[token]
      utility.word_ids[displaced_word] = old_id
      utility.word_ids[token] = target_id
      utility.reverse_word_ids[old_id] = displaced_word
      utility.reverse_word_ids[target_id] = token
      return target_id

    utility.entry_match_token_id = pin_word_id(utility.entry_match_token, 9133)
    utility.column_match_token_id = pin_word_id(utility.column_match_token,
                                                9134)
    utility.dummy_token_id = pin_word_id(utility.dummy_token, 9135)
    utility.unk_token_id = pin_word_id(utility.unk_token, 9136)
    print(utility.entry_match_token_id, utility.column_match_token_id,
          utility.dummy_token_id, utility.unk_token_id)
  data_utils.perform_word_cutoff(utility)
  unprocessed_dev_data = copy.deepcopy(dev_data)
  # convert data to int format and pad the inputs
  train_data = data_utils.complete_wiki_processing(train_data, utility, True)
  dev_data = data_utils.complete_wiki_processing(dev_data, utility, False)
  test_data = data_utils.complete_wiki_processing(test_data, utility, False)
  print("# train examples", len(train_data))
  print("# dev examples", len(dev_data))
  print("# test examples", len(test_data))
  return train_data, dev_data, test_data, utility, unprocessed_dev_data
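# Example usage of init_data (a sketch; assumes FLAGS.data_dir points at a
# WikiTableQuestions release laid out the way wiki_data expects):
#
#   train_data, dev_data, test_data, utility, raw_dev = init_data(
#       FLAGS.data_dir, preserve_vocab=True)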
def Demo(graph, utility, sess, model_dir, dat):
  """Socket-server demo: answer (table, question) pairs sent by a client."""
  i = 0
  s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  s.bind((config.socket_address, config.socket_port))
  s.listen(1)
  print("Listening to incoming questions...")
  while True:
    conn, addr = s.accept()
    payload = conn.recv(1024).decode("utf-8").split("****----****")
    table_key = payload[0]
    tokens = payload[1]
    question_id = 'iac-' + str(i)
    print("Question:", tokens, "Table:", table_key)
    example = dat.load_example(question_id, tokens, table_key)
    data = [example]
    data_utils.construct_vocab(data, utility, True)
    final_data = data_utils.complete_wiki_processing(data, utility, 'demo')
    answer = get_prediction(sess, final_data, graph, utility)
    final_answer = ''
    certainty = answer[2]
    if answer[1] == 'scalar':
      final_answer = str(answer[0][0])
      debugging = answer[0][1]
      debugging['answer_neural'].append(int(answer[0][0]))
    else:
      print("Debugging in MODEL:")
      a = answer[0][0][0]
      debugging = answer[0][1]
      print(debugging)
      rows = a[1]
      col = a[2]
      rows_answer = []
      for row in rows:
        row_answer = ''
        # number columns occupy indices 0-14; word columns are offset by 15
        if col < 15:
          list_answer = dat.custom_tables[table_key].number_columns[col][row]
        else:
          list_answer = dat.custom_tables[table_key].word_columns[col - 15][row]
        if isinstance(list_answer, float):
          debugging['answer_neural'].append(list_answer)
          row_answer = str(list_answer)
        else:
          for l in list_answer:
            row_answer += " " + str(l)
          debugging['answer_neural'].append(row_answer[1:])
        rows_answer.append(row_answer)
      final_answer = ','.join(rows_answer)
    print("Answer:", final_answer + "\n")
    if certainty < FLAGS.certainty_threshold:
      print("I do not know the answer to your question, "
            "although that would be my guess.")
      debugging['below_threshold'] = True
      final_answer = ("I cannot answer that question with the information "
                      "in the table.")
    result = {"answer": final_answer, "debugging": debugging}
    # note: the reply is the repr of a Python dict, not JSON
    result = str(result)
    i += 1
    conn.send(result.encode())
    conn.close()
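# A minimal client sketch for the Demo server above (not part of the original
# file). It follows the wire protocol Demo expects: one message of the form
# "<table_key>****----****<question>", answered with the repr of a result
# dict. Address and port come from the same `config` module the server uses.
def ask_demo_server(table_key, question):
  """Send one (table, question) pair to the Demo server; return its raw reply."""
  client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  client.connect((config.socket_address, config.socket_port))
  client.send((table_key + "****----****" + question).encode("utf-8"))
  reply = client.recv(4096).decode("utf-8")
  client.close()
  return reply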