def DemoConsole(graph, utility, sess, model_dir, dat):
  i = 0
  print("Listening to incoming questions...")

  while (True):
    question_id = 'iac-' + str(i)
    table_key = raw_input("> What table do you want? \n")
    table_key = "csv/custom-csv/" + table_key + ".csv"
    while (True):
      tokens = raw_input("> ")
      print("\n")
      if tokens == 'new':
        break
      print("Question:", tokens, "Table:", table_key)
      example = dat.load_example(question_id, tokens, table_key)
      data = [example]
      data_utils.construct_vocab(data, utility, True)
      final_data = data_utils.complete_wiki_processing(data, utility, 'demo')
      answer = get_prediction(sess, final_data, graph, utility)
      final_answer = ''

      certainty = answer[2]

      if answer[1] == 'scalar':
        final_answer = str(answer[0][0])
        debugging = str(answer[0][1])
      else:
        print(answer)
        a = answer[0][0][0]
        row = a[1][0]
        col = a[2]
        # column indices below 15 refer to numeric columns; word columns start at 15
        if col < 15:
          list_answer = dat.custom_tables[table_key].number_columns[col][row]
        else:
          list_answer = dat.custom_tables[table_key].word_columns[col - 15][row]
        if type(list_answer) == float:
          final_answer = str(list_answer)
        else:
          for l in list_answer:
            final_answer += " " + str(l)

      print("\n")
      if (certainty < FLAGS.certainty_threshold):
        print("> I do not know the answer to your question, although I would say..." + "\n")
      print "> " + final_answer + "\n"
      i += 1
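Note that raw_input only exists in Python 2; if these snippets are run under Python 3, a small compatibility shim (an assumption about how you run them, not part of the original code) keeps the calls above working:

try:
    raw_input  # Python 2: the built-in already exists
except NameError:
    raw_input = input  # Python 3: alias the built-in input()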
Example #2
def main(args):
  utility = Utility()
  train_name = "random-split-1-train.examples"
  dev_name = "random-split-1-dev.examples"
  test_name = "pristine-unseen-tables.examples"
  #load data
  dat = wiki_data.WikiQuestionGenerator(train_name, dev_name, test_name, FLAGS.data_dir)
  train_data, dev_data, test_data = dat.load()
  utility.words = []
  utility.word_ids = {}
  utility.reverse_word_ids = {}
  #construct vocabulary
  data_utils.construct_vocab(train_data, utility)
  data_utils.construct_vocab(dev_data, utility, True)
  data_utils.construct_vocab(test_data, utility, True)
  data_utils.add_special_words(utility)
  data_utils.perform_word_cutoff(utility)
  #convert data to int format and pad the inputs
  train_data = data_utils.complete_wiki_processing(train_data, utility, True)
  dev_data = data_utils.complete_wiki_processing(dev_data, utility, False)
  test_data = data_utils.complete_wiki_processing(test_data, utility, False)
  print "# train examples ", len(train_data)
  print "# dev examples ", len(dev_data)
  print "# test examples ", len(test_data)
  print "running open source"
  #construct TF graph and train or evaluate
  master(train_data, dev_data, utility)
Example #3
def main(args):
    utility = Utility()
    train_name = "random-split-1-train.examples"
    dev_name = "random-split-1-dev.examples"
    test_name = "pristine-unseen-tables.examples"
    #load data
    dat = wiki_data.WikiQuestionGenerator(train_name, dev_name, test_name,
                                          FLAGS.data_dir)
    train_data, dev_data, test_data = dat.load()
    utility.words = []
    utility.word_ids = {}
    utility.reverse_word_ids = {}
    #construct vocabulary
    data_utils.construct_vocab(train_data, utility)
    data_utils.construct_vocab(dev_data, utility, True)
    data_utils.construct_vocab(test_data, utility, True)
    data_utils.add_special_words(utility)
    data_utils.perform_word_cutoff(utility)
    #convert data to int format and pad the inputs
    train_data = data_utils.complete_wiki_processing(train_data, utility, True)
    dev_data = data_utils.complete_wiki_processing(dev_data, utility, False)
    test_data = data_utils.complete_wiki_processing(test_data, utility, False)
    print "# train examples ", len(train_data)
    print "# dev examples ", len(dev_data)
    print "# test examples ", len(test_data)
    print "running open source"
    #construct TF graph and train or evaluate
    master(train_data, dev_data, utility)
def Test(graph, utility, batch_size, sess, model_dir, dat, file_name):

    ids, questions, table_keys, answers = wiki_data.load_custom_questions(file_name)
    data = []
    for i in range(len(questions)):
        example = dat.load_example(ids[i], questions[i], table_keys[i])
        data.append(example)

    data_utils.construct_vocab(data, utility, True)
    final_data = data_utils.complete_wiki_processing(data, utility, 'demo')
    predictions = evaluate_custom(sess, final_data, answers, batch_size, graph, table_keys[0], dat)
    total = len(predictions)
    correct = 0.0
    for i in range(total):
        if predictions[i] == answers[i]:
            correct += 1
        else:
            print(questions[i], predictions[i], answers[i])
    accuracy = (correct / total) * 100
    print("Total test cases:", total)
    print("Correct answers:", correct)
    print("Accuracy:", accuracy)
Example #5
def init_data(
    data_dir,
    preserve_vocab=False,
    split_filenames={
        'train': 'random-split-1-train.examples',
        'dev': 'random-split-1-dev.examples',
        'test': 'pristine-unseen-tables.examples'
    },
    annotated_filenames={
        'train': 'training.annotated',
        'test': 'pristine-unseen-tables.annotated'
    }):
    """ Load WikiTableQuestions data. 
    preserve_vocab is used when perturbed data is loaded, 
    in which case special words are given hard-coded ids
    to match that of the unperturbed data case
    """
    utility = Utility()
    train_name = split_filenames['train']
    dev_name = split_filenames['dev']
    test_name = split_filenames['test']
    # load data
    dat = wiki_data.WikiQuestionGenerator(train_name, dev_name, test_name,
                                          data_dir)
    train_data, dev_data, test_data = dat.load(annotated_filenames)
    utility.words = []
    utility.word_ids = {}
    utility.reverse_word_ids = {}
    # construct vocabulary
    data_utils.construct_vocab(train_data, utility)
    data_utils.construct_vocab(dev_data, utility, True)
    data_utils.construct_vocab(test_data, utility, True)
    data_utils.add_special_words(utility)
    # set absolute word_ids for special words
    if preserve_vocab:
        print("hardcoded ids for special words")
        word_to_swap = utility.reverse_word_ids[9133]
        word_id_to_swap = utility.word_ids[utility.entry_match_token]
        utility.word_ids[word_to_swap] = utility.word_ids[
            utility.entry_match_token]
        utility.word_ids[utility.entry_match_token] = 9133
        utility.entry_match_token_id = utility.word_ids[
            utility.entry_match_token]
        utility.reverse_word_ids[word_id_to_swap] = word_to_swap
        utility.reverse_word_ids[9133] = utility.entry_match_token

        word_to_swap = utility.reverse_word_ids[9134]
        word_id_to_swap = utility.word_ids[utility.column_match_token]
        utility.word_ids[word_to_swap] = utility.word_ids[
            utility.column_match_token]
        utility.word_ids[utility.column_match_token] = 9134
        utility.column_match_token_id = utility.word_ids[
            utility.column_match_token]
        utility.reverse_word_ids[word_id_to_swap] = word_to_swap
        utility.reverse_word_ids[9134] = utility.column_match_token

        word_to_swap = utility.reverse_word_ids[9135]
        word_id_to_swap = utility.word_ids[utility.dummy_token]
        utility.word_ids[word_to_swap] = utility.word_ids[utility.dummy_token]
        utility.word_ids[utility.dummy_token] = 9135
        utility.dummy_token_id = utility.word_ids[utility.dummy_token]
        utility.reverse_word_ids[word_id_to_swap] = word_to_swap
        utility.reverse_word_ids[9135] = utility.dummy_token

        word_to_swap = utility.reverse_word_ids[9136]
        word_id_to_swap = utility.word_ids[utility.unk_token]
        utility.word_ids[word_to_swap] = utility.word_ids[utility.unk_token]
        utility.word_ids[utility.unk_token] = 9136
        utility.unk_token_id = utility.word_ids[utility.unk_token]
        utility.reverse_word_ids[word_id_to_swap] = word_to_swap
        utility.reverse_word_ids[9136] = utility.unk_token

        print(utility.entry_match_token_id, utility.column_match_token_id,
              utility.dummy_token_id, utility.unk_token_id)

    data_utils.perform_word_cutoff(utility)
    unprocessed_dev_data = copy.deepcopy(dev_data)
    # convert data to int format and pad the inputs
    train_data = data_utils.complete_wiki_processing(train_data, utility, True)
    dev_data = data_utils.complete_wiki_processing(dev_data, utility, False)
    test_data = data_utils.complete_wiki_processing(test_data, utility, False)
    print(("# train examples ", len(train_data)))
    print(("# dev examples ", len(dev_data)))
    print(("# test examples ", len(test_data)))
    return train_data, dev_data, test_data, utility, unprocessed_dev_data
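The four preserve_vocab blocks above repeat the same id swap; a minimal sketch of a helper that factors out the pattern (the name _pin_token_id is hypothetical, and it assumes utility exposes the word_ids / reverse_word_ids dicts exactly as used above):

def _pin_token_id(utility, token, target_id):
    # Give `token` the fixed id `target_id`; the word that previously held
    # `target_id` takes over the token's old id, mirroring the swaps above.
    displaced_word = utility.reverse_word_ids[target_id]
    old_id = utility.word_ids[token]
    utility.word_ids[displaced_word] = old_id
    utility.word_ids[token] = target_id
    utility.reverse_word_ids[old_id] = displaced_word
    utility.reverse_word_ids[target_id] = token
    return target_id

With it, each block reduces to a call such as utility.entry_match_token_id = _pin_token_id(utility, utility.entry_match_token, 9133), and likewise for the column match, dummy, and unk tokens (ids 9134-9136).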
def Demo(graph, utility, sess, model_dir, dat):
  i = 0
  s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  s.bind((config.socket_address, config.socket_port))
  s.listen(1)
  print("Listening to incoming questions...")
  while (True):
    conn, addr = s.accept()
    data = conn.recv(1024).decode("utf-8").split("****----****")
    table_key = data[0]
    tokens = data[1]
    question_id = 'iac-' + str(i)
    print("Question:", tokens, "Table:", table_key)
    example = dat.load_example(question_id, tokens, table_key)
    data = [example] 
    data_utils.construct_vocab(data, utility, True)
    final_data = data_utils.complete_wiki_processing(data, utility, 'demo')
    answer = get_prediction(sess, final_data, graph, utility)
    final_answer = ''

    certainty = answer[2]

    if answer[1] == 'scalar':
      final_answer = str(answer[0][0])
      debugging = answer[0][1]
      debugging['answer_neural'].append(int(answer[0][0]))
    else:
      print("Debugging in MODEL:")
      a = answer[0][0][0]
      debugging = answer[0][1]
      print(debugging)
      rows = a[1]
      col = a[2]
      rows_answer = []
      for row in rows:
        row_answer = ''
        # column indices below 15 refer to numeric columns; word columns start at 15
        if col < 15:
          list_answer = dat.custom_tables[table_key].number_columns[col][row]
        else:
          list_answer = dat.custom_tables[table_key].word_columns[col - 15][row]
        if type(list_answer) == float:
          debugging['answer_neural'].append(list_answer)
          row_answer = str(list_answer)
        else:
          for l in list_answer:
            row_answer += " " + str(l)
          debugging['answer_neural'].append(row_answer[1:])
        rows_answer.append(row_answer)
       
      final_answer = ','.join(rows_answer)

    print("Answer:", final_answer + "\n")

    if (certainty < FLAGS.certainty_threshold):
      print("I do not know the answer to your question, although that would be my guess.")
      debugging['below_threshold'] = True
      final_answer = "I cannot answer that question with the information in the table."

    result = {"answer": final_answer, "debugging": debugging}
    result = str(result)
    i += 1
    conn.send(result.encode())
    conn.close()
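For reference, a client of this loop only has to join the table key and the question with the ****----**** marker and read back the stringified result dict; a minimal sketch (the ask helper is hypothetical, and it assumes the same config module the server uses above):

import socket

import config  # assumed: the same module Demo() reads socket_address/socket_port from

def ask(table_key, question):
    # Send "<table_key>****----****<question>" and return the server's reply string.
    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    client.connect((config.socket_address, config.socket_port))
    client.send((table_key + "****----****" + question).encode("utf-8"))
    result = client.recv(4096).decode("utf-8")
    client.close()
    return result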