Example #1
def evaluate(sess, data, batch_size, graph, model_step):
    #computes accuracy
    correct_dict = defaultdict(lambda: True)
    for j in range(0, len(data) - batch_size + 1, batch_size):
        [correct_list] = sess.run([graph.final_correct_list],
                                  feed_dict=data_utils.generate_feed_dict(
                                      data, j, batch_size, graph))
        for i, cl in enumerate(correct_list):
            correct_dict[data[j + i].question_id.split('_')[0]] = correct_dict[
                data[j + i].question_id.split('_')[0]] and (cl > 0)

    # cover the last few examples not in the last batch of the above loop
    j = len(data) - batch_size
    [correct_list] = sess.run([graph.final_correct_list],
                              feed_dict=data_utils.generate_feed_dict(
                                  data, j, batch_size, graph))
    for i, cl in enumerate(correct_list):
        correct_dict[data[j + i].question_id.split('_')[0]] = correct_dict[
            data[j + i].question_id.split('_')[0]] and (cl > 0)

    gc = sum(correct_dict.values())
    num_examples = len(list(correct_dict.keys()))
    print(("dev set accuracy   after ", model_step, " : ", gc / num_examples))
    print((num_examples, len(data)))
    print("--------")
    return gc, num_examples, correct_dict
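
The defaultdict(lambda: True) pattern above folds per-example correctness flags into per-question correctness: ids such as "nt-39_0" and "nt-39_1" share the prefix before "_", and a question only counts as correct if every part is. A minimal, self-contained sketch of that aggregation, using made-up question ids:

from collections import defaultdict

# Hypothetical (question_id, correct) pairs; the ids are illustrative only.
results = [("nt-39_0", True), ("nt-39_1", False), ("nt-41_0", True)]

# Same aggregation as evaluate(): default to True, then AND in each part.
correct_by_question = defaultdict(lambda: True)
for qid, correct in results:
    key = qid.split("_")[0]
    correct_by_question[key] = correct_by_question[key] and correct

accuracy = sum(correct_by_question.values()) / len(correct_by_question)
print(dict(correct_by_question))  # {'nt-39': False, 'nt-41': True}
print(accuracy)                   # 0.5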
Example #2
def Train(graph, utility, batch_size, train_data, sess, model_dir,
          saver):
  #performs training
  curr = 0
  train_set_loss = 0.0
  utility.random.shuffle(train_data)
  start = time.time()
  for i in range(utility.FLAGS.train_steps):
    curr_step = i
    if (i > 0 and i % FLAGS.write_every == 0):
      model_file = model_dir + "/model_" + str(i)
      saver.save(sess, model_file)
    if curr + batch_size >= len(train_data):
      curr = 0
      utility.random.shuffle(train_data)
    step, cost_value = sess.run(
        [graph.step, graph.total_cost],
        feed_dict=data_utils.generate_feed_dict(
            train_data, curr, batch_size, graph, train=True, utility=utility))
    curr = curr + batch_size
    train_set_loss += cost_value
    if (i > 0 and i % FLAGS.eval_cycle == 0):
      end = time.time()
      time_taken = end - start
      print "step ", i, " ", time_taken, " seconds "
      start = end
      print " printing train set loss: ", train_set_loss / utility.FLAGS.eval_cycle
      train_set_loss = 0.0
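
The batching bookkeeping in Train (advance curr by batch_size, reshuffle and reset whenever the next batch would run past the end of the data) can be isolated from the TensorFlow calls. Below is a sketch of just that cursor logic, with plain lists standing in for training examples; the generator name is illustrative, not part of the original code.

import random

def minibatch_cursor(train_data, batch_size, num_steps, rng=random):
    # Yields (start_index, batch) pairs using the same reshuffle-on-overrun
    # cursor that Train uses before each sess.run call.
    curr = 0
    rng.shuffle(train_data)
    for _ in range(num_steps):
        if curr + batch_size >= len(train_data):
            curr = 0
            rng.shuffle(train_data)
        yield curr, train_data[curr:curr + batch_size]
        curr += batch_size

# Toy usage: 10 fake examples, 5 steps of batch size 4.
for start, batch in minibatch_cursor(list(range(10)), batch_size=4, num_steps=5):
    print(start, batch)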
Example #3
def Train(graph, utility, batch_size, train_data, sess, model_dir, saver):
    #performs training
    curr = 0
    train_set_loss = 0.0
    utility.random.shuffle(train_data)
    start = time.time()
    for i in range(utility.FLAGS.train_steps):
        curr_step = i
        if (i > 0 and i % FLAGS.write_every == 0):
            model_file = model_dir + "/model_" + str(i)
            saver.save(sess, model_file)
        if curr + batch_size >= len(train_data):
            curr = 0
            utility.random.shuffle(train_data)
        step, cost_value = sess.run([graph.step, graph.total_cost],
                                    feed_dict=data_utils.generate_feed_dict(
                                        train_data,
                                        curr,
                                        batch_size,
                                        graph,
                                        train=True,
                                        utility=utility))
        curr = curr + batch_size
        train_set_loss += cost_value
        if (i > 0 and i % FLAGS.eval_cycle == 0):
            end = time.time()
            time_taken = end - start
            print "step ", i, " ", time_taken, " seconds "
            start = end
            print " printing train set loss: ", train_set_loss / utility.FLAGS.eval_cycle
            train_set_loss = 0.0
def evaluate_custom(sess, data, answers, batch_size, graph, table_key, dat):
  #builds final answer predictions (scalar or lookup), not accuracy
  num_examples = 0.0
  gc = 0.0
  final_predictions = []
  for curr in range(0, len(data) - batch_size + 1, batch_size):
    [predictions] = sess.run(
        [graph.answers],
        feed_dict=data_utils.generate_feed_dict(data, curr, batch_size, graph))

    for i in range(batch_size):
      scalar_answer = predictions[0][i]
      lookup_answer = predictions[1][i]

      return_scalar = True
      lookup_answers = []
      j = 0
      for col in range(len(lookup_answer)):
        if not all(p == 0 for p in lookup_answer[col]):
          return_scalar = False
          if col < 15:
            col_name = data[j].number_column_names[col]
          else:
            col_name = data[j].word_column_names[col-15]
          lookup_answers.append([[i for i, e in enumerate(lookup_answer[col]) if e != 0], col])

      if return_scalar:
        final_predictions.append(str(int(scalar_answer)))
      else:
        a = lookup_answers[0]
        rows = a[0]
        col = a[1]
        rows_answer = []
        for row in rows:
          row_answer = ''
          if col < 15:
            list_answer = dat.custom_tables[table_key].number_columns[col][row]
          else:
            list_answer = dat.custom_tables[table_key].word_columns[col-15][row]
          if type(list_answer) == float:
            row_answer = str(int(list_answer))
          else:
            for l in list_answer:
              row_answer += " " + str(l)
          rows_answer.append(row_answer)

        final_answer = ','.join(rows_answer)

        if final_answer[0] == ' ':
          final_answer = final_answer[1:]

        final_predictions.append(final_answer)

  return final_predictions
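
evaluate_custom (and get_prediction later in this listing) relies on an implicit packing convention for the lookup answer's column axis: indices below 15 address the table's number columns, and indices from 15 up address its word columns, offset by 15. Here is a small sketch of that mapping; the constant and helper name are hypothetical, chosen to mirror the `col < 15` checks above.

MAX_NUMBER_COLUMNS = 15  # assumed packing width implied by the "col < 15" checks

def resolve_column_name(col, number_column_names, word_column_names):
    # Map a model column index back to the table column it refers to.
    if col < MAX_NUMBER_COLUMNS:
        return number_column_names[col]
    return word_column_names[col - MAX_NUMBER_COLUMNS]

# Toy table with two number columns and one word column.
print(resolve_column_name(1, ["year", "score"], ["team"]))   # score
print(resolve_column_name(15, ["year", "score"], ["team"]))  # team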
Example #5
def evaluate(sess, data, batch_size, graph, i):
  #computes accuracy
  num_examples = 0.0
  gc = 0.0
  for j in range(0, len(data) - batch_size + 1, batch_size):
    [ct] = sess.run([graph.final_correct],
                    feed_dict=data_utils.generate_feed_dict(data, j, batch_size,
                                                            graph))
    gc += ct * batch_size
    num_examples += batch_size
  print "dev set accuracy   after ", i, " : ", gc / num_examples
  print num_examples, len(data)
  print "--------"
Example #6
def evaluate(sess, data, batch_size, graph, i):
    #computes accuracy
    num_examples = 0.0
    gc = 0.0
    for j in range(0, len(data) - batch_size + 1, batch_size):
        [ct] = sess.run([graph.final_correct],
                        feed_dict=data_utils.generate_feed_dict(
                            data, j, batch_size, graph))
        gc += ct * batch_size
        num_examples += batch_size
    print "dev set accuracy   after ", i, " : ", gc / num_examples
    print num_examples, len(data)
    print "--------"
def get_prediction(sess, data, graph, utility, debug=True, curr=0, batch_size=1):

  debugging =  {
    'question': '',
    'table_key': '',
    'correct': True,
    'threshold': 0.0,
    'steps': [],
    'answer_neural': [],
    'cells_answer_neural': [],
    'is_lookup_neural': True,
    'answer_feedback': [],
    'cells_answer_feedback': [],
    'is_lookup_feedback': True,
    'below_threshold': False
  }

  steps = sess.run([graph.steps], feed_dict=data_utils.generate_feed_dict(data, curr, batch_size, graph))
  ops = steps[0]['ops']
  cols = steps[0]['cols']
  rows = steps[0]['rows']
  soft_ops = steps[0]['soft_ops']
  soft_cols = steps[0]['soft_cols']
  certainty = 0
  print("------------- Debugging step by step -------------")
  for i in range(len(ops)):
    step =  {
      'index': i,
      'operation_index': 0,
      'operation_name': '',
      'operation_softmax': 0,
      'column_index': 0,
      'column_name': '',
      'column_softmax': 0,
      'rows': [],
      'correct': True
    }

    op_index = np.where(ops[i] == 1)[1][0]
    op_name = utility.operations_set[op_index]
    op_certainty = soft_ops[i][0][op_index]
    step['operation_index'] = op_index
    step['operation_name'] = op_name
    step['operation_softmax'] = op_certainty

    col_index = np.where(cols[i] == 1)[1][0]
    if col_index < 15:
      col = data[0].number_column_names[col_index]
      step['column_index'] = data[0].number_column_indices[col_index]
    else:
      col = data[0].word_column_names[col_index-15]
      step['column_index'] = data[0].word_column_indices[col_index-15]

    col_name = ""
    for c in col:
      if c !='dummy_token':
        col_name += c + " "

    col_certainty = soft_cols[i][0][col_index]

    step['column_name'] = col_name[:-1]
    step['column_softmax'] = col_certainty
    
    row_index =  np.ndarray.tolist(np.where(rows[i] == 1)[1])
    step['rows'] = row_index
    debugging['steps'].append(step)

    certainty_step = op_certainty * col_certainty
    certainty += certainty_step
    print("Certainty step: " + str(certainty_step) + " with cols: " + str(col_certainty) + " certainty ops: " + str(op_certainty))
    print("Step" + str(i) + ": Operation " + op_name + ", Column " + col_name + " and Rows: ", row_index)
  certainty = (certainty / len(ops)) * 100
  print("CERTAINTY: " + str(certainty))
  print("---------------------------------------")

  answers = sess.run([graph.answers], feed_dict=data_utils.generate_feed_dict(data, curr, batch_size, graph))
  scalar_answer = answers[0][0][0]
  lookup_answer = answers[0][1][0]
  print("Scalar output:", scalar_answer)
  print("Lookup output:")
  return_scalar = True
  lookup_answers = []
  j = 0
  for col in range(len(lookup_answer)):
    if not all(p == 0 for p in lookup_answer[col]):
      return_scalar = False
      if col < 15:
        col_index = data[0].number_column_indices[col]
        col_name = data[j].number_column_names[col]
      else:
        col_index = data[0].word_column_indices[col-15]
        col_name = data[j].word_column_names[col-15]
      rows = [i for i, e in enumerate(lookup_answer[col]) if e != 0]
      for r in rows:
        debugging['cells_answer_neural'].append([r, col_index])
      lookup_answers.append([col_name, [i for i, e in enumerate(lookup_answer[col]) if e != 0], col])
      #print("Column name:", col_name, ", Selection;", [i for i, e in enumerate(lookup_answer[col]) if e != 0])
  debugging['threshold'] = FLAGS.certainty_threshold
  if return_scalar:
    debugging['is_lookup_neural'] = False
    return ([scalar_answer, debugging], 'scalar', certainty)
  else:
    debugging['is_lookup_neural'] = True
    return ([lookup_answers, debugging], 'lookup', certainty)
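
The certainty value returned by get_prediction is the per-step product of the selected operation's and column's softmax scores, averaged over the program steps and scaled to a percentage; it is presumably compared against FLAGS.certainty_threshold by the caller. A small numpy sketch of that arithmetic with made-up softmax values:

import numpy as np

# Hypothetical per-step softmax scores for the selected op and column,
# standing in for soft_ops[i][0][op_index] and soft_cols[i][0][col_index].
op_certainties = np.array([0.9, 0.7, 0.95, 0.8])
col_certainties = np.array([0.85, 0.6, 0.9, 0.75])

# Same formula as get_prediction: mean of per-step products, as a percentage.
certainty = float(np.mean(op_certainties * col_certainties)) * 100
print(round(certainty, 2))  # 66.0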