Пример #1
0
  # Record the observation for this step (last label and image) before an action is taken on it.
  observation_last_labels.append(observation_last_label_t)
  observation_images.append(observation_image_t)
  # Sample one uniformly random action index in [0, num_actions) per entry of a
  # vector of length sim.batch_size_t, then one-hot encode it over num_actions.
  action_t = tf.one_hot(tf.random_uniform([sim.batch_size_t], maxval=num_actions, dtype=tf.int32), num_actions)
  actions.append(action_t)
  # sim.do_step returns a reward together with a new (image, last label) pair;
  # the pair overwrites the current observation variables.
  # NOTE(review): the enclosing loop is outside this view -- presumably one pass per time step; confirm.
  reward_t, (observation_image_t, observation_last_label_t) = sim.do_step(action_t)
  rewards.append(reward_t)

observation_images_t = tf.pack(observation_images, 1)
actions_t = tf.pack(actions, 1)
observation_last_labels_t = tf.pack(observation_last_labels, 1)
rewards_t = tf.pack(rewards, 1)

sess = tf.Session()
#sess.run(init_op)

images, labels, last_labels = data.get_batch_of_episodes(BATCH_SIZE, TIME_STEPS, CLASSES_PER_EPISODE)
observation_images, actions, observation_last_labels, rewards = sess.run((observation_images_t, actions_t, observation_last_labels_t, rewards_t), 
                                                                         { images_t: images,
                                                                           #labels_t: labels,
                                                                           last_labels_t: last_labels })
b = random.choice(range(BATCH_SIZE))
for t in range(actions.shape[1]):
  image = observation_images[b,t]
  im = Image.fromarray(np.uint8(image*255))
  print t
  print 'response from last question: ', observation_last_labels[b,t]
  print 'current image'
  print 'action for current image: ', actions[b,t]
  print 'reward for current image + current action: ', rewards[b,t]
  im.show()
  raw_input()
Пример #2
0
        #                                                         accuracy_1st_summary_t: accuracy_1st,
        #                                                         accuracy_2nd_summary_t: accuracy_2nd,
        #                                                         accuracy_5th_summary_t: accuracy_5th,
        #                                                         accuracy_10th_summary_t: accuracy_10th,
        #                                                         question_ave_summary_t: question_ave,
        #                                                         question_1st_summary_t: question_1st,
        #                                                         question_2nd_summary_t: question_2nd,
        #                                                         question_5th_summary_t: question_5th,
        #                                                         question_10th_summary_t: question_10th
        #                                                       }), e+1)
        # train_writer.flush()

        # Evaluate one batch drawn from the test data, with epsilon fed as 0.0
        # (presumably this disables random exploration -- confirm against the
        # graph definition).
        images, labels, last_labels = data.get_batch_of_episodes(
            BATCH_SIZE, TIME_STEPS, CLASSES_PER_EPISODE, NUM_LABELS,
            use_test_data=True)
        test_feed = {
            images_t: images,
            labels_t: labels,
            last_labels_t: last_labels,
            epsilon_t: 0.0,
        }
        predictions, rewards, loss = sess.run(
            [predictions_t, rewards_t, loss_t], test_feed)

        # Class indices along axis 2. The final entry along axis 1 of `labels`
        # is dropped before the comparison (presumably to line up with the
        # shape of `predictions` -- confirm).
        true_labels = np.argmax(labels[:, :-1], axis=2)
        pred_labels = np.argmax(predictions, axis=2)

        # Mean reward and mean element-wise label agreement over the batch.
        reward_max = np.mean(rewards)
        accuracy_max = np.mean(true_labels == pred_labels)
        num_correct_predictions = np.sum(