def test_deep_q_all(self):
     self.config_test()
     self.build_cnn(self.num_actions)
     restorer = tf.train.Saver()
     total_rewards_ph = tf.placeholder(tf.float32)
     tf.summary.scalar('test/total_reward', total_rewards_ph)
     summary_op = tf.summary.merge_all()
     summary_writer = tf.summary.FileWriter(logdir=self.save_dir)
     sess = tf.Session()
     ckpt_list = list_getter(self.save_dir, 'index', 'model')
     for ckpt in ckpt_list:
         restorer.restore(sess, ckpt.replace('.index', ''))
         total_reward = 0
         for episode in range(self.num_repeat_episode):
             state = self.initialize_game()
             for step in range(self.max_step_per_episode):
                 feed = {self.state: np.expand_dims(state, axis=0),
                         self.is_train: False}
                 Qs = sess.run(self.output, feed_dict=feed)
                 action = np.argmax(Qs)
                 angle = action * 4 + 20
                 state, is_end, reward = self.take_action(angle)
                 total_reward += reward
                 if is_end:
                     break
         ckpt_num = int(ckpt.split('-')[-1].split('.')[0])
         print(ckpt_num)
         average_total_reward = float(total_reward) / float(self.num_repeat_episode)
         summary_writer.add_summary(sess.run(summary_op, {total_rewards_ph: average_total_reward}), ckpt_num)
     sess.close()
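Note: `list_getter` itself is not included in these examples. Judging from the call sites (a directory, an extension or tuple of extensions, and an optional filename keyword such as 'model'), a minimal sketch of what it presumably does is:

import os


def list_getter(dir_name, extension, must_include=None):
    # Hypothetical reconstruction: walk dir_name, keep files ending with
    # `extension` (a string or a tuple of strings), optionally require
    # `must_include` to appear in the filename, and return the paths sorted.
    ext = extension if isinstance(extension, str) else tuple(extension)
    file_list = []
    if dir_name:
        for path, _, files in os.walk(dir_name):
            for filename in files:
                if filename.endswith(ext) and (must_include is None or must_include in filename):
                    file_list.append(os.path.join(path, filename))
    file_list.sort()
    return file_list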
Example #2
 def test_deep_q(self):
     raise NotImplementedError('this method should be modified')
     self.config_test()
     self.build_cnn(self.num_actions)
     restorer = tf.train.Saver()
     total_rewards_ph = tf.placeholder(tf.int64)
     tf.summary.scalar('test/total_reward', total_rewards_ph)
     sess = tf.Session()
     ckpt = list_getter(self.save_dir, 'index', 'model')[-1]
     ckpt = ckpt.replace('.index', '')
     restorer.restore(sess, ckpt)
     total_reward = 0
     for episode in range(self.num_repeat_episode):
         self.initialize_game()
         frame, _, _ = self.take_action(
             np.random.randint(0, self.num_actions), 0)
         state_queue = deque([frame for _ in range(self.state_stack_num)],
                             maxlen=self.state_stack_num)
         for step in range(self.max_step_per_episode):
             state = np.stack(state_queue, axis=2)
             action = np.argmax(
                 sess.run(
                     self.prob_distribution, {
                         self.state: np.expand_dims(state, axis=0),
                         self.is_train: False
                     }))
             frame, is_end, reward = self.take_action(action, step)
             # if is_end:
             #     break
             state_queue.append(frame)
             total_reward += reward
     sess.close()
     average_total_reward = float(total_reward) / float(
         self.num_repeat_episode)
     print("average total reward: %d" % average_total_reward)
Example #3
 def test_deep_q(self):
     self.config_test()
     self.build_cnn(self.num_actions)
     restorer = tf.train.Saver()
     total_rewards_ph = tf.placeholder(tf.int64)
     tf.summary.scalar('test/total_reward', total_rewards_ph)
     sess = tf.Session()
     ckpt = list_getter(self.save_dir, 'index', 'model')[-1]
     ckpt = ckpt.replace('.index', '')
     restorer.restore(sess, ckpt)
     total_reward = 0
     for episode in range(self.num_repeat_episode):
         self.initialize_game()
         state_queue = deque(maxlen=self.state_stack_num)
         # iterate to fill the state queue and build the very first state
         for i in range(self.state_stack_num):
             frame, _, _ = self.take_action(0)
             state_queue.append(frame)
         for step in range(self.max_step_per_episode):
             state = np.stack(state_queue, axis=2)
             action = np.argmax(
                 sess.run(
                     self.output, {
                         self.state: np.expand_dims(state, axis=0),
                         self.is_train: False
                     }))
             frame, is_end, reward = self.take_action(action)
             # if is_end:
             #     break
             state_queue.append(frame)
             total_reward += reward
     sess.close()
     average_total_reward = float(total_reward) / float(
         self.num_repeat_episode)
     print("average total reward: %d" % average_total_reward)
Example #4
    def _input_from_image(self):
        def inspect_file_extension(target_list):
            extensions = list(
                set([
                    os.path.basename(img_name).split(".")[-1]
                    for img_name in target_list
                ]))
            if len(extensions) > 1:
                raise ValueError("Multiple image formats are used:")
            elif len(extensions) == 0:
                raise ValueError("no image files exist")

        def inspect_pairness(list1, list2):
            if not len(list1) == len(list2):
                raise ValueError("number of images are different")
            for file1, file2 in zip(list1, list2):
                file1_name = os.path.basename(file1).split(".")[-2]
                file2_name = os.path.basename(file2).split(".")[-2]
                if not file1_name == file2_name:
                    raise ValueError("image names are different: %s | %s" %
                                     (file2, file1))

        img_list = list_getter(self.config.img_dir, "jpg")
        img_list_tensor = tf.convert_to_tensor(img_list, dtype=tf.string)
        img_data = tf.data.Dataset.from_tensor_slices(img_list_tensor)
        if self.config.phase == "eval":
            gt_list = list_getter(self.seg_dir, "png")
            inspect_pairness(gt_list, img_list)
            inspect_file_extension(gt_list)
            inspect_file_extension(img_list)
            gt_list_tensor = tf.convert_to_tensor(gt_list, dtype=tf.string)
            gt_data = tf.data.Dataset.from_tensor_slices(gt_list_tensor)
            data = tf.data.Dataset.zip((img_data, gt_data))
            data = data.map(self._image_gt_parser,
                            4).batch(self.config.batch_size, False)
        else:
            data = img_data.map(self._image_parser,
                                4).batch(self.config.batch_size, False)
        data = data.prefetch(4)  # or tf.data.experimental.AUTOTUNE
        iterator = data.make_initializable_iterator()
        dataset = iterator.get_next()
        self.input_data = dataset["input_data"]
        self.gt = dataset["gt"] if self.config.phase == "eval" else None
        self.filename = dataset["filename"]
        self.data_init = iterator.initializer
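`_image_parser` and `_image_gt_parser` are not included here. A minimal sketch of the shape such a map function would need, given the dict keys consumed above ("input_data", "filename"); the decode and cast steps are assumptions, and the real parser likely also resizes and normalizes:

import tensorflow as tf


def _image_parser(img_name):
    # Hypothetical sketch of the dataset map function: decode a JPEG path
    # tensor and return the dict keys consumed downstream.
    image = tf.image.decode_jpeg(tf.read_file(img_name), channels=3)
    image = tf.cast(image, tf.float32)
    return {"input_data": image, "filename": img_name}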
Example #5
 def build(self):
     tfrecord_list = list_getter(self.tfrecord_dir, extension="tfrecord")
     if not tfrecord_list:
         raise ValueError("tfrecord does not exist: %s" % self.tfrecord_dir)
     data = tf.data.TFRecordDataset(tfrecord_list, num_parallel_reads=auto)
     data = data.map(self._tfrecord_parser, auto)
     if self.is_train_set:
         data = data.shuffle(self.batch_size * 10)
     data = data.prefetch(auto)
     data = data.batch(self.batch_size, drop_remainder=self.is_train_set)
     return data
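The `auto` value passed to `num_parallel_reads`, `map`, and `prefetch` above is not defined in this snippet; it is presumably a module-level alias for TF's autotuning constant. A sketch of that assumption, with hypothetical usage of the returned dataset:

import tensorflow as tf

# Presumed module-level alias used by build(); in TF 1.x the constant lives
# under tf.data.experimental.
auto = tf.data.experimental.AUTOTUNE

# Hypothetical usage of the dataset returned by build():
# data = loader.build()
# batch = data.make_one_shot_iterator().get_next()
# with tf.Session() as sess:
#     first_batch = sess.run(batch)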
Example #6
    def _get_ckpt_in_range(self):
        all_ckpt_list = [_.split(".index")[0] for _ in list_getter(self.config.model_dir, 'index')]
        ckpt_pattern = './model/checkpoints/model_step-%d'
        if self.config.ckpt_start == 'beginning':
            start_idx = 0
        else:
            start_idx = all_ckpt_list.index(ckpt_pattern % self.config.ckpt_start)

        if self.config.ckpt_end == 'end':
            end_idx = None
        else:
            end_idx = all_ckpt_list.index(ckpt_pattern % self.config.ckpt_end) + 1
        return all_ckpt_list[start_idx:end_idx:self.config.ckpt_step]
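For reference, the slicing above selects every `ckpt_step`-th checkpoint between `ckpt_start` and `ckpt_end` inclusive. A small illustration with hypothetical checkpoint paths saved every 1000 steps:

# Illustration with ckpt_start=2000, ckpt_end=8000, ckpt_step=2 (hypothetical paths):
all_ckpt_list = ['./model/checkpoints/model_step-%d' % s for s in range(0, 10000, 1000)]
start_idx = all_ckpt_list.index('./model/checkpoints/model_step-2000')
end_idx = all_ckpt_list.index('./model/checkpoints/model_step-8000') + 1
print(all_ckpt_list[start_idx:end_idx:2])
# -> checkpoints at steps 2000, 4000, 6000 and 8000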
Example #7
 def _build(self):
     tfrecord_list = list_getter(self._src_dir, "tfrecord")
     if not tfrecord_list:
         raise ValueError("tfrecord is not given")
     data = tf.data.TFRecordDataset(tfrecord_list)
     data = data.shuffle(self._batch * 10)
     data = data.map(self._parser, 4).batch(self._batch, True)
     data = data.prefetch(2)
     iterator = data.make_initializable_iterator()
     data_batched = iterator.get_next()
     self.image = data_batched["image"]
     self.gt = data_batched["gt"]
     self.data_init = iterator.initializer
Example #8
 def test_deep_q_all(self):
     self.config_test()
     self.build_cnn(self.num_actions)
     restorer = tf.train.Saver()
     total_rewards_ph = tf.placeholder(tf.float32)
     tf.summary.scalar('test/total_reward', total_rewards_ph)
     summary_op = tf.summary.merge_all()
     summary_writer = tf.summary.FileWriter(logdir=self.save_dir)
     sess = tf.Session()
     ckpt_list = list_getter(self.save_dir, 'index', 'model')
     for ckpt in ckpt_list:
         ckpt = ckpt.replace('.index', '')
         restorer.restore(sess, ckpt)
         total_reward = 0
         for episode in range(self.num_repeat_episode):
             self.initialize_game()
             state_queue = deque(maxlen=self.state_stack_num)
             # iterate to fill the state queue and build the very first state
             for i in range(self.state_stack_num):
                 frame, _, _ = self.take_action(0)
                 state_queue.append(frame)
             for step in range(self.max_step_per_episode):
                 state = np.stack(state_queue, axis=2)
                 action = np.argmax(
                     sess.run(
                         self.output, {
                             self.state: np.expand_dims(state, axis=0),
                             self.is_train: False
                         }))
                 frame, is_end, reward = self.take_action(action)
                 if is_end:
                     break
                 state_queue.append(frame)
                 total_reward += reward
         ckpt_num = int(ckpt.split('-')[-1])
         print(ckpt_num)
         average_total_reward = float(total_reward) / float(
             self.num_repeat_episode)
         summary_writer.add_summary(
             sess.run(summary_op, {total_rewards_ph: average_total_reward}),
             ckpt_num)
     sess.close()
Example #9
 def _vis_with_video(self, sess):
     vid_list = list_getter(self.config.img_dir, ("avi", "mp4"))
     for vid_name in vid_list:
         vid = VideoCapture(vid_name)
         fps = round(vid.get(5))  # property 5 is cv2.CAP_PROP_FPS
         should_continue, frame = vid.read()
         basename = os.path.basename(vid_name)[:-4]
         dst_name = self.config.vis_result_dir + "/" + basename + ".avi"
         h, w, _ = frame.shape
         pred = sess.run(self.pred, {self.input_data: np.expand_dims(frame, 0)})
         superimposed = self._superimpose(frame, pred)
         vid_out = VideoWriter(dst_name, VideoWriter_fourcc(*"XVID"), fps, (w, h))
         vid_out.write(superimposed.astype(np.uint8))
         while should_continue:
             should_continue, frame = vid.read()
             if should_continue:
                 pred = sess.run(self.pred, {self.input_data: np.expand_dims(frame, 0)})
                 superimposed = self._superimpose(frame, pred)
                 vid_out.write(superimposed.astype(np.uint8))
         vid_out.release()
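`_superimpose` is not included in these examples. A minimal sketch of a typical implementation (the colormap and blend weight are assumptions), overlaying the prediction on the frame:

import cv2
import numpy as np


def _superimpose(frame, pred, alpha=0.5):
    # Hypothetical sketch: color-map the (1, H, W) integer prediction and
    # alpha-blend it onto the BGR frame.
    label_map = np.squeeze(pred).astype(np.uint8)
    colored = cv2.applyColorMap(label_map * 30, cv2.COLORMAP_JET)
    return cv2.addWeighted(frame.astype(np.uint8), 1.0 - alpha, colored, alpha, 0.0)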
Example #10
 def test_deep_q(self):
     self.config_test()
     self.build_cnn(self.num_actions)
     restorer = tf.train.Saver()
     sess = tf.Session()
     ckpt = list_getter(self.save_dir, 'index', 'model')[-1]
     restorer.restore(sess, ckpt.replace('.index', ''))
     total_reward = 0
     for episode in range(self.num_repeat_episode):
         state = self.initialize_game()
         for step in range(self.max_step_per_episode):
             feed = {self.state: np.expand_dims(state, axis=0),
                     self.is_train: False}
             Qs = sess.run(self.output, feed_dict=feed)
             action = np.argmax(Qs)
             angle = action * 4 + 20
             state, is_end, reward = self.take_action(angle)
             total_reward += reward
             if is_end:
                 break
     print("average_total_reward =%.4f" % (float(total_reward) / float(self.num_repeat_episode)))
     sess.close()
Example #11
 def _get_ckpt(self):
     all_ckpt_list = [_.split(".index")[0] for _ in list_getter(self.config.model_dir, 'index')]
     ckpt_pattern = './model/checkpoints/model_step-%d'
     return all_ckpt_list[all_ckpt_list.index(ckpt_pattern % self.config.ckpt_id)]
Example #12
    def train_deep_q(self):
        self.config_train()
        self.build_cnn(self.num_actions)
        self.state_population = Queue()
        onehot_actions = tf.one_hot(self.actions, self.num_actions)
        q_value = tf.reduce_sum(tf.multiply(self.output, onehot_actions), axis=1)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.loss = tf.reduce_mean(tf.square(self.q_hat - q_value))
        optm = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)
        self.optm = tf.group([optm, update_ops])
        saver = tf.train.Saver(max_to_keep=1000)
        total_reward_ph = tf.placeholder(tf.float32)
        tf.summary.scalar('train/reward_gain', total_reward_ph)
        action_ph = tf.placeholder(tf.int64)
        tf.summary.histogram('train/action', action_ph)

        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(logdir=self.save_dir)

        sess = tf.Session()
        length = 0
        ckpt_list = list_getter(self.save_dir, 'index', 'model')
        if not ckpt_list:
            sess.run(tf.global_variables_initializer())
            self.fill_state_population()
            ep_start = 0
        else:
            length = 999999
            saver.restore(sess, ckpt_list[-1].replace('.index', ''))
            self.fill_state_population(sess, True)
            ep_start = int(ckpt_list[-1].split('-')[-1].split('.')[0])
        for episode in range(ep_start + 1, self.train_episodes):
            tic = time.time()
            action_record = []
            total_reward = 0.0
            current_state = self.initialize_game()
            # Note:
            # current_state[:, :, 1] = next_state[:, :, 0]
            # current_state[:, :, 2] = next_state[:, :, 1]
            # current_state[:, :, 3] = next_state[:, :, 2]
            for step in range(self.max_step_per_episode):
                length += 1
                # Explore or Exploit
                explore_prob = self.explore_stop + \
                               (self.explore_start - self.explore_stop) * \
                               np.exp(-self.decay_rate * length)
                if explore_prob > np.random.rand():
                    # explore and get random action
                    action = random.randint(0, 35)  # min and max shooting angle
                else:
                    # Get action from the model
                    feed = {self.state: np.expand_dims(current_state, axis=0),
                            self.is_train: False}
                    Qs = sess.run(self.output, feed_dict=feed)
                    action = np.argmax(Qs)
                    action_record.append(action)

                angle = action * 4 + 20  # angle changes in steps of 4, from a minimum of 20 to a maximum of 160
                next_state, is_end, reward = self.take_action(angle)

                if is_end:
                    self.state_population.add((current_state,
                                               action,
                                               reward,
                                               next_state))
                    break
                total_reward += reward
                self.state_population.add((current_state,
                                           action,
                                           reward,
                                           next_state))

                current_state = next_state

                # Sample mini-batch from state_queue
                batch = self.state_population.sample(self.batch_size)
                current_state_batch = np.array([each[0] for each in batch])
                actions_batch = np.array([each[1] for each in batch])
                rewards_batch = np.array([each[2] for each in batch])
                next_state_batch = np.array([each[3] for each in batch])

                # Q values for the next_state, which is going to be our target Q
                target_Qs = sess.run(self.output, feed_dict={self.state: next_state_batch,
                                                             self.is_train: True})
                end_game_index = rewards_batch < 0
                target_Qs[end_game_index] = np.zeros(self.num_actions)

                q_hat = rewards_batch + self.gamma * np.max(target_Qs, axis=1)

                loss, _ = sess.run([self.loss, self.optm], feed_dict={self.state: current_state_batch,
                                                                      self.q_hat: q_hat,
                                                                      self.actions: actions_batch,
                                                                      self.is_train: True})

            print('Episode: {},'.format(episode),
                  'total_reward: {:.4f},'.format(total_reward),
                  'explore prob: {:.4f},'.format(explore_prob),
                  'duration: {:.4f}'.format(time.time() - tic))
            summary_writer.add_summary(sess.run(summary_op, {total_reward_ph: total_reward,
                                                             action_ph: action_record}), episode)
            if episode % 50 == 0:
                saver.save(sess, self.save_dir + '/model', episode)
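The `Queue` used for `state_population` is not the standard-library queue (it exposes `add` and `sample`). A minimal experience-replay sketch matching that interface; the capacity and uniform sampling are assumptions:

import random
from collections import deque


class Queue:
    # Hypothetical experience-replay buffer matching the add()/sample()
    # interface used by train_deep_q(); keeps at most `capacity` transitions.
    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)

    def add(self, transition):
        # transition = (current_state, action, reward, next_state)
        self.buffer.append(transition)

    def sample(self, batch_size):
        # Uniform random mini-batch; assumes batch_size <= len(self.buffer).
        return random.sample(list(self.buffer), batch_size)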