def _after_reset(self, observation): if not self.enabled: return # Reset the stat count self.stats_recorder.after_reset(observation) # Close any existing video recorder if self.video_recorder: self._close_video_recorder() # Start recording the next video. # # TODO: calculate a more correct 'episode_id' upon merge self.video_recorder = video_recorder.VideoRecorder( env=self.env, base_path=os.path.join( self.directory, '{}.video.{}.video{:06}'.format(self.file_prefix, self.file_infix, self.episode_id)), metadata={'episode_id': self.episode_id}, enabled=self._video_enabled(), ) self.video_recorder.capture_frame() # Bump *after* all reset activity has finished self.episode_id += 1 self.flush()
def _reset_video_recorder(self):
    """Tear down any active video recorder and start a fresh one for this episode."""
    # Close any existing video recorder
    if self.video_recorder:
        self._close_video_recorder()

    # Start recording the next video.
    #
    # TODO: calculate a more correct 'episode_id' upon merge
    video_name = '{}.video.{}.video{:06}'.format(
        self.file_prefix, self.file_infix, self.episode_id)
    self.video_recorder = video_recorder.VideoRecorder(
        env=self.env,
        base_path=os.path.join(self.directory, video_name),
        metadata={'episode_id': self.episode_id},
        enabled=self._video_enabled(),
    )
    self.video_recorder.capture_frame()
def main(_):
    """Run the agent for FLAGS.num_episodes, optionally capturing a movie.

    A VideoRecorder is attached only when FLAGS.movie_path is set; the
    recorded frames are flushed to <movie_path>.mp4 on close.
    """
    env = create_env(FLAGS.env_id, seed=FLAGS.seed)

    video = None
    if FLAGS.movie_path:
        video = video_recorder.VideoRecorder(env=env, base_path=FLAGS.movie_path)

    tf.set_random_seed(FLAGS.seed)
    with tf.Session() as sess:
        agent = Agent(env, sess)
        agent.run(FLAGS.num_episodes, video)
        if video is not None:
            print("Saving movie to {}.mp4".format(FLAGS.movie_path))
            video.close()
clearn.get_reward(img_test) saver = tf.train.Saver() saver.restore(sess, "train_weights/weights") sess.run(tf.global_variables_initializer()) r, enc_img = sess.run( [clearn.reward, clearn.enc_img], { graph_input: dummy_array, graph_input2: dummy_array, img_test: state }) reward = r # print("reward =", r) return reward, enc_img recorder = video_recorder.VideoRecorder(env, "numpy_vids/vid81.mp4") time_steps_lived = [] clearn = CartPoleLearn() c = 0 expert_vid = np.load("numpy_vids/vid%d.npy" % 81) expert_average = np.zeros(shape=state_size) sift = cv2.xfeatures2d.SIFT_create(nfeatures=6) for e in range(50000): env.reset() recorder.env = env recorded_frames = None ts = 1 cp_state = None
graph_input = tf.placeholder(tf.float32, (batch_size, 36, 64, 3,)) # expert vid graph_input2 = tf.placeholder(tf.float32, (batch_size, 36, 64, 3,)) # recorded vid img_test = tf.placeholder(tf.float32, (1, 36, 64, 3,)) clearn.build_encoder(graph_input, graph_input2) clearn.get_reward(img_test) saver = tf.train.Saver() saver.restore(sess, "train_weights/weights") sess.run(tf.global_variables_initializer()) r, enc_img = sess.run([clearn.reward, clearn.enc_img], {graph_input:dummy_array , graph_input2: dummy_array, img_test: state}) reward = r # print("reward =", r) return reward, enc_img recorder = video_recorder.VideoRecorder(env, "/home/bayes/Academic/DeepRL/Project/IMLearnPG/numpy_vids/vid81.mp4") time_steps_lived = [] #clearn = CartPoleLearn() c=0 expert_vid = np.load("numpy_vids/vid%d.npy" % 81) pca = PCA(n_components=4) expert_vid = list(map(lambda x: normalize_img(crop_image_gray(x).ravel()), expert_vid)) expert_vid = pca.fit_transform(expert_vid) expert_average = np.zeros(shape=state_size) exp_e_frame = np.zeros(shape=state_size)