def __init__(self, batch_env):
  """Wraps `batch_env` and attaches a discrete autoencoder model.

  Args:
    batch_env: batch environment to wrap; `len(self)`, `self.observ_shape`
      and `self.observ_dtype` (provided via the superclass) size the
      observation variable.
  """
  super(AutoencoderWrapper, self).__init__(batch_env)
  # Bug fix: the original read `self._observ = self._observ = tf.Variable(...)`
  # — a redundant double assignment; a single one suffices.
  self._observ = tf.Variable(
      tf.zeros((len(self),) + self.observ_shape, self.observ_dtype),
      trainable=False)
  # AUTO_REUSE so the autoencoder shares weights with any copy of the model
  # already built in this graph.
  with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
    # NOTE(review): hparams are hard-coded to the Pong autoencoder config.
    autoencoder_hparams = autoencoders.autoencoder_discrete_pong()
    self.autoencoder_model = autoencoders.AutoencoderOrderedDiscrete(
        autoencoder_hparams, tf.estimator.ModeKeys.EVAL)
    self.autoencoder_model.set_mode(tf.estimator.ModeKeys.EVAL)
def setup_autoencoder(self):
  """Lazily constructs the autoencoder model; no-op when already built."""
  if self.autoencoder_model is not None:
    return
  # AUTO_REUSE: share weights with any autoencoder already in this graph.
  with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
    hparams = autoencoders.autoencoder_discrete_pong()
    hparams.data_dir = "unused"
    hparams.problem_hparams = self.get_hparams(hparams)
    hparams.problem = self
    self.autoencoder_model = autoencoders.AutoencoderOrderedDiscrete(
        hparams, tf.estimator.ModeKeys.EVAL)
def __init__(self, batch_env):
  """Wraps `batch_env` with an observation variable sized for encoded frames.

  The observation buffer is shrunk by `self.autoencoder_factor` in both
  spatial dimensions relative to the raw environment observations.

  Args:
    batch_env: batch environment whose `observ` tensor supplies the raw
      (batch, height, width, channels) shape.
  """
  super(AutoencoderWrapper, self).__init__(batch_env)
  batch_size, height, width, _ = self._batch_env.observ.get_shape().as_list()
  # Encoded frames are smaller by the autoencoder's downscaling factor;
  # ceil so odd sizes still fit.
  ae_height = int(math.ceil(height / self.autoencoder_factor))
  ae_width = int(math.ceil(width / self.autoencoder_factor))
  ae_channels = 24  # TODO(piotrmilos): make it better
  observ_shape = (batch_size, ae_height, ae_width, ae_channels)
  # Bug fix: the original read `self._observ = self._observ = tf.Variable(...)`
  # — a redundant double assignment; a single one suffices.
  self._observ = tf.Variable(
      tf.zeros(observ_shape, tf.float32), trainable=False)
  # AUTO_REUSE so the autoencoder shares weights with any copy of the model
  # already built in this graph.
  with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
    autoencoder_hparams = autoencoders.autoencoder_discrete_pong()
    self.autoencoder_model = autoencoders.AutoencoderOrderedDiscrete(
        autoencoder_hparams, tf.estimator.ModeKeys.EVAL)
    self.autoencoder_model.set_mode(tf.estimator.ModeKeys.EVAL)
def autoencode(self, image, sess):
  """Encodes a single raw frame with the pretrained autoencoder.

  Args:
    image: a raw frame of shape (frame_height, frame_width, num_channels)
      — assumed; TODO confirm against callers.
    sess: TF session used to restore the checkpoint and run the encoder.
      NOTE(review): the encode ops are built in a fresh `tf.Graph`, so
      `sess` presumably must have been created against that graph — verify.

  Returns:
    The encoder output as evaluated by `sess.run`.
  """
  with tf.Graph().as_default():
    ae_hparams = autoencoders.autoencoder_discrete_pong()
    ae_hparams.data_dir = "unused"
    ae_hparams.problem_hparams = self.get_hparams(ae_hparams)
    ae_hparams.problem = self
    ae_model = autoencoders.AutoencoderOrderedDiscrete(
        ae_hparams, tf.estimator.ModeKeys.EVAL)
    # Add batch and time dimensions of 1 and cast to int32 for the encoder.
    frame = tf.to_int32(tf.reshape(
        tf.constant(image),
        [1, 1, self.frame_height, self.frame_width, self.num_channels]))
    encoded = ae_model.encode(frame)
    saver = tf.train.Saver(tf.global_variables())
    saver.restore(sess, FLAGS.autoencoder_path)
    return sess.run(encoded)
def autoencoder_factor(self):
  """By how much to divide sizes when using autoencoders."""
  # The factor is exponential in the autoencoder's hidden-layer count.
  num_layers = autoencoders.autoencoder_discrete_pong().num_hidden_layers
  return 2 ** num_layers
def _setup(self):
  """Builds the in-graph data-collection machinery.

  Constructs (in order): optional real-env debug bookkeeping, a batched
  environment wrapped with `atari.MemoryWrapper`, a policy template (either
  the configured network or a random policy), the collect trigger op, an
  optional autoencoder encode path fed via a placeholder, and finally the
  size/dequeue ops for the memory speculum queue.
  """
  if self.make_extra_debug_info:
    # Extra bookkeeping against the real environment for debugging/statistics.
    self.report_reward_statistics_every = 10
    self.dones = 0
    self.real_reward = 0
    self.real_env.reset()
    # Slight weirdness to make sim env and real env aligned
    for _ in range(simulated_batch_env.SimulatedBatchEnv.
                   NUMBER_OF_HISTORY_FRAMES):
      self.real_ob, _, _, _ = self.real_env.step(0)
    self.total_sim_reward, self.total_real_reward = 0.0, 0.0
    self.successful_episode_reward_predictions = 0

  # MemoryWrapper records transitions; its singleton speculum queue is read
  # at the end of this method.
  in_graph_wrappers = self.in_graph_wrappers + [(atari.MemoryWrapper, {})]
  env_hparams = tf.contrib.training.HParams(
      in_graph_wrappers=in_graph_wrappers,
      simulated_environment=self.simulated_environment)

  generator_batch_env = batch_env_factory(
      self.environment_spec, env_hparams, num_agents=1, xvfb=False)

  with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
    if FLAGS.agent_policy_path:
      policy_lambda = self.collect_hparams.network
    else:
      # When no agent_policy_path is set, just generate random samples.
      policy_lambda = rl.random_policy_fun
    # Template so policy variables are created once and reused thereafter.
    policy_factory = tf.make_template(
        "network",
        functools.partial(policy_lambda, self.environment_spec().action_space,
                          self.collect_hparams),
        create_scope_now_=True,
        unique_name_="network")

  with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
    self.collect_hparams.epoch_length = 10
    _, self.collect_trigger_op = collect.define_collect(
        policy_factory, generator_batch_env, self.collect_hparams,
        eval_phase=False, scope="define_collect")

  if FLAGS.autoencoder_path:
    # TODO(lukaszkaiser): remove hard-coded autoencoder params.
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
      autoencoder_hparams = autoencoders.autoencoder_discrete_pong()
      autoencoder_hparams.data_dir = "unused"
      autoencoder_hparams.problem_hparams = self.get_hparams(
          autoencoder_hparams)
      autoencoder_hparams.problem = self
      autoencoder_model = autoencoders.AutoencoderOrderedDiscrete(
          autoencoder_hparams, tf.estimator.ModeKeys.EVAL)
      # Placeholder for one raw frame; encoded on demand via
      # `self.autoencoder_result`.
      shape = [
          self.raw_frame_height, self.raw_frame_width, self.num_channels
      ]
      self.autoencoder_feed = tf.placeholder(tf.int32, shape=shape)
      autoencoded = autoencoder_model.encode(
          tf.reshape(self.autoencoder_feed, [1, 1] + shape))
      autoencoded = tf.reshape(
          autoencoded, [
              self.frame_height, self.frame_width, self.num_channels, 8
          ])  # 8-bit groups.
      self.autoencoder_result = discretization.bit_to_int(autoencoded, 8)

  # NOTE(review): `avilable` is a typo, but the attribute name is part of the
  # public interface — renaming would break external readers.
  self.avilable_data_size_op = atari.MemoryWrapper.singleton.speculum.size()
  self.data_get_op = atari.MemoryWrapper.singleton.speculum.dequeue()