def get_last_features(self, x, reuse):
    x_has_timesteps = (x.get_shape().ndims == 5)
    if x_has_timesteps:
        sh = tf.shape(x)
        x = flatten_two_dims(x)
    with tf.variable_scope(self.scope + "_features", reuse=reuse):
        # Normalize observations and embed each frame with the shared convnet.
        x = (tf.to_float(x) - self.ob_mean) / self.ob_std
        x = small_convnet(x, nl=self.nl, feat_dim=self.feat_dim, last_nl=None,
                          layernormalize=self.layernormalize)
    if x_has_timesteps:
        x = unflatten_first_dim(x, sh)
    x = tf.reshape(x, [-1, sh[1], self.feat_dim])
    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
        # Run the (optionally stacked) LSTM over the per-timestep features,
        # starting from the carried-over cell/hidden states.
        init_1 = tf.contrib.rnn.LSTMStateTuple(self.last_c_in_1, self.last_h_in_1)
        if self.lstm2_size:
            init_2 = tf.contrib.rnn.LSTMStateTuple(self.last_c_in_2, self.last_h_in_2)
        if self.aux_input:
            # Optionally condition the first LSTM on the previous reward.
            prev_rews = tf.expand_dims(self.ph_last_rew, -1)
            x = tf.concat([x, prev_rews], -1)
        x, c_out_1, h_out_1 = lstm(self.lstm1_size)(x, initial_state=init_1)
        if self.lstm2_size:
            if self.aux_input:
                # The second LSTM additionally sees the previous action and velocity.
                prev_acs = tf.one_hot(self.ph_last_ac, depth=self.num_actions)
                x = tf.concat([x, tf.cast(prev_acs, tf.float32)], -1)
                x = tf.concat([x, self.ph_last_vel], -1)
            x, c_out_2, h_out_2 = lstm(self.lstm2_size)(x, initial_state=init_2)
    return x
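# The feature methods above rely on the repo helpers flatten_two_dims /
# unflatten_first_dim to merge and restore the leading (batch, time) dimensions
# around the convnet call. Their definitions are not shown in this snippet;
# a minimal sketch of the assumed behavior, for reference only:
def flatten_two_dims(x):
    # [B, T, H, W, C] -> [B * T, H, W, C]
    return tf.reshape(x, [-1] + x.get_shape().as_list()[2:])

def unflatten_first_dim(x, sh):
    # [B * T, feat] -> [B, T, feat], where sh = tf.shape(original 5-D input)
    return tf.reshape(x, [sh[0], sh[1]] + x.get_shape().as_list()[1:])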
def __init__(self, ob_space, ac_space, hidsize, ob_mean, ob_std, feat_dim,
             layernormalize, nl, scope="policy"):
    if layernormalize:
        print("Warning: policy is operating on top of layer-normed features. "
              "It might slow down the training.")
    self.layernormalize = layernormalize
    self.nl = nl
    self.ob_mean = ob_mean
    self.ob_std = ob_std
    self.ob_space = ob_space
    self.ac_space = ac_space
    self.ac_pdtype = make_pdtype(ac_space)
    self.hidsize = hidsize
    self.feat_dim = feat_dim
    self.scope = scope
    pdparamsize = self.ac_pdtype.param_shape()[0]

    # Convolutional feature extractor followed by a two-layer MLP trunk with
    # separate policy (pd) and value (vf) heads.
    self.features_model = small_convnet(self.ob_space, nl=self.nl, feat_dim=self.feat_dim,
                                        last_nl=None, layernormalize=self.layernormalize)
    self.pd_hidden = torch.nn.Sequential(
        torch.nn.Linear(feat_dim, hidsize),
        torch.nn.ReLU(),
        torch.nn.Linear(hidsize, hidsize),
        torch.nn.ReLU(),
    )
    self.pd_head = torch.nn.Linear(hidsize, pdparamsize)
    self.vf_head = torch.nn.Linear(hidsize, 1)

    self.param_list = [
        dict(params=self.features_model.parameters()),
        dict(params=self.pd_hidden.parameters()),
        dict(params=self.pd_head.parameters()),
        dict(params=self.vf_head.parameters()),
    ]

    self.flat_features = None
    self.pd = None
    self.vpred = None
    self.ac = None
    self.ob = None
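# A hypothetical forward pass through the modules built above, to show how the
# pieces fit together. This method is not part of the original class; the
# input layout expected by small_convnet and the normalization mirroring the
# TF feature code are assumptions.
def forward_sketch(self, obs):
    # obs: observation batch as a tensor; normalize with the running mean/std.
    x = (obs.float() - torch.as_tensor(self.ob_mean)) / self.ob_std
    feats = self.features_model(x)            # [batch, feat_dim]
    hidden = self.pd_hidden(feats)            # [batch, hidsize]
    pdparam = self.pd_head(hidden)            # [batch, pdparamsize], feeds ac_pdtype
    vpred = self.vf_head(hidden).squeeze(-1)  # [batch] value estimates
    return pdparam, vpred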
def get_features(self, x, reuse):
    nl = tf.nn.leaky_relu
    x_has_timesteps = (x.get_shape().ndims == 5)
    if x_has_timesteps:
        sh = tf.shape(x)
        x = flatten_two_dims(x)
    with tf.variable_scope(self.scope + "_features", reuse=reuse):
        x = (tf.to_float(x) - self.ob_mean) / self.ob_std
        x = small_convnet(x, nl=nl, feat_dim=self.feat_dim, last_nl=nl,
                          layernormalize=False)
    if x_has_timesteps:
        x = unflatten_first_dim(x, sh)
    return x
def get_features(self, x, reuse):
    x_has_timesteps = (x.get_shape().ndims == 5)
    if x_has_timesteps:
        sh = tf.shape(x)
        x = flatten_two_dims(x)
    with tf.variable_scope(self.scope + "_features", reuse=reuse):
        x = tf.to_float(x)
        x = small_convnet(x, nl=self.nl, feat_dim=self.feat_dim, last_nl=None,
                          layernormalize=self.layernormalize)
    if x_has_timesteps:
        x = unflatten_first_dim(x, sh)
    return x
def get_features(self, x, reuse):
    # Record whether the input has a time dimension before flattening it,
    # otherwise the check after the convnet would always be False.
    x_has_timesteps = (x.get_shape().ndims == 5)
    if x_has_timesteps:
        shape = tf.shape(x)
        x = flatten_two_dims(x)
    with tf.variable_scope(self.scope + '_features', reuse=reuse):
        x = (tf.cast(x, tf.float32) - self.ob_mean) / self.ob_std
        x = small_convnet(x, nl=self.nl, feat_dim=self.feat_dim, last_nl=None,
                          layernormalize=self.layernormalize)
    if x_has_timesteps:
        x = unflatten_first_dim(x, shape)
    return x
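# The three get_features variants above differ only in input scaling, the
# choice of nonlinearity (including the last layer), and layer normalization.
# A hypothetical consolidation into one parameterized helper could look like
# this; the name and keyword arguments are illustrative, not original code.
def _features_sketch(self, x, reuse, nl, last_nl, layernormalize, normalize_obs=True):
    x_has_timesteps = (x.get_shape().ndims == 5)
    if x_has_timesteps:
        sh = tf.shape(x)
        x = flatten_two_dims(x)
    with tf.variable_scope(self.scope + "_features", reuse=reuse):
        x = tf.to_float(x)
        if normalize_obs:
            x = (x - self.ob_mean) / self.ob_std
        x = small_convnet(x, nl=nl, feat_dim=self.feat_dim, last_nl=last_nl,
                          layernormalize=layernormalize)
    if x_has_timesteps:
        x = unflatten_first_dim(x, sh)
    return x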
def __init__(self, auxiliary_task, predict_from_pixels, feat_dim=None, scope='dynamics'):
    self.scope = scope
    self.auxiliary_task = auxiliary_task
    self.hidsize = self.auxiliary_task.hidsize
    self.feat_dim = feat_dim
    self.ob_space = self.auxiliary_task.ob_space
    self.ac_space = self.auxiliary_task.ac_space
    self.ob_mean = self.auxiliary_task.ob_mean
    self.ob_std = self.auxiliary_task.ob_std
    self.predict_from_pixels = predict_from_pixels
    self.param_list = []
    if predict_from_pixels:
        self.features_model = small_convnet(self.ob_space, nl=torch.nn.LeakyReLU,
                                            feat_dim=self.feat_dim,
                                            last_nl=torch.nn.LeakyReLU,
                                            layernormalize=False)
        self.param_list = self.param_list + [dict(params=self.features_model.parameters())]
    else:
        # Unclear why the original implementation builds a separate net here;
        # in this case the dynamics model reuses the auxiliary task's features.
        self.features_model = None
    self.loss_net = loss_net(nblocks=4, feat_dim=self.feat_dim, ac_dim=self.ac_space.n,
                             out_feat_dim=self.feat_dim, hidsize=self.hidsize)
    self.param_list = self.param_list + [dict(params=self.loss_net.parameters())]
    self.features = None
    self.next_features = None
    self.ac = None
    self.ob = None
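# loss_net is not defined in this snippet. Judging from its constructor
# arguments (nblocks, feat_dim, ac_dim, out_feat_dim, hidsize), it is
# presumably a residual MLP that maps (features, one-hot action) to predicted
# next features, as in standard forward-dynamics curiosity models. A hedged
# sketch of that assumption; class and method names are illustrative.
class LossNetSketch(torch.nn.Module):
    def __init__(self, nblocks, feat_dim, ac_dim, out_feat_dim, hidsize):
        super().__init__()
        self.inp = torch.nn.Linear(feat_dim + ac_dim, hidsize)
        self.blocks = torch.nn.ModuleList([
            torch.nn.Sequential(
                torch.nn.Linear(hidsize + ac_dim, hidsize),
                torch.nn.LeakyReLU(),
                torch.nn.Linear(hidsize, hidsize),
            )
            for _ in range(nblocks)
        ])
        self.out = torch.nn.Linear(hidsize + ac_dim, out_feat_dim)

    def forward(self, feats, ac_onehot):
        # Re-concatenate the action before every block and before the output head.
        x = torch.nn.functional.leaky_relu(self.inp(torch.cat([feats, ac_onehot], -1)))
        for block in self.blocks:
            x = x + block(torch.cat([x, ac_onehot], -1))  # residual connection
        return self.out(torch.cat([x, ac_onehot], -1))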
def __init__(self, policy, features_shared_with_policy, feat_dim=None,
             layernormalize=None, scope='feature_extractor'):
    self.scope = scope
    self.features_shared_with_policy = features_shared_with_policy
    self.feat_dim = feat_dim
    self.layernormalize = layernormalize
    self.policy = policy
    self.hidsize = policy.hidsize
    self.ob_space = policy.ob_space
    self.ac_space = policy.ac_space
    self.ob_mean = self.policy.ob_mean
    self.ob_std = self.policy.ob_std
    self.param_list = []
    if features_shared_with_policy:
        # Reuse the policy's convnet features instead of building a new extractor.
        self.features_model = None
    else:
        self.features_model = small_convnet(self.ob_space, nl=torch.nn.LeakyReLU,
                                            feat_dim=self.feat_dim, last_nl=None,
                                            layernormalize=self.layernormalize)
        self.param_list = self.param_list + [dict(params=self.features_model.parameters())]
    self.features = None
    self.next_features = None
    self.ac = None
    self.ob = None
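# A hypothetical helper showing how this extractor would obtain features
# depending on features_shared_with_policy. It is not part of the original
# class; reusing policy.features_model directly (rather than features the
# policy has already computed) is an assumption for illustration.
def _extract_features_sketch(self, obs):
    x = (obs.float() - torch.as_tensor(self.ob_mean)) / self.ob_std
    if self.features_shared_with_policy:
        # Share the policy's convnet so both components see identical embeddings.
        return self.policy.features_model(x)
    return self.features_model(x)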
def __init__(self, policy, features_shared_with_policy, feat_dim=None,
             layernormalize=False, spherical_obs=False):
    assert not layernormalize, \
        "VAE features should already have reasonable size, no need to layer normalize them"
    super(VAE, self).__init__(scope="vae",
                              policy=policy,
                              features_shared_with_policy=features_shared_with_policy,
                              feat_dim=feat_dim,
                              layernormalize=False)
    # The encoder outputs 2 * feat_dim values per observation: posterior mean and spread.
    self.features_model = small_convnet(self.ob_space, nl=torch.nn.LeakyReLU,
                                        feat_dim=2 * self.feat_dim, last_nl=None,
                                        layernormalize=False)
    self.decoder_model = small_deconvnet(self.ob_space, feat_dim=self.feat_dim,
                                         nl=torch.nn.LeakyReLU,
                                         ch=4 if spherical_obs else 8,
                                         positional_bias=True)
    self.param_list = [
        dict(params=self.features_model.parameters()),
        dict(params=self.decoder_model.parameters()),
    ]
    self.features_std = None
    self.next_features_std = None
    self.spherical_obs = spherical_obs
    if self.spherical_obs:
        # Single learned scale shared across all pixels (spherical output distribution).
        self.scale = torch.nn.Parameter(torch.tensor(1.0), requires_grad=True)
        self.param_list = self.param_list + [dict(params=[self.scale])]
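# Since the encoder above emits 2 * feat_dim values, the features_std fields
# suggest the posterior is split into a mean and a positive spread and sampled
# with the reparameterization trick. A hedged sketch of that step; the method
# name and the softplus parameterization are assumptions.
def _posterior_sample_sketch(self, enc_out):
    mu, raw_std = torch.chunk(enc_out, 2, dim=-1)
    std = torch.nn.functional.softplus(raw_std)  # keep the spread positive
    eps = torch.randn_like(std)
    return mu + eps * std, std                   # sample, plus std for the KL term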