    def get_last_features(self, x, reuse):
        x_has_timesteps = (x.get_shape().ndims == 5)
        if x_has_timesteps:
            sh = tf.shape(x)
            x = flatten_two_dims(x)

        with tf.variable_scope(self.scope + "_features", reuse=reuse):
            x = (tf.to_float(x) - self.ob_mean) / self.ob_std
            x = small_convnet(x, nl=self.nl, feat_dim=self.feat_dim, last_nl=None, layernormalize=self.layernormalize)

            if x_has_timesteps:
                x = unflatten_first_dim(x, sh)
                # sh only exists on the timestep path; doing this reshape
                # unconditionally would raise a NameError otherwise
                x = tf.reshape(x, [-1, sh[1], self.feat_dim])
        with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
            init_1 = tf.contrib.rnn.LSTMStateTuple(self.last_c_in_1, self.last_h_in_1)
            if self.lstm2_size:
                init_2 = tf.contrib.rnn.LSTMStateTuple(self.last_c_in_2, self.last_h_in_2)
            if self.aux_input:
                prev_rews = tf.expand_dims(self.ph_last_rew, -1)
                x = tf.concat([x, prev_rews], -1)
            x, c_out_1, h_out_1 = lstm(self.lstm1_size)(x, initial_state=init_1)
            if self.lstm2_size:
                if self.aux_input:
                    prev_acs = tf.one_hot(self.ph_last_ac, depth=self.num_actions)
                    x = tf.concat([x, tf.cast(prev_acs, tf.float32)], -1)
                    x = tf.concat([x, self.ph_last_vel], -1)

                x, c_out_2, h_out_2 = lstm(self.lstm2_size)(x, initial_state=init_2)
        return x
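
The `lstm` helper used above is not shown on this page. A minimal sketch of what these calls appear to assume, namely a Keras LSTM returning the full sequence plus its final state (an assumption, not the original helper; note that Keras orders the return values as (outputs, state_h, state_c)):

# Hypothetical sketch of the `lstm` helper (an assumption inferred from
# the call sites above, not the original code).
import tensorflow as tf

def lstm(size):
    return tf.keras.layers.LSTM(size, return_sequences=True, return_state=True)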
Example #2
    def __init__(self,
                 ob_space,
                 ac_space,
                 hidsize,
                 ob_mean,
                 ob_std,
                 feat_dim,
                 layernormalize,
                 nl,
                 scope="policy"):
        if layernormalize:
            print(
                "Warning: policy is operating on top of layer-normed features. This might slow down training."
            )
        self.layernormalize = layernormalize
        self.nl = nl
        self.ob_mean = ob_mean
        self.ob_std = ob_std
        self.ob_space = ob_space
        self.ac_space = ac_space
        self.ac_pdtype = make_pdtype(ac_space)

        self.pd = self.vpred = None
        self.hidsize = hidsize
        self.feat_dim = feat_dim
        self.scope = scope
        pdparamsize = self.ac_pdtype.param_shape()[0]

        self.features_model = small_convnet(self.ob_space,
                                            nl=self.nl,
                                            feat_dim=self.feat_dim,
                                            last_nl=None,
                                            layernormalize=self.layernormalize)

        # Two-layer MLP trunk shared by the policy and value heads.
        self.pd_hidden = torch.nn.Sequential(
            torch.nn.Linear(feat_dim, hidsize),
            torch.nn.ReLU(),
            torch.nn.Linear(hidsize, hidsize),
            torch.nn.ReLU(),
        )
        # Heads: action-distribution parameters and a scalar value estimate.
        self.pd_head = torch.nn.Linear(hidsize, pdparamsize)
        self.vf_head = torch.nn.Linear(hidsize, 1)

        self.param_list = [
            dict(params=self.features_model.parameters()),
            dict(params=self.pd_hidden.parameters()),
            dict(params=self.pd_head.parameters()),
            dict(params=self.vf_head.parameters())
        ]

        self.flat_features = None
        self.pd = None
        self.vpred = None
        self.ac = None
        self.ob = None
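
A plausible forward pass for this policy, showing how the modules above compose; the method and its wiring are assumptions, not part of the original class:

# Hypothetical forward pass (an assumption based on the modules above):
# observation -> convnet features -> shared trunk -> policy / value heads.
def forward(self, ob):
    x = self.features_model(ob)     # [batch, feat_dim]
    h = self.pd_hidden(x)           # shared hidden trunk
    pdparam = self.pd_head(h)       # parameters of the action distribution
    vpred = self.vf_head(h)[:, 0]   # scalar value prediction
    return pdparam, vpred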
Example #3
    def get_features(self, x, reuse):
        nl = tf.nn.leaky_relu
        x_has_timesteps = (x.get_shape().ndims == 5)
        if x_has_timesteps:
            sh = tf.shape(x)
            x = flatten_two_dims(x)
        with tf.variable_scope(self.scope + "_features", reuse=reuse):
            x = (tf.to_float(x) - self.ob_mean) / self.ob_std
            x = small_convnet(x, nl=nl, feat_dim=self.feat_dim, last_nl=nl, layernormalize=False)
        if x_has_timesteps:
            x = unflatten_first_dim(x, sh)
        return x
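
All of these `get_features` variants fold the time axis into the batch axis before the convnet and restore it afterwards. Plausible sketches of the two shape helpers, inferred from how they are called (assumptions, not the original utilities):

# Plausible sketches of the shape helpers (inferred from usage above,
# not taken from the original code).
import tensorflow as tf

def flatten_two_dims(x):
    # [B, T, H, W, C] -> [B*T, H, W, C]
    return tf.reshape(x, [-1] + x.get_shape().as_list()[2:])

def unflatten_first_dim(x, sh):
    # [B*T, ...] -> [B, T, ...], where sh = tf.shape(original x)
    return tf.reshape(x, [sh[0], sh[1]] + x.get_shape().as_list()[1:])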
Example #4
    def get_features(self, x, reuse):
        x_has_timesteps = (x.get_shape().ndims == 5)
        if x_has_timesteps:
            sh = tf.shape(x)
            x = flatten_two_dims(x)

        with tf.variable_scope(self.scope + "_features", reuse=reuse):
            x = tf.to_float(x)  # note: unlike the other variants, no mean/std normalization here
            x = small_convnet(x, nl=self.nl, feat_dim=self.feat_dim, last_nl=None, layernormalize=self.layernormalize)

        if x_has_timesteps:
            x = unflatten_first_dim(x, sh)
        return x
Example #5
    def get_features(self, x, reuse):
        x_has_timesteps = (x.get_shape().ndims == 5)
        if x_has_timesteps:
            shape = tf.shape(x)
            x = flatten_two_dims(x)
        with tf.variable_scope(self.scope + '_features', reuse=reuse):
            x = (tf.cast(x, tf.float32) - self.ob_mean) / self.ob_std
            x = small_convnet(x,
                              nl=self.nl,
                              feat_dim=self.feat_dim,
                              last_nl=None,
                              layernormalize=self.layernormalize)
        if x_has_timesteps:
            # after small_convnet, x is 2-D, so re-checking ndims == 5 here
            # would always be False and the unflatten would never run
            x = unflatten_first_dim(x, shape)
        return x
Example #6
    def __init__(self,
                 auxiliary_task,
                 predict_from_pixels,
                 feat_dim=None,
                 scope='dynamics'):
        self.scope = scope
        self.auxiliary_task = auxiliary_task
        self.hidsize = self.auxiliary_task.hidsize
        self.feat_dim = feat_dim
        self.ac_space = self.auxiliary_task.ac_space
        self.ob_space = self.auxiliary_task.ob_space  # used by small_convnet below
        self.ob_mean = self.auxiliary_task.ob_mean
        self.ob_std = self.auxiliary_task.ob_std
        self.predict_from_pixels = predict_from_pixels
        self.param_list = []
        if predict_from_pixels:
            self.features_model = small_convnet(self.ob_space,
                                                nl=torch.nn.LeakyReLU,
                                                feat_dim=self.feat_dim,
                                                last_nl=torch.nn.LeakyReLU,
                                                layernormalize=False)
            self.param_list = self.param_list + [
                dict(params=self.features_model.parameters())
            ]
        else:
            self.features_model = None

        # it is unclear why the original implementation needs a separate net here
        self.loss_net = loss_net(nblocks=4,
                                 feat_dim=self.feat_dim,
                                 ac_dim=self.ac_space.n,
                                 out_feat_dim=self.feat_dim,
                                 hidsize=self.hidsize)
        self.param_list = self.param_list + [
            dict(params=self.loss_net.parameters())
        ]

        self.features = None
        self.next_features = None
        self.ac = None
        self.ob = None
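
The call site of `loss_net` is not shown here; given its constructor arguments (feat_dim plus ac_dim in, out_feat_dim out), one plausible wiring is to concatenate features with a one-hot action. A hypothetical sketch (the method name, call convention, and concat layout are all assumptions):

# Hypothetical use of loss_net (signature inferred from its constructor
# arguments above; the real call site is not shown on this page).
import torch

def predict_next_features(self, features, ac):
    onehot_ac = torch.nn.functional.one_hot(ac, num_classes=self.ac_space.n).float()
    return self.loss_net(torch.cat([features, onehot_ac], dim=-1))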
    def __init__(self,
                 policy,
                 features_shared_with_policy,
                 feat_dim=None,
                 layernormalize=None,
                 scope='feature_extractor'):
        self.scope = scope
        self.features_shared_with_policy = features_shared_with_policy
        self.feat_dim = feat_dim
        self.layernormalize = layernormalize
        self.policy = policy
        self.hidsize = policy.hidsize
        self.ob_space = policy.ob_space
        self.ac_space = policy.ac_space
        self.ob_mean = self.policy.ob_mean
        self.ob_std = self.policy.ob_std

        self.param_list = []
        if features_shared_with_policy:
            self.features_model = None
        else:
            self.features_model = small_convnet(
                self.ob_space,
                nl=torch.nn.LeakyReLU,
                feat_dim=self.feat_dim,
                last_nl=None,
                layernormalize=self.layernormalize)
            self.param_list = self.param_list + [
                dict(params=self.features_model.parameters())
            ]


        self.features = None
        self.next_features = None
        self.ac = None
        self.ob = None
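
When `features_shared_with_policy` is set, no convnet is built, which suggests the extractor reuses the policy's features instead. A hypothetical accessor illustrating that branch (the method name is an assumption; `policy.features_model` is defined in Example #2):

# Hypothetical accessor: fall back to the policy's own feature network
# when sharing is enabled, otherwise run this extractor's convnet.
def get_features(self, obs):
    if self.features_shared_with_policy:
        return self.policy.features_model(obs)
    return self.features_model(obs)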
    def __init__(self,
                 policy,
                 features_shared_with_policy,
                 feat_dim=None,
                 layernormalize=False,
                 spherical_obs=False):
        assert not layernormalize, "VAE features should already have a reasonable scale; no need to layer-normalize them"
        super(VAE, self).__init__(
            scope="vae",
            policy=policy,
            features_shared_with_policy=features_shared_with_policy,
            feat_dim=feat_dim,
            layernormalize=False)

        self.features_model = small_convnet(self.ob_space,
                                            nl=torch.nn.LeakyReLU,
                                            feat_dim=2 * self.feat_dim,  # 2x: mean / std parameters of the feature posterior
                                            last_nl=None,
                                            layernormalize=False)
        self.decoder_model = small_deconvnet(self.ob_space,
                                             feat_dim=self.feat_dim,
                                             nl=torch.nn.LeakyReLU,
                                             ch=4 if spherical_obs else 8,
                                             positional_bias=True)

        self.param_list = [
            dict(params=self.features_model.parameters()),
            dict(params=self.decoder_model.parameters())
        ]

        self.features_std = None
        self.next_features_std = None

        self.spherical_obs = spherical_obs
        if self.spherical_obs:
            self.scale = torch.nn.Parameter(torch.tensor(1.0),
                                            requires_grad=True)
            self.param_list = self.param_list + [dict(params=[self.scale])]
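
Since the encoder emits `2 * self.feat_dim` values and the class tracks `features_std`, the features are presumably sampled with the usual reparameterization trick. A minimal sketch of that step (the method name and the exact mean/std split are assumptions):

# Hypothetical sampling step (an assumption based on the 2*feat_dim
# encoder output): split into mean / std and reparameterize.
import torch

def sample_features(self, obs):
    params = self.features_model(obs)         # [batch, 2*feat_dim]
    mu, log_std = params.chunk(2, dim=-1)     # mean and log-std halves
    std = log_std.exp()
    return mu + std * torch.randn_like(std)   # reparameterization trick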