示例#1
0
    def __init__(self,
                 name,
                 input_dim,
                 output_dim,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.relu,
                 output_nonlinearity=None,
                 input_var=None,
                 **kwargs):
        """Record the layer configuration on the instance.

        Nothing is built here; the arguments are only stored as attributes.

        Args:
            name: stored as ``self.name``.
            input_dim: stored as ``self.input_dim``.
            output_dim: stored as ``self.output_dim``.
            hidden_sizes: stored as ``self.hidden_sizes``.
            hidden_nonlinearity: stored as ``self.hidden_nonlinearity``.
            output_nonlinearity: stored as ``self.output_nonlinearity``.
            input_var: stored as ``self.input_var``.
            **kwargs: only ``batch_normalization`` is read here
                (``False`` when absent).
        """
        # Called before any other local is bound, so locals() contains only
        # the constructor arguments.
        Serializable.quick_init(self, locals())

        # Dimensions and identity.
        self.input_dim, self.output_dim = input_dim, output_dim
        self.name, self.input_var = name, input_var

        # Architecture description.
        self.hidden_sizes = hidden_sizes
        self.hidden_nonlinearity, self.output_nonlinearity = (
            hidden_nonlinearity,
            output_nonlinearity,
        )

        self.batch_normalization = kwargs.get('batch_normalization', False)

        # Start out empty; nothing in this constructor populates them.
        self._params = self._assign_ops = self._assign_phs = None
示例#2
0
    def __init__(self, *args, **kwargs):
        """Initialize through ``Layer.__init__`` and then build the graph.

        All arguments are forwarded unchanged to ``Layer.__init__``. Two
        keyword arguments are additionally read here:

        * ``cell_type`` -- stored as ``self._cell_type`` (``'gru'`` if absent).
        * ``state_var`` -- stored as ``self.state_var`` (``None`` if absent).
        """
        # Capture the constructor arguments for serialization first, then run
        # the base initializer.
        Serializable.quick_init(self, locals())
        Layer.__init__(self, *args, **kwargs)

        self._cell_type = kwargs['cell_type'] if 'cell_type' in kwargs else 'gru'
        self.state_var = kwargs.get('state_var')

        self.build_graph()
示例#3
0
    def __init__(self, *args, **kwargs):
        """Initialize through ``Layer.__init__`` and then build the graph.

        All arguments are forwarded unchanged to ``Layer.__init__``. After
        that call, the attributes ``num_filters``, ``kernel_sizes``,
        ``strides``, ``hidden_nonlinearity`` and ``output_nonlinearity`` are
        set from the keyword arguments of the same name; a keyword that was
        not supplied results in ``None``.
        """
        # Capture the constructor arguments for serialization first, then run
        # the base initializer.
        Serializable.quick_init(self, locals())
        Layer.__init__(self, *args, **kwargs)

        keyword_attributes = (
            'num_filters',
            'kernel_sizes',
            'strides',
            'hidden_nonlinearity',
            'output_nonlinearity',
        )
        for attribute in keyword_attributes:
            setattr(self, attribute, kwargs.get(attribute))

        self.build_graph()
示例#4
0
    def __init__(
            self,
            env,
            policy,
            num_rollouts,
            max_path_length,
            n_parallel=1,
            vae=None,
    ):
        """Set up the sampler and its vectorized environment executor.

        Args:
            env: environment handed to the base constructor and to the
                executor.
            policy: forwarded to the base constructor.
            num_rollouts: number of rollouts; forwarded to the executor and
                used to compute ``total_samples``.
            max_path_length: forwarded to the base constructor and used to
                compute ``total_samples``.
            n_parallel: a ``ParallelEnvExecutor`` is used when this is greater
                than 1, otherwise an ``IterativeEnvExecutor``.
            vae: stored as ``self.vae``.
        """
        Serializable.quick_init(self, locals())
        super(Sampler, self).__init__(env, policy, n_parallel, max_path_length)

        self.total_samples = num_rollouts * max_path_length
        self.n_parallel = n_parallel
        self.total_timesteps_sampled = 0
        self.vae = vae

        # Vectorized environment. self.max_path_length is not assigned in this
        # method -- presumably the base constructor sets it (TODO confirm).
        self.vec_env = (
            ParallelEnvExecutor(env, n_parallel, num_rollouts, self.max_path_length)
            if self.n_parallel > 1
            else IterativeEnvExecutor(env, num_rollouts, self.max_path_length)
        )
示例#5
0
    def __init__(
        self,
        env,
        scale_reward=1.,
        normalize_obs=False,
        normalize_reward=False,
        obs_alpha=0.001,
        reward_alpha=0.001,
        normalization_scale=1.,
    ):
        """Wrap ``env`` and initialize the normalization state.

        Args:
            env: the environment to wrap; stored as ``self._wrapped_env``.
            scale_reward: reward scaling factor; stored as
                ``self._scale_reward``.
            normalize_obs: stored as ``self._normalize_obs``.
            normalize_reward: stored as ``self._normalize_reward``.
            obs_alpha: stored as ``self._obs_alpha`` (presumably the update
                rate of the running observation statistics -- confirm
                against the update code).
            reward_alpha: stored as ``self._reward_alpha`` (presumably the
                update rate of the running reward statistics -- confirm).
            normalization_scale: stored as ``self._normalization_scale``.
        """
        Serializable.quick_init(self, locals())

        # Store the caller's factor. This used to be hard-coded to 1, which
        # silently discarded the ``scale_reward`` argument.
        self._scale_reward = scale_reward
        self._wrapped_env = env

        self._normalize_obs = normalize_obs
        self._normalize_reward = normalize_reward
        self._obs_alpha = obs_alpha
        # self.observation_space is not assigned in this method; it is read
        # only after _wrapped_env is set (presumably it delegates to the
        # wrapped environment -- TODO confirm).
        self._obs_mean = np.zeros(self.observation_space.shape)
        self._obs_var = np.ones(self.observation_space.shape)
        self._reward_alpha = reward_alpha
        self._reward_mean = 0.
        self._reward_var = 1.
        self._normalization_scale = normalization_scale
示例#6
0
    def __init__(self, *args, **kwargs):
        """Initialize through ``Layer.__init__`` and then build the graph.

        All positional and keyword arguments are forwarded unchanged to
        ``Layer.__init__``.
        """
        # store the init args for serialization and call the super constructors
        Serializable.quick_init(self, locals())
        Layer.__init__(self, *args, **kwargs)

        # The graph is built only after the base initializer has run.
        self.build_graph()
示例#7
0
    def __init__(
        self,
        name,
        env,
        hidden_sizes=(500, 500),
        hidden_nonlinearity="tanh",
        output_nonlinearity=None,
        batch_size=500,
        learning_rate=0.001,
        weight_normalization=True,
        normalize_input=True,
        optimizer=tf.train.AdamOptimizer,
        valid_split_ratio=0.2,
        rolling_average_persitency=0.99,
        buffer_size=100000,
    ):
        """Build the graph of a single-MLP model predicting observation deltas.

        Args:
            name: variable-scope name; also passed to the MLP as its name.
            env: only ``observation_space.shape[0]`` and
                ``action_space.shape[0]`` are read from it.
            hidden_sizes: forwarded to the MLP.
            hidden_nonlinearity: key into ``self._activations``.
            output_nonlinearity: key into ``self._activations``.
            batch_size: stored as ``self.batch_size``.
            learning_rate: passed to ``optimizer``.
            weight_normalization: forwarded to the MLP.
            normalize_input: stored as ``self.normalize_input``.
            optimizer: callable invoked with the learning rate; its result
                must provide ``minimize``.
            valid_split_ratio: stored as ``self.valid_split_ratio``.
            rolling_average_persitency: stored under the same name.
            buffer_size: stored as ``self.buffer_size``.
        """
        Serializable.quick_init(self, locals())

        # Plain configuration and bookkeeping attributes.
        self.normalization = None
        self.normalize_input = normalize_input
        self.use_reward_model = False
        self.buffer_size = buffer_size
        self.name = name
        self.hidden_sizes = hidden_sizes

        self._dataset_train = None
        self._dataset_test = None
        self.next_batch = None

        self.valid_split_ratio = valid_split_ratio
        self.rolling_average_persitency = rolling_average_persitency

        # Translate the activation keys into the objects registered on the class.
        hidden_fn = self._activations[hidden_nonlinearity]
        self.hidden_nonlinearity = hidden_fn
        output_fn = self._activations[output_nonlinearity]
        self.output_nonlinearity = output_fn

        def _float_placeholder(width):
            # 2-D float32 placeholder with an unspecified first dimension.
            return tf.placeholder(tf.float32, shape=(None, width))

        with tf.variable_scope(name):
            self.batch_size = batch_size
            self.learning_rate = learning_rate

            # Sizes of the observation and action vectors.
            obs_dim = env.observation_space.shape[0]
            self.obs_space_dims = obs_dim
            act_dim = env.action_space.shape[0]
            self.action_space_dims = act_dim

            # Inputs: observation, action and the target delta.
            self.obs_ph = _float_placeholder(obs_dim)
            self.act_ph = _float_placeholder(act_dim)
            self.delta_ph = _float_placeholder(obs_dim)

            self._create_stats_vars()

            # The network consumes observation and action side by side.
            self.nn_input = tf.concat([self.obs_ph, self.act_ph], axis=1)

            network = MLP(
                name,
                input_dim=obs_dim + act_dim,
                input_var=self.nn_input,
                output_dim=obs_dim,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_fn,
                output_nonlinearity=output_fn,
                weight_normalization=weight_normalization,
            )
            self.delta_pred = network.output_var

            # Loss: mean over the batch of the per-row norm of the residual.
            residual = self.delta_ph - self.delta_pred
            row_norms = tf.linalg.norm(residual, axis=-1)
            self.loss = tf.reduce_mean(row_norms)
            self.optimizer = optimizer(self.learning_rate)
            self.train_op = self.optimizer.minimize(self.loss)

            # Callable mapping (observations, actions) to the predicted delta.
            prediction_inputs = [self.obs_ph, self.act_ph]
            self.f_delta_pred = compile_function(prediction_inputs,
                                                 self.delta_pred)

        self._networks = [network]
示例#8
0
    def __init__(
        self,
        name,
        env,
        num_models=5,
        hidden_sizes=(512, 512),
        hidden_nonlinearity='swish',
        output_nonlinearity=None,
        batch_size=500,
        learning_rate=0.001,
        weight_normalization=False,  # Doesn't work
        normalize_input=True,
        optimizer=tf.train.AdamOptimizer,
        valid_split_ratio=0.2,  # 0.1
        rolling_average_persitency=0.99,
        buffer_size=50000,
        loss_str='MSE',
    ):
        """Build the graphs of an ensemble of MLPs predicting observation deltas.

        Two graphs are created under the variable scope ``name``: one with a
        single set of placeholders and a joint loss, and one (entered with
        ``reuse=True``) in which the stacked input is split into
        ``num_models`` parts and every model gets its own loss and train op.

        Args:
            name: outer variable-scope name and prefix of each MLP's name.
            env: only ``observation_space.shape[0]`` and
                ``action_space.shape[0]`` are read from it.
            num_models: number of MLPs in the ensemble.
            hidden_sizes: forwarded to every MLP.
            hidden_nonlinearity: key into ``self._activations``.
            output_nonlinearity: key into ``self._activations``.
            batch_size: stored as ``self.batch_size``.
            learning_rate: passed to ``optimizer``.
            weight_normalization: forwarded only to the MLPs of the second
                (per-model batch) graph.
            normalize_input: stored as ``self.normalize_input``.
            optimizer: callable invoked with ``learning_rate=...``; its result
                must provide ``minimize``.
            valid_split_ratio: fraction of ``buffer_size`` assigned to
                ``self.buffer_size_test``; the remainder goes to
                ``self.buffer_size_train``.
            rolling_average_persitency: stored under the same name.
            buffer_size: total size that is split into train and test sizes.
            loss_str: ``'L2'`` or ``'MSE'``.

        Raises:
            NotImplementedError: if ``loss_str`` is neither ``'L2'`` nor
                ``'MSE'``.
        """

        Serializable.quick_init(self, locals())

        # NOTE(review): these two locals are not used anywhere in this method.
        max_logvar = 1
        min_logvar = 0.1

        self.normalization = None
        self.normalize_input = normalize_input
        self.next_batch = None

        self.valid_split_ratio = valid_split_ratio
        self.rolling_average_persitency = rolling_average_persitency

        # Split the total buffer size into a training and a test share.
        self.buffer_size_train = int(buffer_size * (1 - valid_split_ratio))
        self.buffer_size_test = int(buffer_size * valid_split_ratio)
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.num_models = num_models
        self.hidden_sizes = hidden_sizes
        self.name = name
        self._dataset_train = None
        self._dataset_test = None

        # determine dimensionality of state and action space
        self.obs_space_dims = obs_space_dims = env.observation_space.shape[0]
        self.action_space_dims = action_space_dims = env.action_space.shape[0]
        self.timesteps_counter = 0
        self.used_timesteps_counter = 0

        # Replace the activation keys by the entries of self._activations.
        self.hidden_nonlinearity = hidden_nonlinearity = self._activations[
            hidden_nonlinearity]
        self.output_nonlinearity = output_nonlinearity = self._activations[
            output_nonlinearity]
        """ computation graph for training and simple inference """
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            # placeholders
            self.obs_ph = tf.placeholder(tf.float32,
                                         shape=(None, obs_space_dims))
            self.act_ph = tf.placeholder(tf.float32,
                                         shape=(None, action_space_dims))
            self.delta_ph = tf.placeholder(tf.float32,
                                           shape=(None, obs_space_dims))

            self._create_stats_vars()

            # concatenate action and observation --> NN input
            self.nn_input = tf.concat([self.obs_ph, self.act_ph], axis=1)
            # Despite its name, obs_ph is a list of num_models slices of the
            # concatenated input taken along the first axis.
            obs_ph = tf.split(self.nn_input, self.num_models, axis=0)

            # create MLP
            mlps = []
            delta_preds = []
            self.obs_next_pred = []
            for i in range(num_models):
                with tf.variable_scope('model_{}'.format(i),
                                       reuse=tf.AUTO_REUSE):
                    # NOTE(review): weight_normalization is not forwarded
                    # here, although it is forwarded to the MLPs of the second
                    # graph below -- confirm this is intended.
                    mlp = MLP(
                        name + '/model_{}'.format(i),
                        output_dim=obs_space_dims,
                        hidden_sizes=hidden_sizes,
                        hidden_nonlinearity=hidden_nonlinearity,
                        output_nonlinearity=output_nonlinearity,
                        input_var=obs_ph[i],
                        input_dim=obs_space_dims + action_space_dims,
                    )
                    mlps.append(mlp)

                delta_preds.append(mlp.output_var)

            # NOTE(review): every model receives one slice of the input, so the
            # first dimension of this stack is the slice size rather than the
            # size fed to delta_ph -- confirm the subtraction in the loss below
            # is fed compatible shapes.
            self.delta_pred = tf.stack(
                delta_preds, axis=2)  # shape: (batch_size, ndim_obs, n_models)

            # define loss and train_op
            if loss_str == 'L2':
                self.loss = tf.reduce_mean(
                    tf.linalg.norm(self.delta_ph[:, :, None] - self.delta_pred,
                                   axis=1))
            elif loss_str == 'MSE':
                self.loss = tf.reduce_mean(
                    (self.delta_ph[:, :, None] - self.delta_pred)**2)
            else:
                raise NotImplementedError

            self.optimizer = optimizer(learning_rate=self.learning_rate)
            self.train_op = self.optimizer.minimize(self.loss)

            # tensor_utils
            self.f_delta_pred = compile_function([self.obs_ph, self.act_ph],
                                                 self.delta_pred)
        """ computation graph for inference where each of the models receives a different batch"""
        with tf.variable_scope(name, reuse=True):
            # placeholders
            self.obs_model_batches_stack_ph = tf.placeholder(
                tf.float32, shape=(None, obs_space_dims))
            self.act_model_batches_stack_ph = tf.placeholder(
                tf.float32, shape=(None, action_space_dims))
            self.delta_model_batches_stack_ph = tf.placeholder(
                tf.float32, shape=(None, obs_space_dims))

            # split stack into the batches for each model --> assume each model receives a batch of the same size
            self.obs_model_batches = tf.split(self.obs_model_batches_stack_ph,
                                              self.num_models,
                                              axis=0)
            self.act_model_batches = tf.split(self.act_model_batches_stack_ph,
                                              self.num_models,
                                              axis=0)
            self.delta_model_batches = tf.split(
                self.delta_model_batches_stack_ph, self.num_models, axis=0)

            # reuse previously created MLP but each model receives its own batch
            delta_preds = []
            # Reset a second time; nothing in this method appends to it.
            self.obs_next_pred = []
            self.loss_model_batches = []
            self.train_op_model_batches = []
            for i in range(num_models):
                with tf.variable_scope('model_{}'.format(i), reuse=True):
                    # concatenate action and observation --> NN input
                    nn_input = tf.concat(
                        [self.obs_model_batches[i], self.act_model_batches[i]],
                        axis=1)
                    mlp = MLP(name + '/model_{}'.format(i),
                              output_dim=obs_space_dims,
                              hidden_sizes=hidden_sizes,
                              hidden_nonlinearity=hidden_nonlinearity,
                              output_nonlinearity=output_nonlinearity,
                              input_var=nn_input,
                              input_dim=obs_space_dims + action_space_dims,
                              weight_normalization=weight_normalization)

                delta_preds.append(mlp.output_var)

                # define loss and train_op
                if loss_str == 'L2':
                    loss = tf.reduce_mean(
                        tf.linalg.norm(self.delta_model_batches[i] -
                                       mlp.output_var,
                                       axis=1))
                elif loss_str == 'MSE':
                    loss = tf.reduce_mean(
                        (self.delta_model_batches[i] - mlp.output_var)**2)
                else:
                    raise NotImplementedError
                self.loss_model_batches.append(loss)
                # A separate optimizer instance is created for every model.
                self.train_op_model_batches.append(
                    optimizer(learning_rate=self.learning_rate).minimize(loss))

            self.delta_pred_model_batches_stack = tf.concat(
                delta_preds,
                axis=0)  # shape: (batch_size_per_model*num_models, ndim_obs)

            # tensor_utils
            self.f_delta_pred_model_batches = compile_function([
                self.obs_model_batches_stack_ph,
                self.act_model_batches_stack_ph
            ], self.delta_pred_model_batches_stack)

        # Only the MLP objects created for the first graph are kept here.
        self._networks = mlps