def _build(self, *inputs, name=None):
    """Output of the model given input placeholder(s).

    Users should implement _build() inside their subclassed model and
    construct the computation graph in this function.

    Args:
        inputs: Tensor input(s), recommended to be positional arguments,
            e.g. def _build(self, state_input, action_input, name=None).
            It would usually be the same as the inputs in build().
        name (str): Inner model name, also the variable scope of the
            inner model, if it exists. One example is
            garage.tf.models.Sequential.

    Return:
        output: Tensor output(s) of the model.

    """
    # the inputs are y1_and_v1_ph
    # the input values are not used, only the dimensions are used
    self.k_pre_var = parameter(input_var=inputs[0],
                               length=self.n_springs,
                               initializer=tf.random_uniform_initializer(
                                   minval=self.k_pre_init_lb,
                                   maxval=self.k_pre_init_ub),
                               trainable=True,
                               name='k_pre')
    self.k_ts = tf.math.add(
        tf.math.sigmoid(self.k_pre_var) * tf.compat.v1.constant(
            self.k_range, dtype=tf.float32, name='k_range'),
        tf.compat.v1.constant(self.k_lb, dtype=tf.float32, name='k_lb'),
        name='k')
    # the mean in the output of this model only contains the k's, but
    # log_std contains the stds for f and the k's
    self.log_std_var = parameter(input_var=inputs[0],
                                 length=1 + self.n_springs,
                                 initializer=tf.constant_initializer(
                                     self.f_and_k_log_std_init),
                                 trainable=True,
                                 name='log_std')
    return self.k_ts, self.log_std_var
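# The sigmoid squash above (and in the later _build variants) maps the
# unconstrained variable k_pre into the open interval (k_lb, k_lb + k_range).
# A minimal NumPy sketch of that mapping; the bounds below are hypothetical:
import numpy as np

def squash(pre, lb, rng):
    """Map an unconstrained value into (lb, lb + rng), as k_ts does above."""
    return 1.0 / (1.0 + np.exp(-pre)) * rng + lb

k_lb, k_range = 10.0, 90.0  # hypothetical bounds
for pre in (-10.0, 0.0, 10.0):
    k = squash(pre, k_lb, k_range)
    assert k_lb < k < k_lb + k_range  # always strictly inside the bounds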
def test_param(self):
    param = parameter(input_var=self.input_vars,
                      length=3,
                      initializer=tf.constant_initializer(
                          self.initial_params))
    self.sess.run(tf.compat.v1.global_variables_initializer())
    p = self.sess.run(param, feed_dict=self.feed_dict)
    assert p.shape == (5, 3)
    assert np.all(p == self.initial_params)
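# The test above pins down the contract of parameter(): it returns a tensor
# whose batch dimension follows input_var and whose trailing dimension is
# `length`, with every row equal to the (trainable) underlying variable.
# A minimal sketch consistent with that contract; the body is an assumption,
# not the library's actual implementation:
import tensorflow as tf

def parameter_sketch(input_var, length,
                     initializer=tf.zeros_initializer(),
                     dtype=tf.float32, trainable=True, name='parameter'):
    with tf.compat.v1.variable_scope(name):
        p = tf.compat.v1.get_variable('parameter',
                                      shape=(length, ),
                                      dtype=dtype,
                                      initializer=initializer,
                                      trainable=trainable)
        # tile the (length,) variable across the batch dimension of input_var
        batch_dim = tf.shape(input_var)[0]
        return tf.broadcast_to(p, shape=tf.stack([batch_dim, length]))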
def _build(self, state_input, name=None):
    """Build model given input placeholder(s).

    Args:
        state_input (tf.Tensor): Placeholder for state input.
        name (str): Inner model name, also the variable scope of the
            inner model, if it exists. One example is
            garage.tf.models.Sequential.

    Return:
        tf.Tensor: Sampled action.
        tf.Tensor: Mean.
        tf.Tensor: Parameterized log_std.
        tf.Tensor: log_std.
        tfp.distributions.MultivariateNormalDiag: Distribution.

    """
    del name
    action_dim = self._output_dim

    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share a CNN
            b = np.concatenate([
                np.zeros(action_dim),
                np.full(action_dim, self._init_std_param)
            ], axis=0)  # yapf: disable

            mean_std_conv = cnn(
                input_var=state_input,
                filters=self._filters,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                strides=self._strides,
                padding=self._padding,
                name='mean_std_cnn')
            mean_std_network = mlp(
                mean_std_conv,
                output_dim=action_dim * 2,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=tf.constant_initializer(b),
                name='mean_std_network',
                layer_normalization=self._layer_normalization)
            with tf.compat.v1.variable_scope('mean_network'):
                mean_network = mean_std_network[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_network = mean_std_network[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            mean_conv = cnn(input_var=state_input,
                            filters=self._filters,
                            hidden_nonlinearity=self._hidden_nonlinearity,
                            hidden_w_init=self._hidden_w_init,
                            hidden_b_init=self._hidden_b_init,
                            strides=self._strides,
                            padding=self._padding,
                            name='mean_cnn')
            mean_network = mlp(
                mean_conv,
                output_dim=action_dim,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=self._output_b_init,
                name='mean_network',
                layer_normalization=self._layer_normalization)
            # std network
            if self._adaptive_std:
                log_std_conv = cnn(
                    input_var=state_input,
                    filters=self._std_filters,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    strides=self._std_strides,
                    padding=self._std_padding,
                    name='log_std_cnn')
                log_std_network = mlp(
                    log_std_conv,
                    output_dim=action_dim,
                    hidden_sizes=self._std_hidden_sizes,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    output_nonlinearity=self._std_output_nonlinearity,
                    output_w_init=self._std_output_w_init,
                    output_b_init=tf.constant_initializer(
                        self._init_std_param),
                    name='log_std_network',
                    layer_normalization=self._layer_normalization)
            else:
                log_std_network = parameter(
                    input_var=state_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(
                        self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_network')

    mean_var = mean_network
    std_param = log_std_network

    with tf.compat.v1.variable_scope('std_limits'):
        if self._min_std_param is not None:
            std_param = tf.maximum(std_param, self._min_std_param)
        if self._max_std_param is not None:
            std_param = tf.minimum(std_param, self._max_std_param)

    with tf.compat.v1.variable_scope('std_parameterization'):
        # build std_var with std parameterization
        if self._std_parameterization == 'exp':
            log_std_var = std_param
        else:  # we know it must be softplus here
            log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

    dist = tfp.distributions.MultivariateNormalDiag(
        loc=mean_var, scale_diag=tf.exp(log_std_var))
    rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:],
                           seed=deterministic.get_tf_seed_stream())
    action_var = rnd * tf.exp(log_std_var) + mean_var

    return action_var, mean_var, log_std_var, std_param, dist
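# Why the double log in the softplus branch above: std_param parameterizes
# the standard deviation differently under the two schemes, and the graph
# needs log(std) in both cases:
#
#   exp:      std = exp(std_param)           =>  log_std = std_param
#   softplus: std = log(1 + exp(std_param))  =>  log_std = log(log(1 + exp(std_param)))
#
# A quick numeric check of the softplus branch (values arbitrary):
import numpy as np

std_param = np.array([-1.0, 0.0, 2.0])
std = np.log1p(np.exp(std_param))                 # softplus(std_param)
log_std = np.log(np.log(1. + np.exp(std_param)))  # as built in the graph
assert np.allclose(np.exp(log_std), std)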
def _build(self, state_input, name=None):
    """Build model.

    Args:
        state_input (tf.Tensor): Entire time-series observation input.
        name (str): Inner model name, also the variable scope of the
            inner model, if it exists. One example is
            garage.tf.models.Sequential.

    Returns:
        tfp.distributions.MultivariateNormalDiag: Distribution.
        tf.Tensor: Mean.
        tf.Tensor: Log of standard deviation.

    """
    del name
    action_dim = self._output_dim

    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share an MLP
            b = np.concatenate([
                np.zeros(action_dim),
                np.full(action_dim, self._init_std_param)
            ], axis=0)  # yapf: disable

            mean_std_network = mlp(
                state_input,
                output_dim=action_dim * 2,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=tf.constant_initializer(b),
                name='mean_std_network',
                layer_normalization=self._layer_normalization)
            with tf.compat.v1.variable_scope('mean_network'):
                mean_network = mean_std_network[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_network = mean_std_network[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            mean_network = mlp(
                state_input,
                output_dim=action_dim,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=self._output_b_init,
                name='mean_network',
                layer_normalization=self._layer_normalization)
            # std network
            if self._adaptive_std:
                log_std_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._std_hidden_sizes,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    output_nonlinearity=self._std_output_nonlinearity,
                    output_w_init=self._std_output_w_init,
                    output_b_init=tf.constant_initializer(
                        self._init_std_param),
                    name='log_std_network',
                    layer_normalization=self._layer_normalization)
            else:
                log_std_network = parameter(
                    input_var=state_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(
                        self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_network')
                log_std_network = tf.expand_dims(log_std_network, 1)

    mean_var = mean_network
    std_param = log_std_network

    with tf.compat.v1.variable_scope('std_limits'):
        if self._min_std_param is not None:
            std_param = tf.maximum(std_param, self._min_std_param)
        if self._max_std_param is not None:
            std_param = tf.minimum(std_param, self._max_std_param)

    with tf.compat.v1.variable_scope('std_parameterization'):
        # build std_var with std parameterization
        if self._std_parameterization == 'exp':
            log_std_var = std_param
        else:  # we know it must be softplus here
            log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

    return tfp.distributions.MultivariateNormalDiag(
        loc=mean_var,
        scale_diag=tf.exp(log_std_var)), mean_var, log_std_var
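# In the non-adaptive branch above, tf.expand_dims(log_std_network, 1)
# inserts a time axis so the (batch, action_dim) parameter broadcasts
# against the (batch, time, action_dim) mean of this time-series model.
# A small NumPy shape check; the concrete sizes are hypothetical:
import numpy as np

mean = np.zeros((4, 7, 2))                    # (batch, time, action_dim)
log_std = np.zeros((4, 2))[:, np.newaxis, :]  # expand_dims(axis=1) -> (4, 1, 2)
assert (mean + log_std).shape == (4, 7, 2)    # broadcasts over the time axis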
def _build(self, state_input, name=None):
    """Build model given input placeholder(s).

    Args:
        state_input (tf.Tensor): Placeholder for state input.
        name (str): Inner model name, also the variable scope of the
            inner model, if it exists. One example is
            garage.tf.models.Sequential.

    Return:
        tf.Tensor: Mean.
        tf.Tensor: Parameterized log_std.
        tf.Tensor: log_std.
        garage.tf.distributions.DiagonalGaussian: Policy distribution.

    """
    del name
    action_dim = self._output_dim

    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share an MLP
            b = np.concatenate([
                np.zeros(action_dim),
                np.full(action_dim, self._init_std_param)
            ], axis=0)  # yapf: disable

            mean_std_network = mlp(
                state_input,
                output_dim=action_dim * 2,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=tf.constant_initializer(b),
                name='mean_std_network',
                layer_normalization=self._layer_normalization)
            with tf.compat.v1.variable_scope('mean_network'):
                mean_network = mean_std_network[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_network = mean_std_network[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            mean_network = mlp(
                state_input,
                output_dim=action_dim,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=self._output_b_init,
                name='mean_network',
                layer_normalization=self._layer_normalization)
            # std network
            if self._adaptive_std:
                log_std_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._std_hidden_sizes,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    output_nonlinearity=self._std_output_nonlinearity,
                    output_w_init=self._std_output_w_init,
                    output_b_init=tf.constant_initializer(
                        self._init_std_param),
                    name='log_std_network',
                    layer_normalization=self._layer_normalization)
            else:
                log_std_network = parameter(
                    input_var=state_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(
                        self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_network')

    mean_var = mean_network
    std_param = log_std_network

    with tf.compat.v1.variable_scope('std_limits'):
        if self._min_std_param is not None:
            std_param = tf.maximum(std_param, self._min_std_param)
        if self._max_std_param is not None:
            std_param = tf.minimum(std_param, self._max_std_param)

    with tf.compat.v1.variable_scope('std_parameterization'):
        # build std_var with std parameterization
        if self._std_parameterization == 'exp':
            log_std_var = std_param
        else:  # we know it must be softplus here
            log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

    dist = DiagonalGaussian(self._output_dim)

    return mean_var, log_std_var, std_param, dist
def _build(self, state_input, name=None):
    """Build model given input placeholder(s)."""
    action_dim = self._output_dim

    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share a CNN
            b = np.concatenate([
                np.zeros(action_dim),
                np.full(action_dim, self._init_std_param)
            ], axis=0)  # yapf: disable

            mean_std_conv = cnn(
                input_var=state_input,
                filter_dims=self._filter_dims,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                num_filters=self._num_filters,
                strides=self._strides,
                padding=self._padding,
                name='mean_std_cnn')
            mean_std_network = mlp(
                mean_std_conv,
                output_dim=action_dim * 2,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=tf.constant_initializer(b),
                name='mean_std_network',
                layer_normalization=self._layer_normalization)
            with tf.compat.v1.variable_scope('mean_network'):
                mean_network = mean_std_network[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_network = mean_std_network[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            mean_conv = cnn(input_var=state_input,
                            filter_dims=self._filter_dims,
                            hidden_nonlinearity=self._hidden_nonlinearity,
                            hidden_w_init=self._hidden_w_init,
                            hidden_b_init=self._hidden_b_init,
                            num_filters=self._num_filters,
                            strides=self._strides,
                            padding=self._padding,
                            name='mean_cnn')
            mean_network = mlp(
                mean_conv,
                output_dim=action_dim,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=self._output_b_init,
                name='mean_network',
                layer_normalization=self._layer_normalization)
            # std network
            if self._adaptive_std:
                log_std_conv = cnn(
                    input_var=state_input,
                    filter_dims=self._std_filter_dims,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    num_filters=self._std_num_filters,
                    strides=self._std_strides,
                    padding=self._std_padding,
                    name='log_std_cnn')
                log_std_network = mlp(
                    log_std_conv,
                    output_dim=action_dim,
                    hidden_sizes=self._std_hidden_sizes,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    output_nonlinearity=self._std_output_nonlinearity,
                    output_w_init=self._std_output_w_init,
                    output_b_init=tf.constant_initializer(
                        self._init_std_param),
                    name='log_std_network',
                    layer_normalization=self._layer_normalization)
            else:
                log_std_network = parameter(
                    input_var=state_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(
                        self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_network')

    mean_var = mean_network
    std_param = log_std_network

    with tf.compat.v1.variable_scope('std_limits'):
        if self._min_std_param is not None:
            std_param = tf.maximum(std_param, self._min_std_param)
        if self._max_std_param is not None:
            std_param = tf.minimum(std_param, self._max_std_param)

    with tf.compat.v1.variable_scope('std_parameterization'):
        # build std_var with std parameterization
        if self._std_parameterization == 'exp':
            log_std_var = std_param
        else:  # we know it must be softplus here
            log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

    dist = DiagonalGaussian(self._output_dim)
    rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:])
    action_var = rnd * tf.exp(log_std_var) + mean_var

    return action_var, mean_var, log_std_var, std_param, dist
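# The sampling in the last lines above is the reparameterization trick: a
# standard normal draw is scaled by the std and shifted by the mean, so the
# sample remains differentiable with respect to both. The same transform in
# NumPy (values hypothetical):
import numpy as np

rng = np.random.default_rng(0)
mean = np.array([0.5, -0.2])
log_std = np.array([-1.0, -1.0])
rnd = rng.standard_normal(mean.shape)
action = rnd * np.exp(log_std) + mean  # same form as action_var above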
def _build(self, *inputs, name=None):
    """Output of the model given input placeholder(s).

    Users should implement _build() inside their subclassed model and
    construct the computation graph in this function.

    Args:
        inputs: Tensor input(s), recommended to be positional arguments,
            e.g. def _build(self, state_input, action_input, name=None).
            It would usually be the same as the inputs in build().
        name (str): Inner model name, also the variable scope of the
            inner model, if it exists. One example is
            garage.tf.models.Sequential.

    Return:
        output: Tensor output(s) of the model.

    """
    # the inputs are y1_and_v1_ph
    y1_and_v1_ph = inputs[0]
    y1_and_v1_ph_normalized = y1_and_v1_ph / [
        self.pos_range, self.half_vel_range
    ]
    self.k_pre_var = parameter(
        input_var=y1_and_v1_ph,
        length=self.n_springs,
        initializer=tf.random_uniform_initializer(
            minval=self.k_pre_init_lb, maxval=self.k_pre_init_ub),
        trainable=True,
        name='k_pre')
    self.k_ts_normalized = tf.math.sigmoid(self.k_pre_var)
    y1_v1_k_ts_normalized = tf.concat(
        [y1_and_v1_ph_normalized, self.k_ts_normalized],
        axis=1,
        name='y1_v1_k')
    f_ts_normalized = mlp(y1_v1_k_ts_normalized,
                          1,
                          self.comp_policy_network_size,
                          name='mlp',
                          hidden_nonlinearity=tf.math.tanh,
                          output_nonlinearity=tf.math.tanh)
    self.f_ts = f_ts_normalized * self.half_force_range
    self.k_ts = tf.math.add(
        self.k_ts_normalized * tf.compat.v1.constant(
            self.k_range, dtype=tf.float32, name='k_range'),
        tf.compat.v1.constant(self.k_lb, dtype=tf.float32, name='k_lb'),
        name='k')
    # k_ts_stop_grad = tf.stop_gradient(self.k_ts)
    # we should not stop gradient; k should be treated as an actual
    # action that gradients can flow through
    f_and_k_ts = tf.concat([self.f_ts, self.k_ts], axis=1, name='f_and_k')
    self.debug_ts = tf.gradients(f_and_k_ts, self.k_pre_var)
    self.log_std_var = parameter(input_var=y1_and_v1_ph,
                                 length=1 + self.n_springs,
                                 initializer=tf.constant_initializer(
                                     self.f_and_k_log_std_init),
                                 trainable=True,
                                 name='log_std')
    return f_and_k_ts, self.log_std_var
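# debug_ts above is what the stop-gradient comment guards: had k_ts been
# wrapped in tf.stop_gradient before the concat, tf.gradients would report
# a disconnected (None) gradient back to k_pre_var. A minimal sketch of
# that check; the variable name is hypothetical:
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
k_pre = tf.compat.v1.get_variable('k_pre_demo', initializer=[0.1, 0.2])
k = tf.math.sigmoid(k_pre)
g_through = tf.gradients(tf.reduce_sum(k), k_pre)  # nonzero gradient
g_stopped = tf.gradients(tf.reduce_sum(tf.stop_gradient(k)), k_pre)  # [None]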
def _build(self, *inputs, name=None):
    """Output of the model given input placeholder(s).

    Users should implement _build() inside their subclassed model and
    construct the computation graph in this function.

    Args:
        inputs: Tensor input(s), recommended to be positional arguments,
            e.g. def _build(self, state_input, action_input, name=None).
            It would usually be the same as the inputs in build().
        name (str): Inner model name, also the variable scope of the
            inner model, if it exists. One example is
            garage.tf.models.Sequential.

    Return:
        output: Tensor output(s) of the model.

    """
    # i_ph_normalized: (?, 1), y1_and_v1_ph: (?, 2)
    i_ph_normalized, y1_and_v1_ph = inputs
    f_ts_normalized = i_ph_normalized * self.trq_const / self.r_shaft
    f_ts = tf.multiply(
        f_ts_normalized[:, 0],
        tf.compat.v1.constant(self.half_force_range,
                              dtype=tf.float32,
                              name='half_force_range'),
        name='f')  # scalar-tensor multiplication; f_ts: (?,)
    y1_ph = y1_and_v1_ph[:, 0]  # y1_ph: (?,)
    k_pre_init = np.float32(
        np.random.uniform(self.k_pre_init_lb,
                          self.k_pre_init_ub,
                          size=(self.n_springs, )))
    self.k_pre_var = tf.compat.v1.get_variable('k_pre',
                                               initializer=k_pre_init,
                                               dtype=tf.float32,
                                               trainable=True)
    self.k_ts = tf.math.add(
        tf.nn.sigmoid(self.k_pre_var) * tf.compat.v1.constant(
            self.k_range, dtype=tf.float32, name='k_range'),
        tf.compat.v1.constant(self.k_lb, dtype=tf.float32, name='k_lb'),
        name='k')
    self.k_sum_ts = tf.math.reduce_sum(
        self.k_ts)  # only for monitoring the k
    # y1_mat: (?, n_springs); row i repeats y1[i] across all springs,
    # i.e. [[y1[1], y1[1], ...], ..., [y1[?], y1[?], ...]]
    y1_mat = tf.transpose(tf.tile([y1_ph], [self.n_springs, 1]),
                          name='y1_mat')
    # f_spring_ts: (?,), -(y1[i]*k[1] + y1[i]*k[2] + ...) for each row i
    f_spring_ts = -tf.linalg.matvec(y1_mat, self.k_ts, name='f_spring')
    pi_ts = tf.add(f_ts, f_spring_ts, name='pi')  # pi_ts: (?,)
    # we should not stop gradient on f_ts; k should be treated as an
    # actual action with the ability to backprop
    pi_and_f_ts = tf.stack([pi_ts, f_ts], axis=1,
                           name='pi_and_f')  # pi_and_f_ts: (?, 2)
    self.debug_ts = tf.gradients(tf.math.log(pi_and_f_ts), self.k_pre_var)
    self.log_std_var = parameter(
        # not actually a function of the input; input_var is only used
        # to match the batch dimension
        input_var=y1_and_v1_ph,
        length=2,
        initializer=tf.constant_initializer(self.pi_and_f_log_std_init),
        trainable=True,
        name='log_std')  # shape: (?, 2)
    # always treat the combo of pi and f as the action
    return pi_and_f_ts, self.log_std_var
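# The tile/matvec pair above is a batched way of applying n parallel springs
# at a shared displacement: row i of y1_mat repeats y1[i], so the
# matrix-vector product collapses to y1 * sum(k). A NumPy check of that
# identity (values arbitrary):
import numpy as np

y1 = np.array([0.3, -0.1])                   # batch of two displacements
k = np.array([1.0, 2.0, 3.0])                # n_springs stiffnesses
y1_mat = np.tile(y1[:, None], (1, k.size))   # (batch, n_springs)
f_spring = -(y1_mat @ k)                     # as -tf.linalg.matvec above
assert np.allclose(f_spring, -y1 * k.sum())  # parallel springs add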
def _build(self, inputs, name=None):
    """Output of the model given input placeholder(s).

    Users should implement _build() inside their subclassed model and
    construct the computation graph in this function.

    Args:
        inputs: Tensor input(s), recommended to be positional arguments,
            e.g. def _build(self, state_input, action_input, name=None).
            It would usually be the same as the inputs in build().
        name (str): Inner model name, also the variable scope of the
            inner model, if it exists. One example is
            garage.tf.models.Sequential.

    Return:
        output: Tensor output(s) of the model.

    """
    # f_ph_normalized: (?, 1), y1_v1_y2_v2_ph: (?, 4)
    f_ph_normalized, y1_v1_y2_v2_ph = inputs
    f_ts = tf.multiply(
        f_ph_normalized[:, 0],
        tf.compat.v1.constant(self.half_force_range,
                              dtype=tf.float32,
                              name='half_force_range'),
        name='f')  # scalar-tensor multiplication; f_ts: (?,)
    y1_ph = y1_v1_y2_v2_ph[:, 0]  # y1_ph: (?,)
    v1_ph = y1_v1_y2_v2_ph[:, 1]  # v1_ph: (?,)
    y2_ph = y1_v1_y2_v2_ph[:, 2]  # y2_ph: (?,)
    v2_ph = y1_v1_y2_v2_ph[:, 3]  # v2_ph: (?,)
    l_pre_var = parameter(
        input_var=y1_v1_y2_v2_ph,
        length=self.n_segments,
        initializer=tf.random_uniform_initializer(
            minval=self.l_pre_init_lb, maxval=self.l_pre_init_ub),
        trainable=True,
        name='l_pre')
    l_segment_ts = tf.math.add(
        tf.math.sigmoid(l_pre_var) * tf.compat.v1.constant(
            self.l_range, dtype=tf.float32, name='l_range'),
        tf.compat.v1.constant(self.l_lb, dtype=tf.float32, name='l_lb'),
        name='l')
    self.l_ts = tf.math.reduce_sum(l_segment_ts, axis=-1)
    # see the notes for the derivation of the interface forces
    f1_ts = 0.5 * self.k_interface * (y2_ph - y1_ph - self.l_ts) \
        + 0.5 * self.b_interface * (v2_ph - v1_ph)
    f2_ts = -f1_ts  # the bar has no mass
    f1_f2_f_ts = tf.stack([f1_ts, f2_ts, f_ts], axis=1, name='f1_f2_f')
    self.debug_ts = self.l_ts
    log_std_var = parameter(
        # not actually a function of the input; input_var is only used
        # to match the batch dimension
        input_var=y1_v1_y2_v2_ph,
        length=3,
        initializer=tf.constant_initializer(self.f1_f2_f_log_std_init),
        trainable=True,
        name='log_std')  # shape: (?, 3)
    return f1_f2_f_ts, log_std_var