Example #1
    def _init(self, ob_space, ac_space, hid_layers=[],
              deterministic=True, diagonal=True, trainable_std=True,
              use_bias=True, use_critic=False,
              seed=None, verbose=True,
              hidden_W_init=U.normc_initializer(1.0),
              higher_mean_init=None,
              higher_logstd_init=tf.constant_initializer(np.log(0.11)),
              const_std_init=False):
        """Params:
            ob_space: task observation space
            ac_space : task action space
            hid__layers: list with width of each hidden layer
            deterministic: whether the actor is deterministic
            diagonal: whether the higher order policy has a diagonal covariance
            matrix
            use_bias: whether to include bias in neurons
            use_critic: whether to include a critic network
            seed: optional random seed
        """
        # Check environment's shapes
        assert isinstance(ob_space, gym.spaces.Box)
        assert len(ac_space.shape) == 1
        # Set seed
        if seed is not None:
            set_global_seeds(seed)
        # Set some attributes
        self.diagonal = diagonal
        self.use_bias = use_bias
        batch_length = None  # Accepts a sequence of episodes of arbitrary length
        self.ac_dim = ac_space.shape[0]
        self.ob_dim = ob_space.shape[0]
        self.linear = not hid_layers
        self.verbose = verbose
        self._ob = ob = U.get_placeholder(
            name="ob", dtype=tf.float32, shape=[None] + list(ob_space.shape))

        # Actor (N.B.: weight initialization is irrelevant)
        with tf.variable_scope('actor'):
            last_out = ob
            for i, hid_size in enumerate(hid_layers):
                # MLP feature extraction
                last_out = tf.nn.tanh(
                    tf.layers.dense(last_out, hid_size,
                                    name='fc%i' % (i+1),
                                    kernel_initializer=hidden_W_init,
                                    use_bias=use_bias))
            if deterministic and isinstance(ac_space, gym.spaces.Box):
                # Deterministic action selection
                self.actor_mean = actor_mean = \
                    tf.layers.dense(last_out, ac_space.shape[0],
                                    name='action',
                                    kernel_initializer=hidden_W_init,
                                    use_bias=use_bias)
            else:
                raise NotImplementedError

        # Get flattened actor weights
        with tf.variable_scope('actor') as scope:
            self.actor_weights = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                scope=scope.name)
            # flatten weights
            self.flat_actor_weights = tf.concat(
                [tf.reshape(w, [-1]) for w in self.actor_weights], axis=0)
            self._n_actor_weights = n_actor_weights = \
                self.flat_actor_weights.shape[0]

        # Higher order policy (Gaussian)
        with tf.variable_scope('higher'):
            if higher_mean_init is None:
                # Initial means sampled from a normal distribution with stddev 0.01
                higher_mean_init = tf.where(
                    tf.not_equal(self.flat_actor_weights,
                                 tf.constant(0, dtype=tf.float32)),
                    tf.random_normal(shape=[n_actor_weights.value],
                                     stddev=0.01),
                    tf.zeros(shape=[n_actor_weights]))  # bias init always zero
            self.higher_mean = tf.get_variable(
                name='higher_mean',
                initializer=higher_mean_init,
                shape=self.flat_actor_weights.get_shape())
            # Keep the weights' domain compact
            # self.higher_mean = higher_mean = tf.clip_by_value(
            #     self.higher_mean, -1, 1, 'higher_mean_clipped')
            higher_mean = self.higher_mean
            if diagonal:
                if const_std_init:
                    self.higher_logstd = higher_logstd = \
                        tf.get_variable(
                            name='higher_logstd',
                            initializer=higher_logstd_init,
                            trainable=trainable_std)
                else:
                    self.higher_logstd = higher_logstd = \
                        tf.get_variable(
                            name='higher_logstd',
                            shape=[n_actor_weights],
                            initializer=higher_logstd_init,
                            trainable=trainable_std)
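                # Flat pd parameters: concat of the mean and one log-std per
                # weight ('higher_mean * 0. + higher_logstd' broadcasts the
                # log-std, possibly a scalar, to the mean's shape)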
                pdparam = tf.concat([higher_mean,
                                     higher_mean * 0. + higher_logstd],
                                    axis=0)
                self.pdtype = pdtype = \
                    DiagGaussianPdType(n_actor_weights.value)
            else:
                # Cholesky covariance matrix
                self.higher_logstd = higher_logstd = tf.get_variable(
                    name='higher_logstd',
                    shape=[n_actor_weights*(n_actor_weights + 1)//2],
                    initializer=tf.initializers.constant(0.))
                pdparam = tf.concat([higher_mean, higher_logstd],
                                    axis=0)
                self.pdtype = pdtype = CholeskyGaussianPdType(
                    n_actor_weights.value)

        # Sample actor weights
        self.pd = pdtype.pdfromflat(pdparam)
        sampled_actor_params = self.pd.sample()
        symm_sampled_actor_params = self.pd.sample_symmetric()
        self._sample_actor_params = U.function([], [sampled_actor_params])
        self._sample_symm_actor_params = U.function(
            [], list(symm_sampled_actor_params))

        # Assign actor weights
        with tf.variable_scope('actor') as scope:
            actor_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                             scope=scope.name)
            self._use_sampled_actor_params = \
                U.assignFromFlat(actor_params, sampled_actor_params)
            self._get_actor_params = U.GetFlat(actor_params)
            self._set_actor_params = U.SetFromFlat(actor_params)

        # Act
        self._action = action = actor_mean
        self._act = U.function([ob], [action])

        # Manage higher policy weights
        with tf.variable_scope('higher') as scope:
            self._higher_params = higher_params = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)
            self.flat_higher_params = tf.concat([tf.reshape(w, [-1]) for w in
                                                 self._higher_params], axis=0)
            self._n_higher_params = self.flat_higher_params.shape[0]
            self._get_flat_higher_params = U.GetFlat(higher_params)
            self._set_higher_params = U.SetFromFlat(self._higher_params)

        # Evaluating
        self._actor_params_in = actor_params_in = \
            U.get_placeholder(name='actor_params_in',
                              dtype=tf.float32,
                              shape=[batch_length] + [n_actor_weights])
        self._rets_in = rets_in = \
            U.get_placeholder(name='returns_in',
                              dtype=tf.float32,
                              shape=[batch_length])
        ret_mean, ret_var = tf.nn.moments(rets_in, axes=[0])
        ret_std = tf.sqrt(ret_var)  # tf.nn.moments returns the variance
        self._get_ret_mean = U.function([self._rets_in], [ret_mean])
        self._get_ret_std = U.function([self._rets_in], [ret_std])
        self._logprobs = logprobs = self.pd.logp(actor_params_in)
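        # Gradient of sum_i logp(theta_i) * R_i w.r.t. the higher-policy
        # parameters: the PGPE gradient estimate times the batch size
        # (divide by the number of episodes to obtain the estimate itself)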
        pgpe_times_n = U.flatgrad(logprobs*rets_in, higher_params)
        self._get_pgpe_times_n = U.function([actor_params_in, rets_in],
                                            [pgpe_times_n])
        self._get_actor_mean = U.function([ob], [self.actor_mean])
        self._get_higher_mean = U.function([ob], [self.higher_mean])
        self._get_higher_std = U.function([], tf.exp([self.higher_logstd]))

        # Batch off-policy PGPE
        self._probs = tf.exp(logprobs)
        self._behavioral = None
        self._renyi_other = None

        # Renyi computation
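        # exp(sum_i log sigma_i) = prod_i sigma_i, i.e. the determinant of the
        # diagonal standard-deviation matrix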
        self._det_sigma = tf.exp(tf.reduce_sum(self.higher_logstd))

        # Fisher computation (diagonal case)
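        # For a factorized Gaussian the Fisher information is 1/sigma_i^2 =
        # exp(-2 * logstd_i) for each mean component and the constant 2 for
        # each log-std component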
        mean_fisher_diag = tf.exp(-2*self.higher_logstd)
        if trainable_std:
            cov_fisher_diag = mean_fisher_diag*0 + 2
            self._fisher_diag = tf.concat(
                [mean_fisher_diag, cov_fisher_diag], axis=0)
        else:
            self._fisher_diag = mean_fisher_diag
        self._get_fisher_diag = U.function([], [self._fisher_diag])
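
A minimal usage sketch for the policy above, assuming a hypothetical wrapper class PeMlpPolicy whose constructor ends up calling the _init shown here; the environment name and the training loop are illustrative only, while the helpers used (_sample_actor_params, _set_actor_params, _act, _get_pgpe_times_n) are the ones built in _init:

import gym

env = gym.make('MountainCarContinuous-v0')    # any Box-observation, Box-action task
pol = PeMlpPolicy('pi', env.observation_space, env.action_space,
                  hid_layers=[], seed=0)      # hypothetical constructor calling _init

thetas, rets = [], []
for episode in range(100):
    theta = pol._sample_actor_params()[0]     # draw actor weights from the higher policy
    pol._set_actor_params(theta)              # load them into the actor network
    ob, done, ret = env.reset(), False, 0.
    while not done:
        ac = pol._act(ob[None])[0][0]         # deterministic action of the sampled actor
        ob, rew, done, _ = env.step(ac)
        ret += rew
    thetas.append(theta)
    rets.append(ret)

# _get_pgpe_times_n returns the gradient summed over the batch, so divide by N
grad = pol._get_pgpe_times_n(thetas, rets)[0] / len(rets)
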
Example #2
    def _init(self,
              ob_space,
              ac_space,
              hid_layers=[],
              deterministic=True,
              diagonal=True,
              use_bias=False,
              use_critic=False,
              seed=None,
              verbose=True,
              zero_init=False):
        """Params:
            ob_space: task observation space
            ac_space : task action space
            hid__layers: list with width of each hidden layer
            deterministic: whether the actor is deterministic
            diagonal: whether the higher order policy has a diagonal covariance
            matrix
            use_bias: whether to include bias in neurons
            use_critic: whether to include a critic network
            seed: optional random seed
        """
        assert isinstance(ob_space, gym.spaces.Box)
        assert len(ac_space.shape) == 1
        self.diagonal = diagonal
        self.use_bias = use_bias
        batch_length = None  # Accepts a sequence of episodes of arbitrary length
        self.observation_space = ob_space
        self.action_space = ac_space
        self.ac_dim = ac_space.shape[0]
        self.ob_dim = ob_space.shape[0]
        self.hid_layers = hid_layers
        self.deterministic = deterministic
        self.use_critic = use_critic
        self.linear = not hid_layers
        self.verbose = verbose

        if seed is not None:
            set_global_seeds(seed)

        self._ob = ob = U.get_placeholder(name="ob",
                                          dtype=tf.float32,
                                          shape=[None] + list(ob_space.shape))

        # Critic (normally not used)
        if use_critic:
            with tf.variable_scope('critic'):
                last_out = ob
                for i, hid_size in enumerate(hid_layers):
                    last_out = tf.nn.tanh(
                        tf.layers.dense(
                            last_out,
                            hid_size,
                            name="fc%i" % (i + 1),
                            kernel_initializer=U.normc_initializer(1.0)))
                self.vpred = tf.layers.dense(
                    last_out,
                    1,
                    name='final',
                    kernel_initializer=U.normc_initializer(1.0))[:, 0]

        # Actor (N.B.: weight initialization is irrelevant)
        with tf.variable_scope('actor'):
            last_out = ob
            for i, hid_size in enumerate(hid_layers):
                # MLP feature extraction
                last_out = tf.nn.tanh(
                    tf.layers.dense(
                        last_out,
                        hid_size,
                        name='fc%i' % (i + 1),
                        kernel_initializer=tf.initializers.constant(0.),
                        use_bias=use_bias))
            if deterministic and isinstance(ac_space, gym.spaces.Box):
                # Deterministic action selection
                self.actor_mean = actor_mean = tf.layers.dense(
                    last_out,
                    ac_space.shape[0],
                    name='action',
                    kernel_initializer=tf.initializers.constant(0.),
                    use_bias=use_bias)
            else:
                raise NotImplementedError  # Currently supports only deterministic action policies

        # Get flattened actor weights
        with tf.variable_scope('actor') as scope:
            self.actor_weights = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)
            # flatten weights
            self.flat_actor_weights = tf.concat(
                [tf.reshape(w, [-1]) for w in self.actor_weights], axis=0)
            self._n_actor_weights = n_actor_weights = \
                self.flat_actor_weights.shape[0]

        # Higher order policy (Gaussian)
        with tf.variable_scope('higher'):
            if zero_init:
                higher_mean_init = tf.where(
                    tf.not_equal(self.flat_actor_weights,
                                 tf.constant(0, dtype=tf.float32)),
                    tf.zeros(shape=[n_actor_weights.value]),
                    tf.zeros(shape=[n_actor_weights]))
            else:
                # Initial means sampled from a normal distribution with stddev 0.01
                higher_mean_init = tf.where(
                    tf.not_equal(self.flat_actor_weights,
                                 tf.constant(0, dtype=tf.float32)),
                    tf.random_normal(shape=[n_actor_weights.value],
                                     stddev=0.01),
                    tf.zeros(shape=[n_actor_weights]))
            self.higher_mean = higher_mean = tf.get_variable(
                name='higher_mean', initializer=higher_mean_init)

            if diagonal:
                # Diagonal covariance matrix; all log-stds initialized to 0 (std = 1)
                self.higher_logstd = higher_logstd = tf.get_variable(
                    name='higher_logstd',
                    shape=[n_actor_weights],
                    initializer=tf.initializers.constant(0.))
                pdparam = tf.concat(
                    [higher_mean, higher_mean * 0. + higher_logstd], axis=0)
                self.pdtype = pdtype = DiagGaussianPdType(
                    n_actor_weights.value)
            else:
                # Cholesky covariance matrix
                self.higher_logstd = higher_logstd = tf.get_variable(
                    name='higher_logstd',
                    shape=[n_actor_weights * (n_actor_weights + 1) // 2],
                    initializer=tf.initializers.constant(0.))
                pdparam = tf.concat([higher_mean, higher_logstd], axis=0)
                self.pdtype = pdtype = CholeskyGaussianPdType(
                    n_actor_weights.value)

        # Sample actor weights
        self.pd = pdtype.pdfromflat(pdparam)
        sampled_actor_params = self.pd.sample()
        symm_sampled_actor_params = self.pd.sample_symmetric()
        self._sample_symm_actor_params = U.function(
            [], list(symm_sampled_actor_params))
        self._sample_actor_params = U.function([], [sampled_actor_params])

        # Assign actor weights
        with tf.variable_scope('actor') as scope:
            actor_params = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)
            self._use_sampled_actor_params = U.assignFromFlat(
                actor_params, sampled_actor_params)
            self._set_actor_params = U.SetFromFlat(actor_params)
            self._get_actor_params = U.GetFlat(actor_params)

        # Act
        self._action = action = actor_mean
        self._act = U.function([ob], [action])

        # Higher policy weights
        with tf.variable_scope('higher') as scope:
            self._higher_params = higher_params = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)
            # flatten weights
            self.flat_higher_params = tf.concat(
                [tf.reshape(w, [-1]) for w in self._higher_params], axis=0)
            self._n_higher_params = self.flat_higher_params.shape[0]
            self._get_flat_higher_params = U.GetFlat(higher_params)
            self._set_higher_params = U.SetFromFlat(self._higher_params)

        # Batch PGPE
        self._actor_params_in = actor_params_in = \
                U.get_placeholder(name='actor_params_in',
                                  dtype=tf.float32,
                                  shape=[batch_length] + [n_actor_weights])
        self._rets_in = rets_in = U.get_placeholder(name='returns_in',
                                                    dtype=tf.float32,
                                                    shape=[batch_length])
        ret_mean, ret_var = tf.nn.moments(rets_in, axes=[0])
        ret_std = tf.sqrt(ret_var)  # tf.nn.moments returns the variance
        self._get_ret_mean = U.function([self._rets_in], [ret_mean])
        self._get_ret_std = U.function([self._rets_in], [ret_std])
        self._logprobs = logprobs = self.pd.logp(actor_params_in)
        pgpe_times_n = U.flatgrad(logprobs * rets_in, higher_params)
        self._get_pgpe_times_n = U.function([actor_params_in, rets_in],
                                            [pgpe_times_n])

        # One-episode PGPE
        # Used N times to compute the baseline -> can we do better?
        self._one_actor_param_in = one_actor_param_in = U.get_placeholder(
            name='one_actor_param_in',
            dtype=tf.float32,
            shape=[n_actor_weights])
        one_logprob = self.pd.logp(one_actor_param_in)
        score = U.flatgrad(one_logprob, higher_params)
        score_norm = tf.norm(score)
        self._get_score = U.function([one_actor_param_in], [score])
        self._get_score_norm = U.function([one_actor_param_in], [score_norm])

        # Batch off-policy PGPE
        self._probs = tf.exp(logprobs)
        self._behavioral = None
        self._renyi_other = None

        # One-episode off-policy PGPE
        self._one_prob = tf.exp(one_logprob)

        # Renyi computation
        self._det_sigma = tf.exp(tf.reduce_sum(self.higher_logstd))

        # Fisher computation (diagonal case)
        mean_fisher_diag = tf.exp(-2 * self.higher_logstd)
        cov_fisher_diag = mean_fisher_diag * 0 + 2
        self._fisher_diag = tf.concat([mean_fisher_diag, cov_fisher_diag],
                                      axis=0)
        self._get_fisher_diag = U.function([], [self._fisher_diag])

        # Multiple importance sampling
        self._memory = None
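
The handles created above can be combined into a complete update step. A hedged sketch follows (the update rule itself is illustrative, not taken from this code): the diagonal Fisher returned by _get_fisher_diag can drive an approximate natural-gradient ascent step on the flattened higher-policy parameters, where pol denotes a hypothetical instance wrapping the _init above and thetas/rets are the sampled actor weights and episode returns:

import numpy as np

def natural_pgpe_step(pol, thetas, rets, lr=1e-2):
    # Vanilla PGPE gradient: _get_pgpe_times_n sums over the batch, so divide by N
    grad = pol._get_pgpe_times_n(thetas, rets)[0] / len(rets)
    # Diagonal Fisher of the higher-order Gaussian; elementwise division gives
    # an approximate natural gradient F^-1 g (ordering assumed to match the
    # flattened [mean, log-std] parameter vector)
    fisher = pol._get_fisher_diag()[0]
    nat_grad = grad / np.clip(fisher, 1e-8, None)
    # Gradient-ascent step on the flat higher-policy parameters
    params = pol._get_flat_higher_params()
    pol._set_higher_params(params + lr * nat_grad)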