def __init__(self, name, input_dim, output_dim, hidden_sizes=(32, 32),
             hidden_nonlinearity=tf.nn.relu, output_nonlinearity=None,
             input_var=None, **kwargs):
    Serializable.quick_init(self, locals())

    self.input_dim = input_dim
    self.output_dim = output_dim
    self.name = name
    self.input_var = input_var
    self.hidden_sizes = hidden_sizes
    self.hidden_nonlinearity = hidden_nonlinearity
    self.output_nonlinearity = output_nonlinearity
    self.batch_normalization = kwargs.get('batch_normalization', False)

    self._params = None
    self._assign_ops = None
    self._assign_phs = None
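# Self-contained numpy sketch of the forward pass such a layer configures: two hidden
# layers of 32 relu units followed by a linear output, matching hidden_sizes=(32, 32),
# hidden_nonlinearity=relu and output_nonlinearity=None above. The random weights and
# dimensions are illustrative placeholders, not the layer's actual parameters.
import numpy as np

def mlp_forward(x, sizes, nonlinearity=lambda v: np.maximum(v, 0.0)):
    # sizes = (input_dim, *hidden_sizes, output_dim); final layer is linear
    rng = np.random.default_rng(0)
    for i, (n_in, n_out) in enumerate(zip(sizes[:-1], sizes[1:])):
        w, b = rng.normal(size=(n_in, n_out)) * 0.1, np.zeros(n_out)
        x = x @ w + b
        if i < len(sizes) - 2:
            x = nonlinearity(x)
    return x

out = mlp_forward(np.random.randn(8, 23), sizes=(23, 32, 32, 17))
print(out.shape)  # (8, 17)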
def __init__(self, *args, **kwargs):
    # store the init args for serialization and call the super constructors
    Serializable.quick_init(self, locals())
    Layer.__init__(self, *args, **kwargs)

    self._cell_type = kwargs.get('cell_type', 'gru')
    self.state_var = kwargs.get('state_var', None)

    self.build_graph()
def __init__(self, *args, **kwargs):
    # store the init args for serialization and call the super constructors
    Serializable.quick_init(self, locals())
    Layer.__init__(self, *args, **kwargs)

    self.num_filters = kwargs.get('num_filters')
    self.kernel_sizes = kwargs.get('kernel_sizes')
    self.strides = kwargs.get('strides')
    self.hidden_nonlinearity = kwargs.get('hidden_nonlinearity')
    self.output_nonlinearity = kwargs.get('output_nonlinearity')

    self.build_graph()
def __init__(
        self,
        env,
        policy,
        num_rollouts,
        max_path_length,
        n_parallel=1,
        vae=None,
):
    Serializable.quick_init(self, locals())
    super(Sampler, self).__init__(env, policy, n_parallel, max_path_length)

    self.total_samples = num_rollouts * max_path_length
    self.n_parallel = n_parallel
    self.total_timesteps_sampled = 0
    self.vae = vae

    # setup vectorized environment
    if self.n_parallel > 1:
        self.vec_env = ParallelEnvExecutor(env, n_parallel, num_rollouts, self.max_path_length)
    else:
        self.vec_env = IterativeEnvExecutor(env, num_rollouts, self.max_path_length)
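# Minimal, self-contained sketch of what an iterative (serial) env executor does when
# n_parallel == 1: keep num_rollouts copies of the environment, step all of them with a
# batch of actions, and reset a copy when it finishes or hits max_path_length. DummyEnv,
# SimpleIterativeExecutor and their method names are assumptions made only so the sketch
# runs on its own; they are not the repo's actual executor classes.
import numpy as np

class DummyEnv:
    def reset(self):
        return np.zeros(3)

    def step(self, action):
        return np.random.randn(3), 0.0, np.random.rand() < 0.05, {}

class SimpleIterativeExecutor:
    def __init__(self, env_cls, num_envs, max_path_length):
        self.envs = [env_cls() for _ in range(num_envs)]
        self.ts = np.zeros(num_envs, dtype=int)
        self.max_path_length = max_path_length

    def reset(self):
        return [env.reset() for env in self.envs]

    def step(self, actions):
        # step every env copy, then auto-reset the ones that are done or timed out
        results = [env.step(a) for env, a in zip(self.envs, actions)]
        obs, rewards, dones, infos = map(list, zip(*results))
        self.ts += 1
        for i, done in enumerate(dones):
            if done or self.ts[i] >= self.max_path_length:
                obs[i] = self.envs[i].reset()
                self.ts[i] = 0
                dones[i] = True
        return obs, rewards, dones, infos

executor = SimpleIterativeExecutor(DummyEnv, num_envs=4, max_path_length=10)
obs = executor.reset()
obs, rewards, dones, infos = executor.step(np.zeros((4, 1)))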
def __init__(
        self,
        env,
        scale_reward=1.,
        normalize_obs=False,
        normalize_reward=False,
        obs_alpha=0.001,
        reward_alpha=0.001,
        normalization_scale=1.,
):
    Serializable.quick_init(self, locals())
    self._scale_reward = scale_reward
    self._wrapped_env = env

    self._normalize_obs = normalize_obs
    self._normalize_reward = normalize_reward
    self._obs_alpha = obs_alpha
    self._obs_mean = np.zeros(self.observation_space.shape)
    self._obs_var = np.ones(self.observation_space.shape)
    self._reward_alpha = reward_alpha
    self._reward_mean = 0.
    self._reward_var = 1.
    self._normalization_scale = normalization_scale
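# A minimal, self-contained numpy sketch of the running-statistics scheme that the
# obs_alpha / reward_alpha coefficients above suggest: an exponential moving average of the
# observation mean and variance, used to whiten observations. The function names here are
# illustrative assumptions, not the wrapper's actual API.
import numpy as np

def update_running_stats(obs, mean, var, alpha=0.001):
    # blend the new observation into the running mean/variance with weight alpha
    mean = (1 - alpha) * mean + alpha * obs
    var = (1 - alpha) * var + alpha * np.square(obs - mean)
    return mean, var

def normalize_obs(obs, mean, var, eps=1e-8):
    # whiten the observation with the current running statistics
    return (obs - mean) / (np.sqrt(var) + eps)

mean, var = np.zeros(3), np.ones(3)
for obs in np.random.randn(100, 3):
    mean, var = update_running_stats(obs, mean, var)
print(normalize_obs(np.random.randn(3), mean, var))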
def __init__(self, *args, **kwargs):
    # store the init args for serialization and call the super constructors
    Serializable.quick_init(self, locals())
    Layer.__init__(self, *args, **kwargs)

    self.build_graph()
def __init__(
        self,
        name,
        env,
        hidden_sizes=(500, 500),
        hidden_nonlinearity="tanh",
        output_nonlinearity=None,
        batch_size=500,
        learning_rate=0.001,
        weight_normalization=True,
        normalize_input=True,
        optimizer=tf.train.AdamOptimizer,
        valid_split_ratio=0.2,
        rolling_average_persitency=0.99,
        buffer_size=100000,
):
    Serializable.quick_init(self, locals())

    self.normalization = None
    self.normalize_input = normalize_input
    self.use_reward_model = False
    self.buffer_size = buffer_size
    self.name = name
    self.hidden_sizes = hidden_sizes
    self._dataset_train = None
    self._dataset_test = None
    self.next_batch = None

    self.valid_split_ratio = valid_split_ratio
    self.rolling_average_persitency = rolling_average_persitency

    self.hidden_nonlinearity = hidden_nonlinearity = self._activations[hidden_nonlinearity]
    self.output_nonlinearity = output_nonlinearity = self._activations[output_nonlinearity]

    with tf.variable_scope(name):
        self.batch_size = batch_size
        self.learning_rate = learning_rate

        # determine dimensionality of state and action space
        self.obs_space_dims = env.observation_space.shape[0]
        self.action_space_dims = env.action_space.shape[0]

        # placeholders
        self.obs_ph = tf.placeholder(tf.float32, shape=(None, self.obs_space_dims))
        self.act_ph = tf.placeholder(tf.float32, shape=(None, self.action_space_dims))
        self.delta_ph = tf.placeholder(tf.float32, shape=(None, self.obs_space_dims))

        self._create_stats_vars()

        # concatenate action and observation --> NN input
        self.nn_input = tf.concat([self.obs_ph, self.act_ph], axis=1)

        # create MLP
        mlp = MLP(name,
                  output_dim=self.obs_space_dims,
                  hidden_sizes=hidden_sizes,
                  hidden_nonlinearity=hidden_nonlinearity,
                  output_nonlinearity=output_nonlinearity,
                  input_var=self.nn_input,
                  input_dim=self.obs_space_dims + self.action_space_dims,
                  weight_normalization=weight_normalization)

        self.delta_pred = mlp.output_var

        # define loss and train_op
        self.loss = tf.reduce_mean(tf.linalg.norm(self.delta_ph - self.delta_pred, axis=-1))
        self.optimizer = optimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.loss)

        # tensor_utils
        self.f_delta_pred = compile_function([self.obs_ph, self.act_ph], self.delta_pred)

        self._networks = [mlp]
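# Illustrative numpy sketch of the training target used above: the model predicts the state
# change delta = next_obs - obs from (obs, act), and the loss is the mean Euclidean norm of
# the prediction error, mirroring tf.linalg.norm(delta_ph - delta_pred, axis=-1). The array
# shapes and random stand-ins below are assumptions for illustration only.
import numpy as np

obs = np.random.randn(500, 17)         # batch of observations
next_obs = np.random.randn(500, 17)    # observations after applying the actions
delta_target = next_obs - obs          # regression target fed into delta_ph

delta_pred = np.random.randn(500, 17)  # stand-in for the MLP output (delta_pred)
loss = np.mean(np.linalg.norm(delta_target - delta_pred, axis=-1))
print(loss)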
def __init__(
        self,
        name,
        env,
        num_models=5,
        hidden_sizes=(512, 512),
        hidden_nonlinearity='swish',
        output_nonlinearity=None,
        batch_size=500,
        learning_rate=0.001,
        weight_normalization=False,  # Doesn't work
        normalize_input=True,
        optimizer=tf.train.AdamOptimizer,
        valid_split_ratio=0.2,  # 0.1
        rolling_average_persitency=0.99,
        buffer_size=50000,
        loss_str='MSE',
):
    Serializable.quick_init(self, locals())

    max_logvar = 1
    min_logvar = 0.1

    self.normalization = None
    self.normalize_input = normalize_input
    self.next_batch = None

    self.valid_split_ratio = valid_split_ratio
    self.rolling_average_persitency = rolling_average_persitency

    self.buffer_size_train = int(buffer_size * (1 - valid_split_ratio))
    self.buffer_size_test = int(buffer_size * valid_split_ratio)
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.num_models = num_models
    self.hidden_sizes = hidden_sizes
    self.name = name
    self._dataset_train = None
    self._dataset_test = None

    # determine dimensionality of state and action space
    self.obs_space_dims = obs_space_dims = env.observation_space.shape[0]
    self.action_space_dims = action_space_dims = env.action_space.shape[0]

    self.timesteps_counter = 0
    self.used_timesteps_counter = 0

    self.hidden_nonlinearity = hidden_nonlinearity = self._activations[hidden_nonlinearity]
    self.output_nonlinearity = output_nonlinearity = self._activations[output_nonlinearity]

    """ computation graph for training and simple inference """
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        # placeholders
        self.obs_ph = tf.placeholder(tf.float32, shape=(None, obs_space_dims))
        self.act_ph = tf.placeholder(tf.float32, shape=(None, action_space_dims))
        self.delta_ph = tf.placeholder(tf.float32, shape=(None, obs_space_dims))

        self._create_stats_vars()

        # concatenate action and observation --> NN input
        self.nn_input = tf.concat([self.obs_ph, self.act_ph], axis=1)
        obs_ph = tf.split(self.nn_input, self.num_models, axis=0)

        # create MLP
        mlps = []
        delta_preds = []
        self.obs_next_pred = []
        for i in range(num_models):
            with tf.variable_scope('model_{}'.format(i), reuse=tf.AUTO_REUSE):
                mlp = MLP(
                    name + '/model_{}'.format(i),
                    output_dim=obs_space_dims,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity,
                    input_var=obs_ph[i],
                    input_dim=obs_space_dims + action_space_dims,
                )
                mlps.append(mlp)

            delta_preds.append(mlp.output_var)

        self.delta_pred = tf.stack(delta_preds, axis=2)  # shape: (batch_size, ndim_obs, n_models)

        # define loss and train_op
        if loss_str == 'L2':
            self.loss = tf.reduce_mean(
                tf.linalg.norm(self.delta_ph[:, :, None] - self.delta_pred, axis=1))
        elif loss_str == 'MSE':
            self.loss = tf.reduce_mean((self.delta_ph[:, :, None] - self.delta_pred) ** 2)
        else:
            raise NotImplementedError

        self.optimizer = optimizer(learning_rate=self.learning_rate)
        self.train_op = self.optimizer.minimize(self.loss)

        # tensor_utils
        self.f_delta_pred = compile_function([self.obs_ph, self.act_ph], self.delta_pred)

    """ computation graph for inference where each of the models receives a different batch """
    with tf.variable_scope(name, reuse=True):
        # placeholders
        self.obs_model_batches_stack_ph = tf.placeholder(tf.float32, shape=(None, obs_space_dims))
        self.act_model_batches_stack_ph = tf.placeholder(tf.float32, shape=(None, action_space_dims))
        self.delta_model_batches_stack_ph = tf.placeholder(tf.float32, shape=(None, obs_space_dims))

        # split stack into the batches for each model --> assume each model receives a batch of the same size
        self.obs_model_batches = tf.split(self.obs_model_batches_stack_ph, self.num_models, axis=0)
        self.act_model_batches = tf.split(self.act_model_batches_stack_ph, self.num_models, axis=0)
        self.delta_model_batches = tf.split(self.delta_model_batches_stack_ph, self.num_models, axis=0)

        # reuse previously created MLP but each model receives its own batch
        delta_preds = []
        self.obs_next_pred = []
        self.loss_model_batches = []
        self.train_op_model_batches = []
        for i in range(num_models):
            with tf.variable_scope('model_{}'.format(i), reuse=True):
                # concatenate action and observation --> NN input
                nn_input = tf.concat([self.obs_model_batches[i], self.act_model_batches[i]], axis=1)
                mlp = MLP(name + '/model_{}'.format(i),
                          output_dim=obs_space_dims,
                          hidden_sizes=hidden_sizes,
                          hidden_nonlinearity=hidden_nonlinearity,
                          output_nonlinearity=output_nonlinearity,
                          input_var=nn_input,
                          input_dim=obs_space_dims + action_space_dims,
                          weight_normalization=weight_normalization)

            delta_preds.append(mlp.output_var)

            # define loss and train_op
            if loss_str == 'L2':
                loss = tf.reduce_mean(
                    tf.linalg.norm(self.delta_model_batches[i] - mlp.output_var, axis=1))
            elif loss_str == 'MSE':
                loss = tf.reduce_mean((self.delta_model_batches[i] - mlp.output_var) ** 2)
            else:
                raise NotImplementedError
            self.loss_model_batches.append(loss)
            self.train_op_model_batches.append(
                optimizer(learning_rate=self.learning_rate).minimize(loss))

        self.delta_pred_model_batches_stack = tf.concat(
            delta_preds, axis=0)  # shape: (batch_size_per_model*num_models, ndim_obs)

        # tensor_utils
        self.f_delta_pred_model_batches = compile_function(
            [self.obs_model_batches_stack_ph, self.act_model_batches_stack_ph],
            self.delta_pred_model_batches_stack)

    self._networks = mlps
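# Shape sketch (numpy) of the "stacked batches" inference path above: the inputs for all
# ensemble members are stacked along the batch axis, split into num_models equal chunks,
# each chunk is fed to its own network, and the per-model predictions are concatenated back.
# The identity "network" below is a placeholder assumption; only the shape bookkeeping of
# tf.split / tf.concat is illustrated.
import numpy as np

num_models, batch_per_model, obs_dim = 5, 32, 17
obs_stack = np.random.randn(num_models * batch_per_model, obs_dim)

chunks = np.split(obs_stack, num_models, axis=0)  # analogous to tf.split(..., num_models, axis=0)
preds = [chunk for chunk in chunks]               # each model would map its own chunk here
pred_stack = np.concatenate(preds, axis=0)        # shape: (batch_per_model * num_models, obs_dim)
print(pred_stack.shape)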