def __init__(self, task='hfield', reset_every_episode=False, reward=True,
             max_timesteps=1000, *args, **kwargs):
    Serializable.quick_init(self, locals())
    self.cripple_mask = None
    self.reset_every_episode = reset_every_episode
    self.first = True
    self.timesteps = 0
    self.max_timesteps = max_timesteps
    MujocoEnv.__init__(
        self,
        os.path.join(os.path.abspath(os.path.dirname(__file__)),
                     "assets", "half_cheetah_hfield.xml"))
    task = None if task == 'None' else task

    self._init_geom_rgba = self.model.geom_rgba.copy()
    self._init_geom_contype = self.model.geom_contype.copy()
    self._init_geom_size = self.model.geom_size.copy()
    self._init_geom_pos = self.model.geom_pos.copy()
    self.dt = self.model.opt.timestep

    assert task in [None, 'hfield', 'hill', 'basin', 'steep', 'gentle']
    self.task = task
    self.x_walls = np.array([250, 260, 261, 270, 280, 285])
    self.height_walls = np.array([0.2, 0.2, 0.2, 0.2, 0.2, 0.2])
    self.height = 0.8
    self.width = 15
def __init__(self, task='damping', reset_every_episode=False, max_timesteps=1000):
    Serializable.quick_init(self, locals())
    self.reset_every_episode = reset_every_episode
    self.first = True
    self.timesteps = 0
    self.max_timesteps = max_timesteps
    MujocoEnv.__init__(
        self,
        os.path.join(os.path.abspath(os.path.dirname(__file__)),
                     "assets", "half_cheetah_blocks.xml"))
    task = None if task == 'None' else task

    self.cripple_mask = np.ones(self.action_space.shape)
    self._init_geom_rgba = self.model.geom_rgba.copy()
    self._init_geom_contype = self.model.geom_contype.copy()
    self._init_geom_size = self.model.geom_size.copy()
    self._init_geom_pos = self.model.geom_pos.copy()
    self.dt = self.model.opt.timestep

    assert task in [None, 'damping']
    self.task = task
def __init__(self, name, input_dim, output_dim, hidden_sizes=(32, 32),
             hidden_nonlinearity=tf.nn.relu, output_nonlinearity=None,
             input_var=None, params=None, **kwargs):
    Serializable.quick_init(self, locals())

    self.input_dim = input_dim
    self.output_dim = output_dim
    self.name = name
    self.input_var = input_var
    self.hidden_sizes = hidden_sizes
    self.hidden_nonlinearity = hidden_nonlinearity
    self.output_nonlinearity = output_nonlinearity
    self.batch_normalization = kwargs.get('batch_normalization', False)

    self._params = params
    self._assign_ops = None
    self._assign_phs = None
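# Hedged usage sketch (not from the original source): the container above is
# typically built through an `MLP` subclass that constructs the graph from
# these stored attributes; the call below mirrors the MLP(...) calls made by
# the dynamics models further down in this file. Dimensions are illustrative.
def _example_build_mlp(obs_dim=20, act_dim=6):
    mlp = MLP('dynamics_mlp',
              input_dim=obs_dim + act_dim,   # observation and action concatenated
              output_dim=obs_dim,            # e.g. predicted observation delta
              hidden_sizes=(512, 512),
              hidden_nonlinearity=tf.nn.relu,
              output_nonlinearity=None)
    return mlp.output_var                    # symbolic output tensor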
def __init__(self, task='force', reset_every_episode=False, fixed_goal=False):
    Serializable.quick_init(self, locals())
    self.reset_every_episode = reset_every_episode
    self.first = True
    self.fixed_goal = fixed_goal
    MujocoEnv.__init__(
        self,
        os.path.join(os.path.abspath(os.path.dirname(__file__)),
                     "assets", "arm_7dof.xml"))
    task = None if task == 'None' else task

    self.cripple_mask = np.ones(self.action_space.shape)
    self._init_geom_rgba = self.model.geom_rgba.copy()
    self._init_geom_contype = self.model.geom_contype.copy()
    self._init_geom_size = self.model.geom_size.copy()
    self._init_body_pos = self.model.body_pos.copy()
    self._init_body_masses = self.model.body_mass.copy()
    self._init_geom_pos = self.model.geom_pos.copy()
    self.dt = self.model.opt.timestep

    assert task in [None, 'cripple', 'damping', 'mass', 'force']
    self.task = task
def __init__(self, name, env, dynamics_model, reward_model=None, discount=1,
             use_cem=False, n_candidates=1024, horizon=10, num_cem_iters=8,
             percent_elites=0.05, use_reward_model=False):
    self.dynamics_model = dynamics_model
    self.reward_model = reward_model
    self.discount = discount
    self.n_candidates = n_candidates
    self.horizon = horizon
    self.use_cem = use_cem
    self.num_cem_iters = num_cem_iters
    self.percent_elites = percent_elites
    self.env = env
    self.use_reward_model = use_reward_model
    self._hidden_state = None

    # unwrap nested env wrappers down to the innermost environment
    self.unwrapped_env = env
    while hasattr(self.unwrapped_env, 'wrapped_env'):
        self.unwrapped_env = self.unwrapped_env.wrapped_env

    # make sure that the env has a reward function if no learned reward model is used
    if not self.use_reward_model:
        assert hasattr(self.unwrapped_env, 'reward'), "env must have a reward function"

    Serializable.quick_init(self, locals())
    super(RNNMPCController, self).__init__(env=env)
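# Hedged usage sketch (assumption, not original code): constructing the MPC
# controller above around a trained recurrent dynamics model. Since no reward
# model is passed, the env must expose a ground-truth `reward` function.
def _example_make_controller(env, dynamics_model):
    policy = RNNMPCController(name='rnn_mpc',
                              env=env,
                              dynamics_model=dynamics_model,
                              use_cem=True,        # CEM instead of random shooting
                              n_candidates=1024,
                              horizon=10,
                              num_cem_iters=8,
                              percent_elites=0.05)
    return policy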
def __init__(self, *args, **kwargs):
    # store the init args for serialization and call the super constructors
    Serializable.quick_init(self, locals())
    Layer.__init__(self, *args, **kwargs)

    self._cell_type = kwargs.get('cell_type', 'gru')
    self.state_var = kwargs.get('state_var', None)

    self.build_graph()
def __init__(self, observation_space, action_space):
    """
    :type observation_space: Space
    :type action_space: Space
    """
    Serializable.quick_init(self, locals())
    self._observation_space = observation_space
    self._action_space = action_space
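# Hedged sketch (assuming the base class above is this repo's `Policy` base):
# the constructor only records the spaces, so a minimal subclass just adds
# behaviour on top of them, e.g. sampling uniformly random actions.
class _ExampleRandomPolicy(Policy):
    def get_action(self, observation):
        return self._action_space.sample(), dict()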
def __getstate__(self):
    state = dict()
    state['init_args'] = Serializable.__getstate__(self)
    state['normalization'] = self.normalization
    state['networks'] = [nn.__getstate__() for nn in self._networks]
    return state
def __init__(self, task='rotorless', reset_every_episode=False, port=19999):
    Serializable.quick_init(self, locals())
    self.reset_every_episode = reset_every_episode
    self.first = True
    VREPQuad.__init__(self, ip='127.0.0.1', port=port)
    task = None if task == 'None' else task

    # allow disabling individual rotors via a multiplicative mask
    self.task_mask = np.ones(self.action_space.shape)

    assert task in [None, 'rotorless']
    self.task = task
    self.actiondim = self.action_space.shape[0]
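# Hedged sketch (not in the original): the rotor mask above is presumably
# applied multiplicatively in `step`, so a disabled rotor's command becomes 0;
# the VREPQuad.step call here is an assumption about the parent class API.
def _example_masked_step(self, action):
    masked_action = self.task_mask * action  # zeroed entries disable rotors
    return VREPQuad.step(self, masked_action)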
def __init__(self, task='hfield', max_episode_steps=200, reset_every_episode=False,
             reward=True, frame_skip=1, *args, **kwargs):
    Serializable.quick_init(self, locals())
    self.cripple_mask = None
    self.reset_every_episode = reset_every_episode
    self.first = True
    MujocoEnv.__init__(
        self,
        os.path.join(os.path.abspath(os.path.dirname(__file__)),
                     "assets", "half_cheetah_hfield.xml"),
        frame_skip=frame_skip)
    task = None if task == 'None' else task

    # rgba when material is omitted (ngeom x 4)
    self._init_geom_rgba = self.model.geom_rgba.copy()
    # geom contact type (ngeom x 1)
    self._init_geom_contype = self.model.geom_contype.copy()
    # geom-specific size parameters (ngeom x 3)
    self._init_geom_size = self.model.geom_size.copy()
    # local position offset relative to the parent body (ngeom x 3)
    self.init_geom_pos = self.model.geom_pos.copy()

    assert task in [None, 'hfield', 'hill', 'basin', 'steep', 'gentle']
    self.task = task
    self.x_walls = np.array([250, 260, 261, 270, 280, 285])
    self.height_walls = np.array([0.2, 0.2, 0.2, 0.2, 0.2, 0.2])
    self.height = 0.8
    self.width = 15
    self._max_episode_steps = max_episode_steps
def __init__(self, task='cripple', reset_every_episode=False):
    Serializable.quick_init(self, locals())
    self.cripple_mask = None
    self.first = True
    self.reset_every_episode = reset_every_episode
    self.task = task
    self.crippled_leg = 0
    self.prev_torso = None
    self.prev_qpos = None
    MujocoEnv.__init__(
        self,
        os.path.join(os.path.abspath(os.path.dirname(__file__)),
                     "assets", "half_cheetah_disabled.xml"))

    self._init_geom_rgba = self.model.geom_rgba.copy()
    self._init_geom_contype = self.model.geom_contype.copy()
    self._init_geom_size = self.model.geom_size.copy()
    self._init_geom_pos = self.model.geom_pos.copy()
    self.dt = self.model.opt.timestep
    self.cripple_mask = np.ones(self.action_space.shape)
    self.reward_range = (-np.inf, np.inf)
    # EzPickle expects the constructor arguments, not a locals() dict
    utils.EzPickle.__init__(self, task, reset_every_episode)
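# Hedged sketch (assumption based on the mask initialised above): crippling a
# joint is usually implemented by zeroing one entry of `cripple_mask` and
# multiplying actions by the mask before they reach the simulator.
def _example_cripple_joint(self, joint_idx=0):
    self.cripple_mask = np.ones(self.action_space.shape)
    self.cripple_mask[joint_idx] = 0.  # this actuator no longer has any effect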
def __init__(self,
             env,
             scale_reward=1.,
             normalize_obs=False,
             normalize_reward=False,
             obs_alpha=0.001,
             reward_alpha=0.001,
             normalization_scale=1.,
             ):
    Serializable.quick_init(self, locals())
    self._scale_reward = scale_reward
    self._wrapped_env = env

    self._normalize_obs = normalize_obs
    self._normalize_reward = normalize_reward
    self._obs_alpha = obs_alpha
    self._obs_mean = np.zeros(self.observation_space.shape)
    self._obs_var = np.ones(self.observation_space.shape)
    self._reward_alpha = reward_alpha
    self._reward_mean = 0.
    self._reward_var = 1.
    self._normalization_scale = normalization_scale
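# Hedged sketch of the companion update step (assumption, following the usual
# rllab normalize() pattern): the exponential moving averages initialised
# above would be updated once per observation with rate `_obs_alpha`.
def _example_update_obs_estimate(self, obs):
    a = self._obs_alpha
    self._obs_mean = (1 - a) * self._obs_mean + a * obs
    self._obs_var = (1 - a) * self._obs_var + a * np.square(obs - self._obs_mean)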
def __setstate__(self, state):
    Serializable.__setstate__(self, state['init_args'])
def __getstate__(self):
    state = dict()
    state['init_args'] = Serializable.__getstate__(self)
    return state
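# Hedged round-trip sketch: with the __getstate__/__setstate__ pair above,
# pickling works through Serializable, which rebuilds the object by re-running
# __init__ with the constructor arguments recorded by quick_init.
def _example_pickle_roundtrip(obj):
    import pickle
    return pickle.loads(pickle.dumps(obj))  # rebuilt from state['init_args']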
def __init__(self, *args, **kwargs):
    # store the init args for serialization and call the super constructors
    Serializable.quick_init(self, locals())
    Layer.__init__(self, *args, **kwargs)

    self.build_graph()
def __setstate__(self, state):
    Serializable.__setstate__(self, state['init_args'])
    self.normalization = state['normalization']
    for i in range(len(self._networks)):
        self._networks[i].__setstate__(state['networks'][i])
def __init__(self,
             name,
             env,
             hidden_sizes=(512, 512),
             meta_batch_size=10,
             hidden_nonlinearity=tf.nn.relu,
             output_nonlinearity=None,
             batch_size=500,
             learning_rate=0.001,
             inner_learning_rate=0.1,
             normalize_input=True,
             optimizer=tf.train.AdamOptimizer,
             valid_split_ratio=0.2,
             rolling_average_persitency=0.99,
             ):
    Serializable.quick_init(self, locals())

    self.normalization = None
    self.normalize_input = normalize_input
    self.next_batch = None
    self.meta_batch_size = meta_batch_size

    self.valid_split_ratio = valid_split_ratio
    self.rolling_average_persitency = rolling_average_persitency

    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.inner_learning_rate = inner_learning_rate
    self.name = name
    self._dataset_train = None
    self._dataset_test = None
    self._prev_params = None
    self._adapted_param_values = None

    # determine dimensionality of state and action space
    self.obs_space_dims = obs_space_dims = env.observation_space.shape[0]
    self.action_space_dims = action_space_dims = env.action_space.shape[0]

    hidden_nonlinearity = self._activations[hidden_nonlinearity]
    output_nonlinearity = self._activations[output_nonlinearity]

    """ ------------------ Pre-Update Graph + Adaptation ----------------------- """
    with tf.variable_scope(name):
        # placeholders
        self.obs_ph = tf.placeholder(tf.float32, shape=(None, obs_space_dims))
        self.act_ph = tf.placeholder(tf.float32, shape=(None, action_space_dims))
        self.delta_ph = tf.placeholder(tf.float32, shape=(None, obs_space_dims))

        # concatenate action and observation --> NN input
        self.nn_input = tf.concat([self.obs_ph, self.act_ph], axis=1)

        # create MLP
        mlp = MLP(name,
                  output_dim=obs_space_dims,
                  hidden_sizes=hidden_sizes,
                  hidden_nonlinearity=hidden_nonlinearity,
                  output_nonlinearity=output_nonlinearity,
                  input_var=self.nn_input,
                  input_dim=obs_space_dims + action_space_dims)

        self.delta_pred = mlp.output_var  # shape: (batch_size, ndim_obs, n_models)

        self.loss = tf.reduce_mean(tf.square(self.delta_ph - self.delta_pred))
        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.adaptation_sym = tf.train.GradientDescentOptimizer(self.inner_learning_rate).minimize(self.loss)

        # tensor_utils
        self.f_delta_pred = tensor_utils.compile_function([self.obs_ph, self.act_ph], self.delta_pred)

    """ --------------------------- Meta-training Graph ---------------------------------- """
    nn_input_per_task = tf.split(self.nn_input, self.meta_batch_size, axis=0)
    delta_per_task = tf.split(self.delta_ph, self.meta_batch_size, axis=0)

    pre_input_per_task, post_input_per_task = zip(*[tf.split(nn_input, 2, axis=0) for nn_input in nn_input_per_task])
    pre_delta_per_task, post_delta_per_task = zip(*[tf.split(delta, 2, axis=0) for delta in delta_per_task])

    pre_losses = []
    post_losses = []
    self._adapted_params = []

    for idx in range(self.meta_batch_size):
        with tf.variable_scope(name + '/pre_model_%d' % idx, reuse=tf.AUTO_REUSE):
            pre_mlp = MLP(name,
                          output_dim=obs_space_dims,
                          hidden_sizes=hidden_sizes,
                          hidden_nonlinearity=hidden_nonlinearity,
                          output_nonlinearity=output_nonlinearity,
                          input_var=pre_input_per_task[idx],
                          input_dim=obs_space_dims + action_space_dims,
                          params=mlp.get_params())

            pre_delta_pred = pre_mlp.output_var
            pre_loss = tf.reduce_mean(tf.square(pre_delta_per_task[idx] - pre_delta_pred))

            adapted_params = self._adapt_sym(pre_loss, pre_mlp.get_params())
            self._adapted_params.append(adapted_params)

        with tf.variable_scope(name + '/post_model_%d' % idx, reuse=tf.AUTO_REUSE):
            post_mlp = MLP(name,
                           output_dim=obs_space_dims,
                           hidden_sizes=hidden_sizes,
                           hidden_nonlinearity=hidden_nonlinearity,
                           output_nonlinearity=output_nonlinearity,
                           input_var=post_input_per_task[idx],
                           params=adapted_params,
                           input_dim=obs_space_dims + action_space_dims)
            post_delta_pred = post_mlp.output_var

            post_loss = tf.reduce_mean(tf.square(post_delta_per_task[idx] - post_delta_pred))

            pre_losses.append(pre_loss)
            post_losses.append(post_loss)

    self.pre_loss = tf.reduce_mean(pre_losses)
    self.post_loss = tf.reduce_mean(post_losses)
    self.train_op = optimizer(self.learning_rate).minimize(self.post_loss)

    """ --------------------------- Post-update Inference Graph --------------------------- """
    with tf.variable_scope(name + '_ph_graph'):
        self.post_update_delta = []
        self.network_phs_meta_batch = []

        nn_input_per_task = tf.split(self.nn_input, self.meta_batch_size, axis=0)
        for idx in range(meta_batch_size):
            with tf.variable_scope('task_%i' % idx):
                network_phs = self._create_placeholders_for_vars(mlp.get_params())
                self.network_phs_meta_batch.append(network_phs)

                mlp_meta_batch = MLP(name,
                                     output_dim=obs_space_dims,
                                     hidden_sizes=hidden_sizes,
                                     hidden_nonlinearity=hidden_nonlinearity,
                                     output_nonlinearity=output_nonlinearity,
                                     params=network_phs,
                                     input_var=nn_input_per_task[idx],
                                     input_dim=obs_space_dims + action_space_dims,
                                     )
                self.post_update_delta.append(mlp_meta_batch.output_var)

    self._networks = [mlp]
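# Hedged sketch of `_adapt_sym`, which is called above but not shown here
# (an assumption modelled on the standard MAML inner step): one gradient-descent
# update on the pre-update loss, returning adapted parameters keyed like the
# originals so they can be fed to the post-update MLP via `params=`.
def _example_adapt_sym(self, loss, params_var):
    from collections import OrderedDict
    update_param_keys = list(params_var.keys())
    grads = tf.gradients(loss, [params_var[key] for key in update_param_keys])
    adapted_params = OrderedDict(
        (key, params_var[key] - self.inner_learning_rate * grad)
        for key, grad in zip(update_param_keys, grads))
    return adapted_params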
def __setstate__(self, d):
    Serializable.__setstate__(self, d)
    self._obs_mean = d["_obs_mean"]
    self._obs_var = d["_obs_var"]
def __getstate__(self):
    d = Serializable.__getstate__(self)
    d["_obs_mean"] = self._obs_mean
    d["_obs_var"] = self._obs_var
    return d
def __setstate__(self, state):
    Serializable.__setstate__(self, state['init_args'])
def __getstate__(self):
    state = dict()
    state['init_args'] = Serializable.__getstate__(self)
    return state
def __init__(self,
             name,
             env,
             hidden_sizes=(512,),
             cell_type='lstm',
             hidden_nonlinearity=tf.nn.tanh,
             output_nonlinearity=None,
             batch_size=500,
             learning_rate=0.001,
             normalize_input=True,
             optimizer=tf.train.AdamOptimizer,
             valid_split_ratio=0.2,
             rolling_average_persitency=0.99,
             backprop_steps=50,
             ):
    Serializable.quick_init(self, locals())

    self.recurrent = True
    self.normalization = None
    self.normalize_input = normalize_input
    self.next_batch = None

    self.valid_split_ratio = valid_split_ratio
    self.rolling_average_persitency = rolling_average_persitency
    self.backprop_steps = backprop_steps

    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.name = name
    self._dataset_train = None
    self._dataset_test = None

    # determine dimensionality of state and action space
    self.obs_space_dims = obs_space_dims = env.observation_space.shape[0]
    self.action_space_dims = action_space_dims = env.action_space.shape[0]

    """ computation graph for training and simple inference """
    with tf.variable_scope(name):
        # placeholders
        self.obs_ph = tf.placeholder(tf.float32, shape=(None, None, obs_space_dims), name='obs_ph')
        self.act_ph = tf.placeholder(tf.float32, shape=(None, None, action_space_dims), name='act_ph')
        self.delta_ph = tf.placeholder(tf.float32, shape=(None, None, obs_space_dims), name='delta_ph')

        # concatenate action and observation --> NN input
        self.nn_input = tf.concat([self.obs_ph, self.act_ph], axis=2)

        # create RNN
        rnns = []
        delta_preds = []
        self.obs_next_pred = []
        self.hidden_state_ph = []
        self.next_hidden_state_var = []
        self.cell = []
        with tf.variable_scope('rnn_model'):
            rnn = RNN(name,
                      output_dim=self.obs_space_dims,
                      hidden_sizes=hidden_sizes,
                      hidden_nonlinearity=hidden_nonlinearity,
                      output_nonlinearity=output_nonlinearity,
                      input_var=self.nn_input,
                      input_dim=self.obs_space_dims + self.action_space_dims,
                      cell_type=cell_type,
                      )

        self.delta_pred = rnn.output_var
        self.hidden_state_ph = rnn.state_var
        self.next_hidden_state_var = rnn.next_state_var
        self.cell = rnn.cell
        self._zero_state = self.cell.zero_state(1, tf.float32)

        self.loss = tf.reduce_mean(tf.square(self.delta_pred - self.delta_ph))

        params = list(rnn.get_params().values())
        self._gradients_ph = [tf.placeholder(shape=param.shape, dtype=tf.float32) for param in params]
        self._gradients_vars = tf.gradients(self.loss, params)
        applied_gradients = zip(self._gradients_ph, params)
        self.train_op = optimizer(self.learning_rate).apply_gradients(applied_gradients)

        # tensor_utils
        self.f_delta_pred = tensor_utils.compile_function(
            [self.obs_ph, self.act_ph, self.hidden_state_ph],
            [self.delta_pred, self.next_hidden_state_var])

    self._networks = [rnn]
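# Hedged training-step sketch (not original code): the placeholder/apply split
# above suggests gradients are computed per truncated segment of length
# `backprop_steps`, accumulated in numpy, and applied in a single optimizer
# step by feeding the sums back through `_gradients_ph`.
def _example_apply_accumulated_gradients(self, sess, segment_feed_dicts):
    grads = None
    for feed_dict in segment_feed_dicts:
        segment_grads = sess.run(self._gradients_vars, feed_dict=feed_dict)
        grads = segment_grads if grads is None else \
            [g + s for g, s in zip(grads, segment_grads)]
    sess.run(self.train_op, feed_dict=dict(zip(self._gradients_ph, grads)))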
def __init__(self,
             name,
             env,
             hidden_sizes=(512, 512),
             hidden_nonlinearity=tf.nn.relu,
             output_nonlinearity=None,
             batch_size=500,
             learning_rate=0.001,
             normalize_input=True,
             optimizer=tf.train.AdamOptimizer,
             valid_split_ratio=0.2,
             rolling_average_persitency=0.99,
             ):
    Serializable.quick_init(self, locals())

    self.normalization = None
    self.normalize_input = normalize_input
    self.next_batch = None

    self.valid_split_ratio = valid_split_ratio
    self.rolling_average_persitency = rolling_average_persitency

    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.name = name
    self._dataset_train = None
    self._dataset_test = None

    # determine dimensionality of state and action space
    self.obs_space_dims = obs_space_dims = env.observation_space.shape[0]
    self.action_space_dims = action_space_dims = env.action_space.shape[0]

    hidden_nonlinearity = self._activations[hidden_nonlinearity]
    output_nonlinearity = self._activations[output_nonlinearity]

    with tf.variable_scope(name):
        # placeholders
        self.obs_ph = tf.placeholder(tf.float32, shape=(None, obs_space_dims))
        self.act_ph = tf.placeholder(tf.float32, shape=(None, action_space_dims))
        self.delta_ph = tf.placeholder(tf.float32, shape=(None, obs_space_dims))

        # concatenate action and observation --> NN input
        self.nn_input = tf.concat([self.obs_ph, self.act_ph], axis=1)

        # create MLP
        with tf.variable_scope('ff_model'):
            mlp = MLP(name,
                      output_dim=obs_space_dims,
                      hidden_sizes=hidden_sizes,
                      hidden_nonlinearity=hidden_nonlinearity,
                      output_nonlinearity=output_nonlinearity,
                      input_var=self.nn_input,
                      input_dim=obs_space_dims + action_space_dims)

        self.delta_pred = mlp.output_var  # shape: (batch_size, ndim_obs, n_models)

        self.loss = tf.reduce_mean(tf.square(self.delta_ph - self.delta_pred))
        self.optimizer = optimizer(self.learning_rate)
        self.train_op = self.optimizer.minimize(self.loss)

        # tensor_utils
        self.f_delta_pred = tensor_utils.compile_function(
            [self.obs_ph, self.act_ph], self.delta_pred)

    self._networks = [mlp]
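# Hedged usage sketch: the model above is trained on observation *deltas*, so
# next-state prediction adds the predicted delta back onto the current
# observation (input normalization omitted here for brevity).
def _example_predict_next_obs(self, obs, act):
    delta = self.f_delta_pred(obs, act)
    return obs + delta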
def __init__(self, env):
    Serializable.quick_init(self, locals())
    self.env = env
    # unwrap nested env wrappers down to the innermost environment
    while hasattr(self.env, 'wrapped_env'):
        self.env = self.env.wrapped_env