def __init__(
        self,
        action_space,
        mu=0,
        theta=0.15,
        max_sigma=0.3,
        min_sigma=None,
        decay_period=100000,
):
    Serializable.quick_init(self, locals())
    if min_sigma is None:
        min_sigma = max_sigma
    self.mu = mu
    self.theta = theta
    self.sigma = max_sigma
    self._max_sigma = max_sigma
    self._min_sigma = min_sigma
    self._decay_period = decay_period
    self.dim = np.prod(action_space.low.shape)
    self.low = action_space.low
    self.high = action_space.high
    self.state = np.ones(self.dim) * self.mu
    self.reset()
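# A minimal sketch (not part of the original class) of how the fields set
# above are typically used by an Ornstein-Uhlenbeck exploration strategy:
# the internal state drifts toward mu, sigma is annealed from _max_sigma to
# _min_sigma over _decay_period steps, and the noisy action is clipped to the
# action-space bounds. The method name and arguments are assumptions.
def get_action_from_raw_action(self, action, t=0):
    # OU update: pull the state toward mu, then add a Gaussian perturbation
    self.state = self.state + self.theta * (self.mu - self.state) \
        + self.sigma * np.random.randn(self.dim)
    # linearly anneal sigma between max_sigma and min_sigma
    self.sigma = self._max_sigma - (self._max_sigma - self._min_sigma) \
        * min(1.0, t * 1.0 / self._decay_period)
    return np.clip(action + self.state, self.low, self.high)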
def __init__(
        self,
        env: MultitaskEnv,
        give_goal_difference=False,
):
    # self._wrapped_env needs to be set first because
    # Serializable.quick_init calls getattr on this class, and the
    # implementation of getattr (see below) calls self._wrapped_env.
    # Without setting this first, the call to self._wrapped_env would call
    # getattr again (since it's not set yet) and therefore loop forever.
    self._wrapped_env = env
    # Or else serialization gets delegated to the wrapped_env. Serialize
    # this env separately from the wrapped_env.
    self._serializable_initialized = False
    self._wrapped_obs_dim = env.observation_space.low.size
    self.give_goal_difference = give_goal_difference
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)

    wrapped_low = self.observation_space.low
    low = np.hstack((
        wrapped_low,
        min(wrapped_low) * np.ones(self._wrapped_env.goal_dim)
    ))
    wrapped_high = self.observation_space.high
    high = np.hstack((
        wrapped_high,
        max(wrapped_high) * np.ones(self._wrapped_env.goal_dim)
    ))
    self.observation_space = Box(low, high)
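# A minimal sketch (assumed, not from the original class) of how this wrapper
# would typically flatten observations: the current goal, or the difference
# between the goal and the goal-space projection of the observation, is
# appended to the wrapped observation. The attribute/method names used below
# (multitask_goal, convert_ob_to_goal) are assumptions about the MultitaskEnv
# interface.
def _add_goal_to_observation(self, obs):
    if self.give_goal_difference:
        goal_difference = (
            self._wrapped_env.multitask_goal
            - self._wrapped_env.convert_ob_to_goal(obs)
        )
        return np.hstack((obs, goal_difference))
    return np.hstack((obs, self._wrapped_env.multitask_goal))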
def __init__(self, min_distance=0, max_distance=2, use_low_gear_ratio=True):
    Serializable.quick_init(self, locals())
    self.max_distance = max_distance
    self.min_distance = min_distance
    MultitaskEnv.__init__(self)
    super().__init__(use_low_gear_ratio=use_low_gear_ratio)
    self.set_goal(np.array([self.max_distance, self.max_distance]))
def __init__(
        self,
        env,
        reward_scale=1.,
        obs_mean=None,
        obs_std=None,
):
    # self._wrapped_env needs to be set first because
    # Serializable.quick_init calls getattr on this class, and the
    # implementation of getattr (see below) calls self._wrapped_env.
    # Without setting this first, the call to self._wrapped_env would call
    # getattr again (since it's not set yet) and therefore loop forever.
    self._wrapped_env = env
    # Or else serialization gets delegated to the wrapped_env. Serialize
    # this env separately from the wrapped_env.
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)

    self._should_normalize = not (obs_mean is None and obs_std is None)
    if self._should_normalize:
        if obs_mean is None:
            obs_mean = np.zeros_like(env.observation_space.low)
        else:
            obs_mean = np.array(obs_mean)
        if obs_std is None:
            obs_std = np.ones_like(env.observation_space.low)
        else:
            obs_std = np.array(obs_std)
    self._reward_scale = reward_scale
    self._obs_mean = obs_mean
    self._obs_std = obs_std
    ub = np.ones(self._wrapped_env.action_space.shape)
    self.action_space = Box(-1 * ub, ub)
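# A minimal sketch (assumptions, not necessarily the original implementation)
# of how the parameters above would typically be applied: actions in [-1, 1]
# are rescaled to the wrapped env's bounds, observations are standardized when
# obs_mean/obs_std were given, and rewards are multiplied by reward_scale.
def step(self, action):
    lb = self._wrapped_env.action_space.low
    ub = self._wrapped_env.action_space.high
    scaled_action = lb + (action + 1.0) * 0.5 * (ub - lb)
    scaled_action = np.clip(scaled_action, lb, ub)
    obs, reward, done, info = self._wrapped_env.step(scaled_action)
    if self._should_normalize:
        obs = (obs - self._obs_mean) / (self._obs_std + 1e-8)
    return obs, reward * self._reward_scale, done, info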
def __init__(self, game_name, agent_num=2):
    Serializable.quick_init(self, locals())
    self.game = game_name
    self.agent_num = agent_num
    self.states = np.zeros(self.agent_num)
    self.targets = np.zeros(self.agent_num)
    self.actions = np.zeros(self.agent_num)
def __init__(self, env, obs_mean=None, obs_std=None, acts_mean=None,
             acts_std=None, meta=False):
    self._wrapped_env = env
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)

    if obs_mean is not None:
        assert obs_std is not None
        self._scale_obs = True
    else:
        assert obs_std is None
        self._scale_obs = False

    if acts_mean is not None:
        assert acts_std is not None
        self._unscale_acts = True
    else:
        assert acts_std is None
        self._unscale_acts = False

    self.obs_mean = obs_mean
    self.obs_std = obs_std
    self.acts_mean = acts_mean
    self.acts_std = acts_std
def __init__(
        self,
        max_speed=0.05,
        max_distance=1,
        use_low_gear_ratio=True,
        speed_weight=0.9,
        done_threshold=0.005,
        goal_dim_weights=None,
):
    Serializable.quick_init(self, locals())
    self.max_distance = max_distance
    self.max_speed = max_speed
    self.speed_weight = speed_weight
    self.done_threshold = done_threshold
    self.initializing = True
    # TODO: fix this hack
    if speed_weight is None:
        self.speed_weight = 0.9  # just for init to work
    MultitaskEnv.__init__(self, goal_dim_weights=goal_dim_weights)
    super().__init__(use_low_gear_ratio=use_low_gear_ratio)
    self.set_goal(
        np.array([
            self.max_distance,
            self.max_distance,
            self.max_speed,
            self.max_speed,
        ])
    )
    self.initializing = False
    if speed_weight is None:
        assert (self.goal_dim_weights[0] == self.goal_dim_weights[1]) and \
               (self.goal_dim_weights[2] == self.goal_dim_weights[3])
        self.speed_weight = self.goal_dim_weights[2]
    assert 0 <= self.speed_weight <= 1
def __init__(self, game_name):
    Serializable.quick_init(self, locals())
    self.game = game_name
    self.payoff = {}

    if self.game == 'deadlock':
        self.agent_num = 2
        self.action_num = 2
        self.payoff[0] = np.array([[-5., 0.],
                                   [5., -10.]])
        self.payoff[1] = np.array([[-5., 5.],
                                   [0., -10.]])
    elif self.game == 'deadlock_coop':
        self.agent_num = 2
        self.action_num = 2
        self.payoff[0] = np.array([[-5., 5.],
                                   [5., -10.]])
        self.payoff[1] = self.payoff[0]
    elif self.game == 'deadlock_coop_unsym':
        self.agent_num = 2
        self.action_num = 2
        self.payoff[0] = np.array([[-5., 10.],
                                   [5., -10.]])
        self.payoff[1] = self.payoff[0]
    elif self.game == 'zero_sum':
        self.agent_num = 2
        self.action_num = 2
        self.payoff[0] = np.array([[-1., 1.],
                                   [1., -1.]])
        self.payoff[1] = -self.payoff[0]
    else:
        raise NotImplementedError
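# A minimal sketch (assumed usage, not part of the original class) of how the
# payoff matrices above would produce rewards: each agent i receives
# payoff[i][a_0, a_1] for the joint discrete action (a_0, a_1). The method
# name is an assumption.
def _rewards(self, actions):
    a_0, a_1 = int(actions[0]), int(actions[1])
    return np.array([self.payoff[i][a_0, a_1] for i in range(self.agent_num)])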
def __init__(self, action_space, epsilon, max_sigma=1.0, min_sigma=None,
             decay_period=1000000):
    assert len(action_space.shape) == 1
    Serializable.quick_init(self, locals())
    if min_sigma is None:
        min_sigma = max_sigma
    self._max_sigma = max_sigma
    self._epsilon = epsilon
    self._min_sigma = min_sigma
    self._decay_period = decay_period
    self._action_space = action_space
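# A minimal sketch (assumptions) of how these parameters are typically
# combined: with probability _epsilon a uniformly random action is taken,
# otherwise Gaussian noise with an annealed sigma is added to the raw action.
# The method name and arguments are assumptions.
def get_action_from_raw_action(self, action, t=0):
    if np.random.rand() < self._epsilon:
        return self._action_space.sample()
    sigma = self._max_sigma - (self._max_sigma - self._min_sigma) \
        * min(1.0, t * 1.0 / self._decay_period)
    noisy = action + np.random.normal(size=len(action)) * sigma
    return np.clip(noisy, self._action_space.low, self._action_space.high)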
def __init__(self, task_params, obs_task_params):
    env = Walker2DRandomDynamicsEnv()
    self._hack_task_params = task_params
    self._hack_obs_task_params = obs_task_params
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)

    self.observation_space = self._wrapped_env.observation_space.spaces['obs']
    self.reset()
def __init__(self, distance_metric_order=None, goal_dim_weights=None):
    self._desired_xyz = np.zeros(3)
    Serializable.quick_init(self, locals())
    MultitaskEnv.__init__(
        self,
        distance_metric_order=distance_metric_order,
        goal_dim_weights=goal_dim_weights,
    )
    mujoco_env.MujocoEnv.__init__(
        self,
        get_asset_xml('reacher_7dof.xml'),
        5,
    )
    self.observation_space = Box(
        np.array([
            -2.28, -0.52, -1.4, -2.32, -1.5, -1.094, -1.5,  # joint angles
            -3, -3, -3, -3, -3, -3, -3,  # joint velocities
            -0.75, -1.25, -0.2,  # EE xyz
        ]),
        np.array([
            1.71, 1.39, 1.7, 0, 1.5, 0, 1.5,  # joint angles
            3, 3, 3, 3, 3, 3, 3,  # joint velocities
            0.75, 0.25, 0.6,  # EE xyz
        ]),
    )
def save_init_params(self, locals):
    """
    Should call this FIRST THING in the __init__ method if you ever want
    to serialize or clone this network.

    Usage:
    ```
    def __init__(self, ...):
        self.save_init_params(locals())
        ...
    ```
    :param locals:
    :return:
    """
    Serializable.quick_init(self, locals)
def __init__(self, env, aug_obs_size):
    # self._wrapped_env needs to be set first because
    # Serializable.quick_init calls getattr on this class, and the
    # implementation of getattr (see below) calls self._wrapped_env.
    # Without setting this first, the call to self._wrapped_env would call
    # getattr again (since it's not set yet) and therefore loop forever.
    self._wrapped_env = env
    # Or else serialization gets delegated to the wrapped_env. Serialize
    # this env separately from the wrapped_env.
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)

    obs_space = self._wrapped_env.observation_space
    assert isinstance(obs_space, Box)
    low = np.hstack([obs_space.low, np.full(aug_obs_size, 0)])
    high = np.hstack([obs_space.high, np.full(aug_obs_size, 1)])
    self.observation_space = Box(low=low, high=high)
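# A minimal sketch (assumed usage, not part of the original class) of how the
# widened observation space above would be filled: callers supply an auxiliary
# vector of length aug_obs_size that is concatenated onto each wrapped
# observation. The method name is an assumption.
def _augment_observation(self, obs, aug_vector):
    # aug_vector is expected to lie in [0, 1]^aug_obs_size, matching the Box bounds
    return np.hstack([obs, aug_vector])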
def __init__(
        self,
        env,
        obs_min=None,
        obs_max=None,
):
    self._wrapped_env = env
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)

    if obs_min is not None:
        assert obs_max is not None
        self._scale_obs = True
    else:
        assert obs_max is None
        self._scale_obs = False

    self.obs_min = obs_min
    self.obs_max = obs_max
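# A minimal sketch (an assumption, not the original implementation) of the
# scaling these bounds suggest: map raw observations from [obs_min, obs_max]
# to [-1, 1] whenever scaling is enabled. The method name is hypothetical.
def _scale_observation(self, obs):
    if not self._scale_obs:
        return obs
    return 2.0 * (obs - self.obs_min) / (self.obs_max - self.obs_min) - 1.0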
def __init__(self, task_list):
    Serializable.quick_init(self, locals())
    self._task_envs = []
    for i, task in enumerate(task_list):
        if (task is SawyerReachPushPickPlace6DOFEnv
                or task is SawyerReachPushPickPlaceWall6DOFEnv):
            # TODO: this could cause flaws in task_idx if
            # SawyerReachPushPickPlace6DOFEnv/SawyerReachPushPickPlaceWall6DOFEnv
            # is not the first environment
            self._task_envs.append(
                task(multitask=False,
                     obs_type='with_goal',
                     random_init=True,
                     if_render=False,
                     fix_task=True,
                     task_idx=i % 3))
        else:
            self._task_envs.append(
                task(multitask=False,
                     obs_type='with_goal',
                     if_render=False,
                     random_init=True))
    self._active_task = None
def __init__(
        self,
        env,
        n_skills=6,
):
    # self._wrapped_env needs to be set first because
    # Serializable.quick_init calls getattr on this class, and the
    # implementation of getattr (see below) calls self._wrapped_env.
    # Without setting this first, the call to self._wrapped_env would call
    # getattr again (since it's not set yet) and therefore loop forever.
    self._wrapped_env = env
    # Or else serialization gets delegated to the wrapped_env. Serialize
    # this env separately from the wrapped_env.
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)

    self.sample_z = None
    self.n_skills = n_skills
    # skill discriminator: maps a wrapped observation to n_skills logits
    self.disc = nn.Sequential(
        nn.Linear(self._wrapped_env.observation_space.shape[0], 256),
        nn.ReLU(),
        nn.Linear(256, 256),
        nn.ReLU(),
        nn.Linear(256, self.n_skills),
    )
    self.disc.cuda()
    self.disc.train()
    self.disc_optim = optim.Adam(self.disc.parameters(), lr=3e-4)
    # the observation space is widened to hold the skill encoding appended to
    # the wrapped observation
    self.observation_space = Box(
        low=self._wrapped_env.observation_space.low[0],
        high=self._wrapped_env.observation_space.high[0],
        shape=(self._wrapped_env.observation_space.shape[0] + n_skills,),
    )
    self.lock_z = False
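# A minimal sketch (assumptions, in the spirit of DIAYN-style skill discovery)
# of how the pieces above fit together: a one-hot encoding of the active skill
# is appended to the wrapped observation, matching the widened observation
# space. The method name is an assumption.
def _obs_with_skill(self, obs):
    z_one_hot = np.zeros(self.n_skills, dtype=np.float32)
    z_one_hot[self.sample_z] = 1.0  # assumes sample_z holds the current skill index
    return np.concatenate([obs, z_one_hot])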
def __init__(self):
    Serializable.quick_init(self, locals())
    self.target_x_vel = np.random.uniform(-MAX_SPEED, MAX_SPEED)
    super().__init__()
    MultitaskEnv.__init__(self)
    self.set_goal(np.array([5]))
def __init__(self, wrapped_env):
    Serializable.quick_init(self, locals())
    self._wrapped_env = wrapped_env
    self.action_space = self._wrapped_env.action_space
    self.observation_space = self._wrapped_env.observation_space
def init_serialization(self, locals):
    Serializable.quick_init(self, locals)
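# A short usage sketch (illustrative only, following the same pattern as the
# save_init_params docstring above): call init_serialization first thing in
# __init__ so the constructor arguments are captured for serialization and
# cloning. The parameter below is hypothetical.
def __init__(self, hidden_size=32):
    self.init_serialization(locals())
    self.hidden_size = hidden_size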
def __init__(self, action_space, prob_random_action=0.1):
    Serializable.quick_init(self, locals())
    assert isinstance(action_space, Discrete)
    self.prob_random_action = prob_random_action
    self.action_space = action_space
def __init__(self, game_name, agent_num=2, max_path_length=5,
             action_range=10., state_range=10.):
    Serializable.quick_init(self, locals())
    self.game = game_name
    self.agent_num = agent_num
    self.max_path_length = max_path_length
    self.action_range = action_range
    self.state_range = state_range
    self.payoff = {}
    self.states = np.zeros(self.agent_num)

    if self.game == 'zero_sum':
        assert self.agent_num == 2
        self.payoff[0] = lambda a1, a2: a1 * a2
        self.payoff[1] = lambda a1, a2: -a1 * a2
    elif self.game == 'cooperative':
        assert self.agent_num == 2
        self.payoff[0] = lambda a1, a2: a1 * a2
        self.payoff[1] = lambda a1, a2: a1 * a2
    elif self.game == 'trigonometric':
        assert self.agent_num == 2
        self.payoff[0] = lambda a1, a2: np.cos(a2) * a1
        self.payoff[1] = lambda a1, a2: np.sin(a1) * a2
    elif self.game == 'mataching_pennies':
        assert self.agent_num == 2
        self.payoff[0] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
        self.payoff[1] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
    elif self.game == 'rotational':
        assert self.agent_num == 2
        self.payoff[0] = lambda a1, a2: 0.5 * a1 * a1 + 10 * a1 * a2
        self.payoff[1] = lambda a1, a2: 0.5 * a2 * a2 - 10 * a1 * a2
    elif self.game == 'wolf':
        assert self.agent_num == 2

        def V(alpha, beta, payoff):
            u = payoff[(0, 0)] - payoff[(0, 1)] - payoff[(1, 0)] + payoff[(1, 1)]
            return alpha * beta * u \
                + alpha * (payoff[(0, 1)] - payoff[(1, 1)]) \
                + beta * (payoff[(1, 0)] - payoff[(1, 1)]) \
                + payoff[(1, 1)]

        payoff_0 = np.array([[0, 3], [1, 2]])
        payoff_1 = np.array([[3, 2], [0, 1]])
        self.payoff[0] = lambda a1, a2: V(a1, a2, payoff_0)
        self.payoff[1] = lambda a1, a2: V(a1, a2, payoff_1)
    elif self.game == 'max2':
        assert self.agent_num == 2
        h1 = 0.8
        h2 = 1.
        s1 = 3.
        s2 = 1.
        x1 = -5.
        x2 = 5.
        y1 = -5.
        y2 = 5.
        c = 10.

        def max_f(a1, a2):
            f1 = h1 * (-(np.square(a1 - x1) / s1)
                       - (np.square(a2 - y1) / s1))
            f2 = h2 * (-(np.square(a1 - x2) / s2)
                       - (np.square(a2 - y2) / s2)) + c
            return max(f1, f2)

        self.payoff[0] = lambda a1, a2: max_f(a1, a2)
        self.payoff[1] = lambda a1, a2: max_f(a1, a2)
    else:
        raise NotImplementedError
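# A minimal sketch (assumed usage, not part of the original class) of how the
# payoff functions above would produce per-agent rewards from a vector of
# continuous actions in these two-player differential games. The method name
# is an assumption.
def _rewards(self, actions):
    a_0, a_1 = actions[0], actions[1]
    return np.array([self.payoff[i](a_0, a_1) for i in range(self.agent_num)])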