Example #1
0
 def __init__(
     self,
     action_space,
     mu=0,
     theta=0.15,
     max_sigma=0.3,
     min_sigma=None,
     decay_period=100000,
 ):
     Serializable.quick_init(self, locals())
     if min_sigma is None:
         min_sigma = max_sigma
     self.mu = mu
     self.theta = theta
     self.sigma = max_sigma
     self._max_sigma = max_sigma
     self._min_sigma = min_sigma
     self._decay_period = decay_period
     self.dim = np.prod(action_space.low.shape)
     self.low = action_space.low
     self.high = action_space.high
     self.state = np.ones(self.dim) * self.mu
     self.reset()
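A minimal sketch of how these fields are typically consumed by such an Ornstein-Uhlenbeck exploration strategy (the method names below are assumptions, shown as extra methods of the class above rather than code taken from the snippet; np is numpy, as in the snippets):

     def evolve_state(self):
         # One Euler step of the OU process: dx = theta * (mu - x) + sigma * noise.
         dx = self.theta * (self.mu - self.state) \
             + self.sigma * np.random.randn(self.dim)
         self.state = self.state + dx
         return self.state

     def get_action_from_raw_action(self, action, t=0):
         # Anneal sigma linearly from max_sigma to min_sigma over decay_period,
         # then add the OU noise and clip to the action bounds.
         self.sigma = self._max_sigma - (self._max_sigma - self._min_sigma) \
             * min(1.0, t / self._decay_period)
         return np.clip(action + self.evolve_state(), self.low, self.high)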
Example #2
0
    def __init__(
            self,
            env: MultitaskEnv,
            give_goal_difference=False,
    ):
        # self._wrapped_env needs to be set first because
        # Serializable.quick_init calls getattr on this class, and the
        # implementation of getattr (see below) calls self._wrapped_env.
        # Without setting this first, the call to self._wrapped_env would call
        # getattr again (since it's not set yet) and therefore loop forever.
        self._wrapped_env = env
        # Or else serialization gets delegated to the wrapped_env. Serialize
        # this env separately from the wrapped_env.
        self._serializable_initialized = False
        self._wrapped_obs_dim = env.observation_space.low.size
        self.give_goal_difference = give_goal_difference
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        wrapped_low = self.observation_space.low
        low = np.hstack((
            wrapped_low,
            min(wrapped_low) * np.ones(self._wrapped_env.goal_dim)
        ))
        wrapped_high = self.observation_space.high
        high = np.hstack((
            wrapped_high,
            max(wrapped_high) * np.ones(self._wrapped_env.goal_dim)
        ))
        self.observation_space = Box(low, high)
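The comments in this example refer to a delegating __getattr__ on the wrapper; a minimal sketch of what that delegation typically looks like (an assumption based on the comment, not code taken from the snippet):

        def __getattr__(self, attr):
            # Only invoked when normal attribute lookup fails. If _wrapped_env
            # has not been assigned yet, the lookup below fails as well,
            # re-enters __getattr__, and recurses forever -- which is why
            # _wrapped_env is assigned before Serializable.quick_init
            # (quick_init probes attributes via getattr).
            return getattr(self._wrapped_env, attr)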
Example #3
0
 def __init__(self, min_distance=0, max_distance=2, use_low_gear_ratio=True):
     Serializable.quick_init(self, locals())
     self.max_distance = max_distance
     self.min_distance = min_distance
     MultitaskEnv.__init__(self)
     super().__init__(use_low_gear_ratio=use_low_gear_ratio)
     self.set_goal(np.array([self.max_distance, self.max_distance]))
Example #4
0
 def __init__(
     self,
     env,
     reward_scale=1.,
     obs_mean=None,
     obs_std=None,
 ):
     # self._wrapped_env needs to be set first because
     # Serializable.quick_init calls getattr on this class, and the
     # implementation of getattr (see below) calls self._wrapped_env.
     # Without setting this first, the call to self._wrapped_env would call
     # getattr again (since it's not set yet) and therefore loop forever.
     self._wrapped_env = env
     # Or else serialization gets delegated to the wrapped_env. Serialize
     # this env separately from the wrapped_env.
     self._serializable_initialized = False
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)
     self._should_normalize = not (obs_mean is None and obs_std is None)
     if self._should_normalize:
         if obs_mean is None:
             obs_mean = np.zeros_like(env.observation_space.low)
         else:
             obs_mean = np.array(obs_mean)
         if obs_std is None:
             obs_std = np.ones_like(env.observation_space.low)
         else:
             obs_std = np.array(obs_std)
     self._reward_scale = reward_scale
     self._obs_mean = obs_mean
     self._obs_std = obs_std
     ub = np.ones(self._wrapped_env.action_space.shape)
     self.action_space = Box(-1 * ub, ub)
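A sketch of how these normalization fields are typically applied at step time (the method names and the 1e-8 epsilon are assumptions, not taken from the snippet):

     def _apply_normalize_obs(self, obs):
         return (obs - self._obs_mean) / (self._obs_std + 1e-8)

     def step(self, action):
         # The exposed action space is [-1, 1]; map actions back to the
         # wrapped env's native bounds before stepping it.
         lb = self._wrapped_env.action_space.low
         ub = self._wrapped_env.action_space.high
         scaled_action = np.clip(lb + (action + 1.) * 0.5 * (ub - lb), lb, ub)
         obs, reward, done, info = self._wrapped_env.step(scaled_action)
         if self._should_normalize:
             obs = self._apply_normalize_obs(obs)
         return obs, reward * self._reward_scale, done, info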
Example #5
0
 def __init__(self, game_name, agent_num=2):
     Serializable.quick_init(self, locals())
     self.game = game_name
     self.agent_num = agent_num
     self.states = np.zeros(self.agent_num)
     self.targets = np.zeros(self.agent_num)
     self.actions = np.zeros(self.agent_num)
Example #6
0
    def __init__(self,
                 env,
                 obs_mean=None,
                 obs_std=None,
                 acts_mean=None,
                 acts_std=None,
                 meta=False):
        self._wrapped_env = env
        self._serializable_initialized = False
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        if obs_mean is not None:
            assert obs_std is not None
            self._scale_obs = True
        else:
            assert obs_std is None
            self._scale_obs = False

        if acts_mean is not None:
            assert acts_std is not None
            self._unscale_acts = True
        else:
            assert acts_std is None
            self._unscale_acts = False

        self.obs_mean = obs_mean
        self.obs_std = obs_std
        self.acts_mean = acts_mean
        self.acts_std = acts_std
Example #7
0
 def __init__(
     self,
     max_speed=0.05,
     max_distance=1,
     use_low_gear_ratio=True,
     speed_weight=0.9,
     done_threshold=0.005,
     goal_dim_weights=None,
 ):
     Serializable.quick_init(self, locals())
     self.max_distance = max_distance
     self.max_speed = max_speed
     self.speed_weight = speed_weight
     self.done_threshold = done_threshold
     self.initializing = True
     # TODO: fix this hack
     if speed_weight is None:
         self.speed_weight = 0.9  # just for init to work
     MultitaskEnv.__init__(self, goal_dim_weights=goal_dim_weights)
     super().__init__(use_low_gear_ratio=use_low_gear_ratio)
     self.set_goal(
         np.array([
             self.max_distance,
             self.max_distance,
             self.max_speed,
             self.max_speed,
         ]))
     self.initializing = False
     if speed_weight is None:
         assert (self.goal_dim_weights[0] == self.goal_dim_weights[1]) and (
             self.goal_dim_weights[2] == self.goal_dim_weights[3])
         self.speed_weight = self.goal_dim_weights[2]
     assert 0 <= self.speed_weight <= 1
Example #8
0
    def __init__(self, game_name):
        Serializable.quick_init(self, locals())
        self.game = game_name
        self.payoff = {}

        if self.game == 'deadlock':
            self.agent_num = 2
            self.action_num = 2
            self.payoff[0] = np.array([[-5., 0.], [5., -10]])
            self.payoff[1] = np.array([[-5., 5.], [0., -10.]])
        elif self.game == 'deadlock_coop':
            self.agent_num = 2
            self.action_num = 2
            self.payoff[0] = np.array([[-5., 5.], [5., -10]])
            self.payoff[1] = self.payoff[0]
        elif self.game == 'deadlock_coop_unsym':
            self.agent_num = 2
            self.action_num = 2
            self.payoff[0] = np.array([[-5., 10.], [5., -10]])
            self.payoff[1] = self.payoff[0]
        elif self.game == 'zero_sum':
            self.agent_num = 2
            self.action_num = 2
            self.payoff[0] = np.array([[-1., 1.], [1., -1.]])
            self.payoff[1] = -self.payoff[0]
        else:
            raise NotImplementedError
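For reference, payoff[i] here is a 2x2 matrix indexed by the two agents' discrete actions, so a step would typically look each agent's reward up as follows (hypothetical sketch, not part of the snippet):

        # actions: length-2 array of discrete action indices, one per agent
        rewards = np.array(
            [self.payoff[i][actions[0], actions[1]] for i in range(self.agent_num)]
        )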
Example #9
0
 def __init__(self, action_space, epsilon, max_sigma=1.0, min_sigma=None,
              decay_period=1000000):
     assert len(action_space.shape) == 1
     Serializable.quick_init(self, locals())
     if min_sigma is None:
         min_sigma = max_sigma
     self._max_sigma = max_sigma
     self._epsilon = epsilon
     self._min_sigma = min_sigma
     self._decay_period = decay_period
     self._action_space = action_space
Example #10
0
    def __init__(self, task_params, obs_task_params):
        env = Walker2DRandomDynamicsEnv()
        self._hack_task_params = task_params
        self._hack_obs_task_params = obs_task_params
        self._serializable_initialized = False
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        self.observation_space = self._wrapped_env.observation_space.spaces[
            'obs']
        self.reset()
Example #11
0
 def __init__(self, distance_metric_order=None, goal_dim_weights=None):
     self._desired_xyz = np.zeros(3)
     Serializable.quick_init(self, locals())
     MultitaskEnv.__init__(
         self,
         distance_metric_order=distance_metric_order,
         goal_dim_weights=goal_dim_weights,
     )
     mujoco_env.MujocoEnv.__init__(
         self,
         get_asset_xml('reacher_7dof.xml'),
         5,
     )
     self.observation_space = Box(
         np.array([
             -2.28,
             -0.52,
             -1.4,
             -2.32,
             -1.5,
             -1.094,
             -1.5,  # joint
             -3,
             -3,
             -3,
             -3,
             -3,
             -3,
             -3,  # velocity
             -0.75,
             -1.25,
             -0.2,  # EE xyz
         ]),
         np.array([
             1.71,
             1.39,
             1.7,
             0,
             1.5,
             0,
             1.5,  # joints
             3,
             3,
             3,
             3,
             3,
             3,
             3,  # velocity
             0.75,
             0.25,
             0.6,  # EE xyz
         ]))
Example #12
0
    def save_init_params(self, locals):
        """
        Should call this FIRST THING in the __init__ method if you ever want
        to serialize or clone this network.

        Usage:
        ```
        def __init__(self, ...):
            self.save_init_params(locals())
            ...
        ```
        :param locals:
        :return:
        """
        Serializable.quick_init(self, locals)
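The "call this FIRST THING" requirement makes sense given what quick_init does with locals(): it records the constructor arguments before any of them can be rebound. A rough sketch of the rllab/rlkit-style behaviour (an approximation of the idea, not the exact implementation):

    import inspect

    class Serializable(object):
        def quick_init(self, locals_):
            if getattr(self, "_serializable_initialized", False):
                return
            spec = inspect.getfullargspec(self.__init__)
            # Capture the constructor arguments out of __init__'s locals() so
            # the object can later be re-instantiated with the same arguments
            # (used for cloning and pickling).
            self.__args = [locals_[arg] for arg in spec.args[1:]]  # skip self
            self._serializable_initialized = True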
Example #13
0
    def __init__(self, env, aug_obs_size):
        # self._wrapped_env needs to be set first because
        # Serializable.quick_init calls getattr on this class, and the
        # implementation of getattr (see below) calls self._wrapped_env.
        # Without setting this first, the call to self._wrapped_env would call
        # getattr again (since it's not set yet) and therefore loop forever.
        self._wrapped_env = env
        # Or else serialization gets delegated to the wrapped_env. Serialize
        # this env separately from the wrapped_env.
        self._serializable_initialized = False
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        obs_space = self._wrapped_env.observation_space
        assert isinstance(obs_space, Box)
        low = np.hstack([obs_space.low, np.full(aug_obs_size, 0)])
        high = np.hstack([obs_space.high, np.full(aug_obs_size, 1)])
        self.observation_space = Box(low=low, high=high)
Example #14
0
    def __init__(
        self,
        env,
        obs_min=None,
        obs_max=None,
    ):
        self._wrapped_env = env
        self._serializable_initialized = False
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        if obs_min is not None:
            assert obs_max is not None
            self._scale_obs = True
        else:
            assert obs_max is None
            self._scale_obs = False

        self.obs_min = obs_min
        self.obs_max = obs_max
Example #15
0
 def __init__(self, task_list):
     Serializable.quick_init(self, locals())
     self._task_envs = []
     for i, task in enumerate(task_list):
         if task is SawyerReachPushPickPlace6DOFEnv or task is SawyerReachPushPickPlaceWall6DOFEnv:
             # TODO: this could cause flaws in task_idx if SawyerReachPushPickPlace6DOFEnv/SawyerReachPushPickPlaceWall6DOFEnv is not the first environment
             self._task_envs.append(
                 task(multitask=False,
                      obs_type='with_goal',
                      random_init=True,
                      if_render=False,
                      fix_task=True,
                      task_idx=i % 3))
         else:
             self._task_envs.append(
                 task(multitask=False,
                      obs_type='with_goal',
                      if_render=False,
                      random_init=True))
     self._active_task = None
Example #16
0
    def __init__(
            self,
            env,
            n_skills=6,
    ):
        # self._wrapped_env needs to be set first because
        # Serializable.quick_init calls getattr on this class, and the
        # implementation of getattr (see below) calls self._wrapped_env.
        # Without setting this first, the call to self._wrapped_env would call
        # getattr again (since it's not set yet) and therefore loop forever.
        self._wrapped_env = env
        # Or else serialization gets delegated to the wrapped_env. Serialize
        # this env separately from the wrapped_env.
        self._serializable_initialized = False
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        self.sample_z = None
        self.n_skills = n_skills
        
        self.disc = nn.Sequential(
            nn.Linear(self._wrapped_env.observation_space.shape[0], 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, self.n_skills),
        )
        self.disc.cuda()
        self.disc.train()
        self.disc_optim = optim.Adam(self.disc.parameters(), lr=3e-4)

        self.observation_space = Box(
            low=self._wrapped_env.observation_space.low[0],
            high=self._wrapped_env.observation_space.high[0],
            shape=(self._wrapped_env.observation_space.shape[0] + n_skills,))
        
        self.lock_z = False
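A hypothetical sketch of how the sampled skill z is typically used by such a skill-conditioned wrapper (method names are assumptions, not taken from the snippet):

        def _sample_z(self):
            # Uniformly pick one of n_skills and one-hot encode it.
            z = np.zeros(self.n_skills)
            z[np.random.randint(self.n_skills)] = 1.0
            return z

        def reset(self, **kwargs):
            if self.sample_z is None or not self.lock_z:
                self.sample_z = self._sample_z()
            obs = self._wrapped_env.reset(**kwargs)
            # The policy sees the wrapped observation with the one-hot skill
            # appended, matching the widened observation_space above.
            return np.hstack([obs, self.sample_z])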
Example #17
0
 def __init__(self):
     Serializable.quick_init(self, locals())
     self.target_x_vel = np.random.uniform(-MAX_SPEED, MAX_SPEED)
     super().__init__()
     MultitaskEnv.__init__(self)
     self.set_goal(np.array([5]))
Example #18
0
 def __init__(self, wrapped_env):
     Serializable.quick_init(self, locals())
     self._wrapped_env = wrapped_env
     self.action_space = self._wrapped_env.action_space
     self.observation_space = self._wrapped_env.observation_space
Example #19
0
 def init_serialization(self, locals):
     Serializable.quick_init(self, locals)
Example #20
0
 def __init__(self, action_space, prob_random_action=0.1):
     Serializable.quick_init(self, locals())
     assert isinstance(action_space, Discrete)
     self.prob_random_action = prob_random_action
     self.action_space = action_space
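A minimal sketch of the epsilon-greedy rule these fields support (hypothetical method, not shown in the snippet):

     def get_action_from_raw_action(self, action, **kwargs):
         # With probability prob_random_action, replace the policy's action
         # with a uniform sample from the Discrete action space.
         if np.random.random() <= self.prob_random_action:
             return self.action_space.sample()
         return action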
Example #21
0
    def __init__(self,
                 game_name,
                 agent_num=2,
                 max_path_length=5,
                 action_range=10.,
                 state_range=10.):
        Serializable.quick_init(self, locals())
        self.game = game_name
        self.agent_num = agent_num
        self.max_path_length = max_path_length
        self.action_range = action_range
        self.state_range = state_range
        self.payoff = {}
        self.states = np.zeros(self.agent_num)

        if self.game == 'zero_sum':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: a1 * a2
            self.payoff[1] = lambda a1, a2: -a1 * a2
        elif self.game == 'cooperative':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: a1 * a2
            self.payoff[1] = lambda a1, a2: a1 * a2
        elif self.game == 'trigonometric':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: np.cos(a2) * a1
            self.payoff[1] = lambda a1, a2: np.sin(a1) * a2
        elif self.game == 'matching_pennies':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
            self.payoff[1] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
        elif self.game == 'rotational':
            assert self.agent_num == 2
            self.payoff[0] = lambda a1, a2: 0.5 * a1 * a1 + 10 * a1 * a2
            self.payoff[1] = lambda a1, a2: 0.5 * a2 * a2 - 10 * a1 * a2
        elif self.game == 'wolf':
            assert self.agent_num == 2

            def V(alpha, beta, payoff):
                u = payoff[(0, 0)] - payoff[(0, 1)] - payoff[(1, 0)] + payoff[
                    (1, 1)]
                return alpha * beta * u + alpha * (payoff[(0, 1)] - payoff[
                    (1, 1)]) + beta * (payoff[(1, 0)] - payoff[
                        (1, 1)]) + payoff[(1, 1)]

            payoff_0 = np.array([[0, 3], [1, 2]])
            payoff_1 = np.array([[3, 2], [0, 1]])

            self.payoff[0] = lambda a1, a2: V(a1, a2, payoff_0)
            self.payoff[1] = lambda a1, a2: V(a1, a2, payoff_1)

        elif self.game == 'max2':
            assert self.agent_num == 2
            h1 = 0.8
            h2 = 1.
            s1 = 3.
            s2 = 1.
            x1 = -5.
            x2 = 5.
            y1 = -5.
            y2 = 5.
            c = 10.

            def max_f(a1, a2):
                f1 = h1 * (-(np.square(a1 - x1) / s1) -
                           (np.square(a2 - y1) / s1))
                f2 = h2 * (-(np.square(a1 - x2) / s2) -
                           (np.square(a2 - y2) / s2)) + c
                return max(f1, f2)

            self.payoff[0] = lambda a1, a2: max_f(a1, a2)
            self.payoff[1] = lambda a1, a2: max_f(a1, a2)
        else:
            raise NotImplementedError
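A note on the 'wolf' branch above: V(alpha, beta, payoff) is the expected payoff of a 2x2 matrix game when the two agents play their first action with probabilities alpha and beta. A quick check (hypothetical, using the payoff_0 matrix from the snippet):

    alpha, beta = 0.3, 0.7
    P = np.array([[0., 3.], [1., 2.]])
    expected = (alpha * beta * P[0, 0]
                + alpha * (1 - beta) * P[0, 1]
                + (1 - alpha) * beta * P[1, 0]
                + (1 - alpha) * (1 - beta) * P[1, 1])
    # expected agrees with V(alpha, beta, P) up to floating point error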