Example #1
 def __init__(
     self,
     action_space,
     mu=0,
     theta=0.15,
     max_sigma=0.3,
     min_sigma=0.3,
     decay_period=100000,
 ):
     assert len(action_space.shape) == 1
     Serializable.quick_init(self, locals())
     if min_sigma is None:
         min_sigma = max_sigma
     self.mu = mu
     self.theta = theta
     self.sigma = max_sigma
     self._max_sigma = max_sigma
     self._min_sigma = min_sigma
     self._decay_period = decay_period
     self.dim = np.prod(action_space.low.shape)
     self.low = action_space.low
     self.high = action_space.high
     self.state = np.ones(self.dim) * self.mu
     self.reset()
Example #2
    def __init__(self, init_sequential=False, eigreg=False, warmstart=True):
        self._init_sequential = init_sequential
        self._eigreg = eigreg
        self._warmstart = warmstart
        self._sigma = None

        self._serializable_initialized = False
        Serializable.quick_init(self, locals())
        super(GMM, self).__init__()
Example #3
    def __init__(self, obs_dim, action_dim, hidden_sizes=(100, 100), **kwargs):
        QFunction.__init__(self, obs_dim=obs_dim, action_dim=action_dim)

        self._serializable_initialized = False
        Serializable.quick_init(self, locals())
        self.save_init_params(locals())
        FlattenMlp.__init__(self,
                            hidden_sizes=hidden_sizes,
                            input_size=obs_dim + action_dim,
                            output_size=1,
                            **kwargs)
Example #4
    def __init__(
        self,
        env,
        reward_scale=1.,
        normalize_obs=False,
        online_normalization=False,
        obs_mean=None,
        obs_var=None,
        obs_alpha=0.001,
    ):
        # self._wrapped_env needs to be set first because
        # Serializable.quick_init calls getattr on this class, and the
        # implementation of __getattr__ (see the sketch below this example)
        # accesses self._wrapped_env. Without setting it first, the lookup of
        # self._wrapped_env would trigger __getattr__ again (since the
        # attribute does not exist yet) and recurse forever.
        self._wrapped_env = env
        # Or else serialization gets delegated to the wrapped_env. Serialize
        # this env separately from the wrapped_env.
        self._serializable_initialized = False
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        # Observation Space
        if normalize_obs is True and online_normalization is True:
            raise AttributeError(
                "normalize_obs and online_normalization cannot both be True")

        self._normalize_obs = normalize_obs
        self._online_normalize_obs = online_normalization

        if self._normalize_obs or self._online_normalize_obs:
            if obs_mean is None:
                obs_mean = np.zeros_like(env.observation_space.low)
            else:
                obs_mean = np.array(obs_mean)
            if obs_var is None:
                obs_var = np.ones_like(env.observation_space.low)
            else:
                obs_var = np.array(obs_var)

        self._obs_mean = obs_mean
        self._obs_var = obs_var
        self._obs_alpha = obs_alpha

        self._obs_mean_diff = np.zeros_like(env.observation_space.low)
        self._obs_n = 0

        # Action Space
        if isinstance(self._wrapped_env.action_space, Box):
            ub = np.ones(self._wrapped_env.action_space.shape)
            self.action_space = Box(-1 * ub, ub, dtype=np.float32)

        # Reward Scale
        self._reward_scale = reward_scale
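
The comment at the top of this example refers to a __getattr__ implementation that is not shown here. A minimal sketch of that delegation pattern, assuming it simply forwards unknown attribute lookups to the wrapped environment (the class below is illustrative, not the library's actual code):

class _WrapperSketch(object):
    def __init__(self, wrapped_env):
        # Assign the wrapped env before anything that might trigger
        # __getattr__; otherwise looking up self._wrapped_env inside
        # __getattr__ would recurse forever.
        self._wrapped_env = wrapped_env

    def __getattr__(self, name):
        # Called only when normal attribute lookup fails, so every
        # attribute the wrapper does not define itself is delegated.
        return getattr(self._wrapped_env, name)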
Example #5
 def __init__(self,
              action_space,
              max_sigma=1.0,
              min_sigma=None,
              decay_period=1000000):
     assert len(action_space.shape) == 1
     Serializable.quick_init(self, locals())
     self._max_sigma = max_sigma
     if min_sigma is None:
         min_sigma = max_sigma
     self._min_sigma = min_sigma
     self._decay_period = decay_period
     self._action_space = action_space
Example #6
    def __init__(self, horizon, obs_dim, action_dim):
        self._T = horizon
        Transition.__init__(self, obs_dim=obs_dim, action_dim=action_dim)

        self._serializable_initialized = False
        Serializable.quick_init(self, locals())
        super(TVLGDynamics, self).__init__()

        self.Fm = nn.Parameter(
            ptu.zeros(horizon, obs_dim, obs_dim + action_dim))
        self.fv = nn.Parameter(ptu.ones(horizon, obs_dim))
        self.dyn_covar = nn.Parameter(ptu.zeros(horizon, obs_dim, obs_dim))

        # Prior
        self._prior = None
Example #7
    def save_init_params(self, locals):
        """
        Should call this FIRST THING in the __init__ method if you ever want
        to serialize or clone this network.

        Usage:
        ```
        def __init__(self, ...):
            self.save_init_params(locals())
            ...
        ```
        :param locals:
        :return:
        """
        Serializable.quick_init(self, locals)
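
A minimal usage sketch for the pattern this docstring describes, assuming a module class that inherits save_init_params together with the FlattenMlp base from Example #3 (the class name and sizes are made up for illustration):

class SmallQf(FlattenMlp):
    def __init__(self, obs_dim, action_dim, hidden_sizes=(32, 32)):
        # First thing, as the docstring asks: record the constructor
        # arguments so the network can later be serialized or cloned.
        self.save_init_params(locals())
        FlattenMlp.__init__(self,
                            hidden_sizes=hidden_sizes,
                            input_size=obs_dim + action_dim,
                            output_size=1)

# The stored arguments then allow cloning, e.g. qf_copy = Serializable.clone(qf)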
Example #8
 def __getstate__(self):
     d = Serializable.__getstate__(self)
     # Add these explicitly in case they were modified
     d["_obs_mean"] = self._obs_mean
     d["_obs_var"] = self._obs_var
     d["_reward_scale"] = self._reward_scale
     return d
Example #9
    def __init__(
        self,
        action_space,
        horizon,
        smooth=True,
        renormalize=True,
        sigma=10.0,
        sigma_scale=None,
    ):
        Serializable.quick_init(self, locals())

        self._action_space = action_space
        self.low = action_space.low
        self.high = action_space.high

        self._horizon = horizon

        self._smooth = smooth
        self._renormalize = renormalize
        self._sigma = sigma

        if sigma_scale is None:
            self._sigma_scale = np.ones(self.action_dim)
        else:
            # Check if iterable
            try:
                iter(sigma_scale)
                if len(sigma_scale) != self.action_dim:
                    raise ValueError("Sigma scale different than action dim"
                                     "(%02d != %02d)" %
                                     (sigma_scale, self.action_dim))
                self._sigma_scale = sigma_scale
            except TypeError as te:
                self._sigma_scale = np.repeat(sigma_scale, self.action_dim)

        self.noise = None
        self.reset()
Example #10
 def __init__(self, wrapped_env):
     Serializable.quick_init(self, locals())
     self._wrapped_env = wrapped_env
     self.action_space = self._wrapped_env.action_space
     self.observation_space = self._wrapped_env.observation_space
Example #11
    def __init__(
            self,
            goal_reward=10,
            actuation_cost_coeff=30,
            distance_cost_coeff=1,
            log_distance_cost_coeff=1,
            alpha=1e-6,
            init_position=None,
            init_sigma=0.1,
            goal_position=None,
            goal_threshold=0.1,
            dynamics_sigma=0,
            horizon=None,
            subtask=None,
            seed=None,
    ):
        Serializable.__init__(self)
        Serializable.quick_init(self, locals())

        # Set the seed
        self.seed(seed)

        self._subtask = subtask

        # Point Dynamics
        self._dynamics = PointDynamics(dim=2, sigma=dynamics_sigma)

        # Initial Position
        if init_position is None:
            init_position = (0, 0)
        self.init_mu = np.array(init_position, dtype=np.float32)
        self.init_sigma = init_sigma

        # Goal Position
        if goal_position is None:
            self._goal_position = np.array([5, 5], dtype=np.float32)
        else:
            self._goal_position = np.array(goal_position, dtype=np.float32)

        # Masks
        self.goal_masks = [[True, True],
                           [True, False],
                           [False, True]]

        # Reward-related Variables
        self._goal_threshold = goal_threshold
        self._goal_reward = goal_reward
        self._action_cost_coeff = actuation_cost_coeff
        self._distance_cost_coeff = distance_cost_coeff
        self._alpha = alpha
        self._log_distance_cost_coeff = log_distance_cost_coeff

        # Maximum Reward
        self._max_rewards = [0, 0, 0]
        reward_output = self.compute_reward(self.goal_position, np.zeros(2))
        self._max_rewards[0] = reward_output[0]
        self._max_rewards[1] = reward_output[2][0]
        self._max_rewards[2] = reward_output[2][1]

        # Bounds
        self._xlim = (-7, 7)
        self._ylim = (-7, 7)
        self._vel_bound = 1.
        self._observation = None

        # Main Rendering
        self._main_fig = None
        self._main_ax = None
        self._dynamic_line = None
        self._main_marker = None
        self._env_lines = list()

        # Subgoals rendering
        self._subgoals_fig = None
        self._subgoals_ax = None
        self._dynamic_goals_lines = list()
        self._subgoal_markers = [None for _ in range(self.n_subgoals)]

        # Time-related variables
        self._t_counter = 0
        self._horizon = horizon

        # Random
        self.np_random = np.random
Example #12
 def __setstate__(self, d):
     Serializable.__setstate__(self, d)
     self.set_param_values(d["params"])
Example #13
 def __getstate__(self):
     d = Serializable.__getstate__(self)
     d["params"] = self.get_param_values()
     return d
Example #14
 def copy(self):
     copy = Serializable.clone(self)
     ptu.copy_model_params_from_to(self, copy)
     return copy
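
Together with the __getstate__/__setstate__ pair from Examples #12 and #13, this copy() method makes such modules picklable and clonable: pickling saves the constructor arguments plus the current parameter values, and unpickling restores both. A rough usage sketch, assuming policy is an instance of a class that follows this pattern:

import pickle

blob = pickle.dumps(policy)    # __getstate__: init args + get_param_values()
restored = pickle.loads(blob)  # __setstate__: restore args, then set_param_values()
fresh_copy = policy.copy()     # Serializable.clone(...) plus copied model weights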
Example #15
 def __init__(self, action_space, prob_random_action=0.1):
     assert isinstance(action_space, Discrete)
     Serializable.quick_init(self, locals())
     self.prob_random_action = prob_random_action
     self.action_space = action_space
Example #16
    def __init__(
        self,
        obs_dim,
        n_vs,
        shared_hidden_sizes=None,
        unshared_hidden_sizes=None,
        hidden_activation='relu',
        hidden_w_init='xavier_normal',
        hidden_b_init_val=0,
        output_activation='linear',
        output_w_init='xavier_normal',
        output_b_init_val=0,
        shared_layer_norm=False,
        unshared_layer_norm=False,
        layer_norm_kwargs=None,
    ):

        VFunction.__init__(self, obs_dim=obs_dim)

        self._n_vs = n_vs

        self._serializable_initialized = False
        Serializable.quick_init(self, locals())
        super(NNMultiVFunction, self).__init__()

        if layer_norm_kwargs is None:
            layer_norm_kwargs = dict()

        self._hidden_activation = ptu.get_activation(hidden_activation)
        self._output_activation = ptu.get_activation(output_activation)
        self._shared_layer_norm = shared_layer_norm
        self._unshared_layer_norm = unshared_layer_norm
        self._sfcs = []
        self._sfc_norms = []
        self._ufcs = [list() for _ in range(self._n_vs)]
        self._ufc_norms = [list() for _ in range(self._n_vs)]
        self._ufcs_lasts = []

        in_size = obs_dim
        # Shared Layers
        if shared_hidden_sizes is not None:
            for ii, next_size in enumerate(shared_hidden_sizes):
                sfc = nn.Linear(in_size, next_size)
                ptu.layer_init(layer=sfc,
                               activation=hidden_activation,
                               b=hidden_b_init_val)
                self.__setattr__("sfc{}".format(ii), sfc)
                self._sfcs.append(sfc)

                if self._shared_layer_norm:
                    ln = LayerNorm(next_size)
                    self.__setattr__("sfc{}_norm".format(ii), ln)
                    self._sfc_norms.append(ln)
                in_size = next_size

        # Unshared Layers
        if unshared_hidden_sizes is not None:
            for ii, next_size in enumerate(unshared_hidden_sizes):
                for q_idx in range(self._n_vs):
                    ufc = nn.Linear(in_size, next_size)
                    ptu.layer_init(layer=ufc,
                                   activation=hidden_activation,
                                   b=hidden_b_init_val)
                    self.__setattr__("ufc{}_{}".format(q_idx, ii), ufc)
                    self._ufcs[q_idx].append(ufc)

                    if self._unshared_layer_norm:
                        ln = LayerNorm(next_size)
                        tmp_txt = "ufc{}_{}_norm".format(q_idx, ii)
                        self.__setattr__(tmp_txt, ln)
                        self._ufc_norms[q_idx].append(ln)
                in_size = next_size

        for q_idx in range(self._n_vs):
            last_ufc = nn.Linear(in_size, 1)
            ptu.layer_init(layer=last_ufc,
                           activation=output_activation,
                           b=output_b_init_val)
            self.__setattr__("ufc_last{}".format(q_idx), last_ufc)
            self._ufcs_lasts.append(last_ufc)
Example #17
 def __setstate__(self, d):
     Serializable.__setstate__(self, d)
     self._obs_mean = d["_obs_mean"]
     self._obs_var = d["_obs_var"]
     self._reward_scale = d["_reward_scale"]