示例#1
0
class Reacher:
    """Black-box objective that scores a linear policy on 'FetchReach-v1'.

    An instance is callable: it takes a flat parameter vector of length
    ``dims``, reshapes it into a ``policy_shape`` matrix, runs
    ``num_rollouts`` episodes in the wrapped environment and returns the
    negated mean episode return.

    The environment is built with ``FilterObservation`` (keeping the
    'observation' and 'desired_goal' entries) wrapped in
    ``FlattenObservation``, so each observation reaching the policy is a
    single flat array.
    """

    def __init__(self):
        # Observation normalisation constants: (obs - mean) / std.
        self.mean = 0
        self.std = 1
        # Number of policy parameters; equals 4 * 13 from policy_shape.
        self.dims = 52
        # Element-wise box bounds enforced on every evaluated point.
        self.lb = -1 * np.ones(self.dims)
        self.ub = 1 * np.ones(self.dims)
        # Number of times the objective has been called.
        self.counter = 0
        self.env = FlattenObservation(
            FilterObservation(gym.make('FetchReach-v1'),
                              ['observation', 'desired_goal']))
        self.num_rollouts = 3
        self.render = False
        # Policy matrix shape; the 13 columns presumably match the
        # flattened observation length -- TODO confirm against the env.
        self.policy_shape = (4, 13)

        # tunable hyper-parameters in LA-MCTS
        self.Cp = 10
        self.leaf_size = 100
        self.kernel_type = "linear"
        self.gamma_type = "auto"
        self.ninits = 30

        print("===========initialization===========")
        print("mean:", self.mean)
        print("std:", self.std)
        print("dims:", self.dims)
        print("policy:", self.policy_shape)

    def __call__(self, x):
        """Evaluate the policy encoded by ``x``.

        Args:
            x: 1-D numpy array of length ``self.dims`` whose entries lie
                within ``[self.lb, self.ub]``.

        Returns:
            The mean total reward over ``self.num_rollouts`` episodes,
            multiplied by -1.

        Raises:
            AssertionError: if ``x`` has the wrong length, is not 1-D, or
                falls outside the bounds.
        """
        # The call counter is bumped before validation, so rejected inputs
        # are counted as well.
        self.counter += 1
        assert len(x) == self.dims
        assert x.ndim == 1
        assert np.all(x <= self.ub) and np.all(x >= self.lb)

        policy = x.reshape(self.policy_shape)
        returns = [self._rollout(policy) for _ in range(self.num_rollouts)]

        return np.mean(returns) * -1

    def _rollout(self, policy):
        """Run one episode with the linear ``policy`` and return its total reward."""
        obs = self.env.reset()
        done = False
        total_reward = 0.
        while not done:
            inputs = (obs - self.mean) / self.std
            action = np.dot(policy, inputs)
            obs, reward, done, _ = self.env.step(action)
            total_reward += reward
            if self.render:
                self.env.render()
        return total_reward
示例#2
0
def test_flatten_observation(env_id):
    """Check that FlattenObservation collapses a 3-D observation to 1-D.

    The raw observation from ``env.reset()`` must have three axes, the
    wrapped one exactly one axis, and the flat length must equal the
    product of the three raw axis sizes.
    """
    base_env = gym.make(env_id)
    flat_env = FlattenObservation(base_env)

    raw = base_env.reset()
    flat = flat_env.reset()

    raw_shape = raw.shape
    flat_shape = flat.shape

    assert len(raw_shape) == 3
    assert len(flat_shape) == 1
    expected_length = raw_shape[0] * raw_shape[1] * raw_shape[2]
    assert flat_shape[0] == expected_length
示例#3
0
    def test_flattened_environment(self, observation_space, ordered_values):
        """
        Verify that a flattened observation lists its values in the
        expected order, using a fake environment built from
        ``observation_space``.
        """
        base_env = FakeEnvironment(observation_space=observation_space)
        flat_obs = FlattenObservation(base_env).reset()

        # Round-trip the flat vector back through the original space so the
        # checker can compare it with the environment's own observation.
        restored = unflatten(base_env.observation_space, flat_obs)

        self._check_observations(base_env.observation, flat_obs, restored,
                                 ordered_values)
def test_flatten_observation(env_id):
    """Check raw and flattened reset observations against fixed spaces.

    The raw observation must belong to a Tuple of three Discrete spaces
    (sizes 32, 11 and 2); the flattened observation must belong to an
    int64 Box in [0, 1] whose length is the sum of those sizes.
    """
    base_env = gym.make(env_id)
    flat_env = FlattenObservation(base_env)

    raw_obs = base_env.reset()
    flat_obs = flat_env.reset()

    component_sizes = (32, 11, 2)
    expected_raw = spaces.Tuple(
        tuple(spaces.Discrete(size) for size in component_sizes))
    expected_flat = spaces.Box(0, 1, [sum(component_sizes)], dtype=np.int64)

    assert expected_raw.contains(raw_obs)
    assert expected_flat.contains(flat_obs)
def test_flatten_observation(env_id):
    """Check raw and flattened reset observations for the supported envs.

    For 'Blackjack-v0' and 'KellyCoinflip-v0' the raw observation must lie
    in the expected Tuple space and the flattened observation in an
    unbounded float32 Box whose length is the sum of the component sizes.

    Raises:
        ValueError: if ``env_id`` is not one of the ids handled here.
    """
    env = gym.make(env_id)
    wrapped_env = FlattenObservation(env)

    obs = env.reset()
    wrapped_obs = wrapped_env.reset()

    if env_id == 'Blackjack-v0':
        space = spaces.Tuple(
            (spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2)))
        wrapped_space = spaces.Box(-np.inf,
                                   np.inf, [32 + 11 + 2],
                                   dtype=np.float32)
    elif env_id == 'KellyCoinflip-v0':
        space = spaces.Tuple(
            (spaces.Box(0, 250.0, [1],
                        dtype=np.float32), spaces.Discrete(300 + 1)))
        wrapped_space = spaces.Box(-np.inf,
                                   np.inf, [1 + (300 + 1)],
                                   dtype=np.float32)
    else:
        # Without this branch an unrecognised id would only surface as an
        # UnboundLocalError on `space` in the assertions below.
        raise ValueError(
            "no expected spaces defined for env_id {!r}".format(env_id))

    assert space.contains(obs)
    assert wrapped_space.contains(wrapped_obs)
示例#6
0
 def test_nested_dicts_ravel(self, observation_space, flat_shape):
     """Check the flattened reset observation matches the wrapper's space shape.

     The fake environment is first filtered down to its own ``obs_keys``
     and then flattened.
     """
     fake_env = FakeEnvironment(observation_space=observation_space)
     filtered_env = FilterObservation(fake_env, fake_env.obs_keys)
     flat_env = FlattenObservation(filtered_env)
     first_obs = flat_env.reset()
     assert first_obs.shape == flat_env.observation_space.shape
示例#7
0
        # Define and parameterize the reference generator for the current reference
        reference_generator=WienerProcessReferenceGenerator(
            reference_state='i', sigma_range=(3e-3, 3e-2)),

        # Defines which variables to plot via the builtin dashboard monitor
        visualization=MotorDashboard(state_plots=['i', 'omega']),
    )

    # Now, the environment will output states and references separately
    state, ref = env.reset()

    # For data processing we sometimes want to flatten the env output,
    # which means that the env will only output one array that contains states and references consecutively
    env = FlattenObservation(env)
    obs = env.reset()

    # Read the number of possible actions for the given env
    # this allows us to define a proper learning agent for this task
    nb_actions = env.action_space.n

    window_length = 1

    # Define an artificial neural network to be used within the agent
    model = Sequential()
    # The network's input fits the observation space of the env
    model.add(
        Flatten(input_shape=(window_length, ) + env.observation_space.shape))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(4, activation='relu'))