Exemplo n.º 1
0
 def _ivariants_sorted(self, sorted_keys):
     """Lazily yield every variant that can be built from ``sorted_keys``.

     The keys are processed recursively: all variants over
     ``sorted_keys[:-1]`` are generated first, and each of them is then
     extended with every choice available for the last key.

     ``self._variants`` is read as an iterable of ``(key, values, _)``
     triples.  ``values`` is either an iterable of choices, or a callable
     that returns such an iterable.  A callable is invoked once per partial
     variant, with keyword arguments looked up in that partial variant by
     the callable's positional parameter names, so those names must occur
     earlier in ``sorted_keys`` than the callable's own key.

     Args:
         sorted_keys: Sequence of variant keys, dependencies first.

     Yields:
         A plain ``dict`` when ``sorted_keys`` is empty, otherwise an
         ``AttrDict`` mapping every key in ``sorted_keys`` to one choice.

     Raises:
         IndexError: If the last key has no entry in ``self._variants``.
     """
     if len(sorted_keys) == 0:
         yield dict()
         return

     last_key = sorted_keys[-1]
     partial_variants = self._ivariants_sorted(sorted_keys[:-1])
     last_vals = [v for k, v, _ in self._variants if k == last_key][0]

     is_dynamic = callable(last_vals)
     arg_names = None
     if is_dynamic:
         # inspect.getargspec was removed in Python 3.11 (and rejected
         # keyword-only / annotated callables before that).  Prefer
         # getfullargspec and only fall back where it does not exist.
         argspec = (getattr(inspect, "getfullargspec", None)
                    or inspect.getargspec)
         arg_names = argspec(last_vals).args
         # Bound methods still report their already-bound first parameter.
         if hasattr(last_vals, 'im_self') or hasattr(
                 last_vals, '__self__'):
             arg_names = arg_names[1:]

     for variant in partial_variants:
         if is_dynamic:
             choices = last_vals(**{k: variant[k] for k in arg_names})
         else:
             choices = last_vals
         for last_choice in choices:
             yield AttrDict(variant, **{last_key: last_choice})
        return samples

    def get_action(self, t, observation, policy, mc_dropout=10, **kwargs):
        """Return the element-wise mean of several actions from ``policy``.

        ``policy.get_action(observation)`` is called exactly ``mc_dropout``
        times and the returned actions are averaged.  Averaging only changes
        the result if the policy is stochastic from call to call.
        NOTE(review): presumably the policy keeps dropout active at
        inference time (MC Dropout) -- confirm against the policy class.

        Args:
            t: Not used by this method; kept for interface compatibility.
            observation: Forwarded unchanged to ``policy.get_action``.
            policy: Object whose ``get_action(observation)`` returns a
                sequence whose first item is the action.
            mc_dropout: Number of actions to sample and average.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            The mean action as float64, with the shape of a single action.

        Raises:
            ValueError: If ``mc_dropout`` is smaller than 1.
        """
        if mc_dropout < 1:
            raise ValueError(
                "mc_dropout must be at least 1, got %r" % (mc_dropout, ))

        # Every sampled action contributes to the mean; no extra policy call
        # is spent just to discover the action's shape.
        samples = [
            policy.get_action(observation)[0] for _ in range(mc_dropout)
        ]

        # Accumulate in float64 regardless of the dtype the policy returns.
        return np.mean(np.asarray(samples, dtype=np.float64), axis=0)


if __name__ == "__main__":
    # Manual demo: build a strategy for a one-dimensional action space in
    # [-1, 1], record the first component of 1000 successive
    # ``evolve_state()`` results, and plot them.
    # NOTE(review): mu/theta/sigma and evolve_state() look like the API of an
    # Ornstein-Uhlenbeck strategy -- confirm that MCDropout provides them.
    strategy = MCDropout(
        env_spec=AttrDict(action_space=Box(low=-1, high=1, shape=(1, ))),
        mu=0,
        theta=0.15,
        sigma=0.3)
    trajectory = [strategy.evolve_state()[0] for _ in range(1000)]

    # Imported only here so that matplotlib is needed just for the demo.
    import matplotlib.pyplot as plt

    plt.plot(trajectory)
    plt.show()