Exemplo n.º 1
0
def test_invalid_seeds():
    """Check that ``seeding.np_random`` rejects seeds it cannot accept.

    Every candidate below must make ``seeding.np_random`` raise
    ``error.Error``; a call that returns normally fails the test.
    """
    for bad_seed in (-1, 'test'):
        try:
            seeding.np_random(bad_seed)
        except error.Error:
            # Rejected as expected; try the next candidate.
            continue
        assert False, 'Invalid seed {} passed validation'.format(bad_seed)
Exemplo n.º 2
0
    def _seed(self, seed=None):
        """Reseed the environment and rebuild its spaces around the new RNG.

        Returns a one-element list holding the seed reported by
        ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng

        # Observation bounds: the first two components lie in [-1, 1], the
        # third is limited by max_speed.
        obs_limit = np.array([1., 1., self.max_speed])
        self.action_space = spaces.Box(
            low=-self.max_torque,
            high=self.max_torque,
            shape=(1,),
            np_random=rng,
        )
        self.observation_space = spaces.Box(low=-obs_limit, high=obs_limit, np_random=rng)
        return [used_seed]
Exemplo n.º 3
0
    def _seed(self, seed=None):
        """Reseed the environment and recreate both discrete spaces.

        The action space has ``self.n`` choices and the observation space a
        single value; both share the freshly created generator.  Returns the
        seed reported by ``seeding.np_random`` in a one-element list.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng

        self.action_space = spaces.Discrete(self.n, np_random=rng)
        self.observation_space = spaces.Discrete(1, np_random=rng)

        return [used_seed]
Exemplo n.º 4
0
 def _seed(self, seed=None):
     """Reseed the environment and rebuild the action/observation spaces.

     Returns a one-element list with the seed reported by
     ``seeding.np_random``.
     """
     rng, used_seed = seeding.np_random(seed)
     self.np_random = rng
     # The angle bound is twice theta_threshold_radians so that the
     # observation produced by a failing state still lies inside the box.
     obs_limit = np.array([
         self.x_threshold,
         np.inf,
         self.theta_threshold_radians * 2,
         np.inf,
     ])
     self.action_space = spaces.Discrete(2, np_random=rng)
     self.observation_space = spaces.Box(-obs_limit, obs_limit, np_random=rng)
     return [used_seed]
Exemplo n.º 5
0
 def _seed(self, seed=None):
     """Reseed the environment and rebuild its spaces.

     The observation space is a tuple of three discrete components with
     32, 11 and 2 values.  Returns the seed reported by
     ``seeding.np_random`` in a one-element list.
     """
     rng, used_seed = seeding.np_random(seed)
     self.np_random = rng
     self.action_space = spaces.Discrete(2, np_random=rng)
     component_sizes = (32, 11, 2)
     self.observation_space = spaces.Tuple(
         tuple(spaces.Discrete(size, np_random=rng) for size in component_sizes))
     return [used_seed]
Exemplo n.º 6
0
    def _seed(self, seed=None):
        """Reseed the environment and rebuild its spaces.

        The observation box is symmetric: +/- pi for the first two components
        and +/- MAX_VEL_1 / MAX_VEL_2 for the last two.  Returns the seed
        reported by ``seeding.np_random`` in a one-element list.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng

        upper = np.array([np.pi, np.pi, self.MAX_VEL_1, self.MAX_VEL_2])
        self.observation_space = spaces.Box(-upper, upper, np_random=rng)
        self.action_space = spaces.Discrete(3, np_random=rng)
        return [used_seed]
Exemplo n.º 7
0
    def _seed(self, seed=None):
        """Reseed the environment and rebuild its spaces.

        Returns a one-element list with the seed reported by
        ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng

        # Four discrete actions: nop, fire left engine, main engine, right engine.
        self.action_space = spaces.Discrete(4, np_random=rng)
        # The box itself is unbounded in all 8 dimensions; the useful range
        # is -1 .. +1.
        unbounded = np.array([np.inf] * 8)
        self.observation_space = spaces.Box(-unbounded, unbounded, np_random=rng)
        return [used_seed]
Exemplo n.º 8
0
 def _seed(self, seed=None):
     """Reseed the environment RNG and the emulator, then load the ROM.

     Returns ``[seed1, seed2]``: the seed reported by ``seeding.np_random``
     and the derived value handed to the emulator.
     """
     self.np_random, primary = seeding.np_random(seed)
     # The emulator receives the seed as a uint, but it is checked as an
     # int elsewhere, hence the reduction modulo 2**31.
     derived = seeding.hash_seed(primary + 1) % 2**31
     emulator = self.ale
     # Empirically, seeding has to happen before the ROM is loaded.
     emulator.setInt(b'random_seed', derived)
     emulator.loadROM(self.game_path)
     return [primary, derived]
Exemplo n.º 9
0
    def _seed(self, seed=None):
        """Seed the game and rebuild the action/observation spaces.

        Returns ``[seed1, seed2]``: the seed reported by
        ``seeding.np_random`` and the derived value passed to the game.
        """
        rng, primary = seeding.np_random(seed)
        # Derive a second seed for the game, reduced to 32 bits.
        derived = seeding.hash_seed(primary + 1) % 2**32
        self.game.set_seed(derived)

        # 3 allowed actions [12, 13, 14] (must match .cfg file)
        action_rows = np.matrix([[0, 1, 0]] * 3)
        self.action_space = spaces.HighLow(action_rows, np_random=rng)
        frame_shape = (self.screen_height, self.screen_width, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
        return [primary, derived]
Exemplo n.º 10
0
    def _seed(self, seed=None):
        """Seed the game and rebuild the action/observation spaces.

        Returns ``[seed1, seed2]``: the seed reported by
        ``seeding.np_random`` and the derived value passed to the game.
        """
        rng, primary = seeding.np_random(seed)
        # Derive a second seed for the game, reduced to 32 bits.
        derived = seeding.hash_seed(primary + 1) % 2**32
        self.game.set_seed(derived)

        # action indexes are [0, 9, 10, 12, 13, 14]
        action_rows = np.matrix([[0, 1, 0]] * 6)
        self.action_space = spaces.HighLow(action_rows, np_random=rng)
        frame_shape = (self.screen_height, self.screen_width, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
        return [primary, derived]
Exemplo n.º 11
0
Arquivo: go.py Projeto: cdingding/gym
    def _seed(self, seed=None):
        """Reseed the environment and pachi, then rebuild the spaces.

        Returns ``[seed1, seed2]``: the seed reported by
        ``seeding.np_random`` and the derived value given to
        ``pachi_py.pachi_srand``.
        """
        rng, primary = seeding.np_random(seed)
        self.np_random = rng
        # Derive a second seed for pachi, reduced to 32 bits.
        derived = seeding.hash_seed(primary + 1) % 2**32
        pachi_py.pachi_srand(derived)

        # The observation box takes its shape from an encoded fresh board.
        board_shape = pachi_py.CreateBoard(self.board_size).encode().shape
        self.observation_space = spaces.Box(np.zeros(board_shape), np.ones(board_shape), np_random=rng)
        # One action for each board position, pass, and resign
        move_count = self.board_size**2 + 2
        self.action_space = spaces.Discrete(move_count, np_random=rng)
        return [primary, derived]
Exemplo n.º 12
0
    def _seed(self, seed=None):
        """Reseed the environment and rebuild its spaces.

        Action bounds come from the two columns of a copy of the model's
        ``actuator_ctrlrange``; the observation box is unbounded with
        ``self.obs_dim`` dimensions.  Returns the seed reported by
        ``seeding.np_random`` in a one-element list.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng

        ctrl_range = self.model.actuator_ctrlrange.copy()
        ctrl_low, ctrl_high = ctrl_range[:, 0], ctrl_range[:, 1]
        self.action_space = spaces.Box(ctrl_low, ctrl_high, np_random=rng)

        obs_limit = np.inf*np.ones(self.obs_dim)
        self.observation_space = spaces.Box(-obs_limit, obs_limit, np_random=rng)
        return [used_seed]
Exemplo n.º 13
0
 def _seed(self, seed=None):
     """Configure and reseed the emulator, then load the ROM.

     Sets ``repeat_action_probability`` from ``options`` first, then seeds
     the environment RNG and the emulator.  Returns ``[seed1, seed2]``: the
     seed reported by ``seeding.np_random`` and the derived emulator seed.
     """
     emulator = self.ale
     emulator.setFloat(b'repeat_action_probability', options.repeat_action_probability)
     from gym.utils import seeding
     self.np_random, primary = seeding.np_random(seed)
     # The emulator receives the seed as a uint, but it is checked as an
     # int elsewhere, hence the reduction modulo 2**31.
     derived = seeding.hash_seed(primary + 1) % 2 ** 31
     # Empirically, seeding has to happen before the ROM is loaded.
     emulator.setInt(b'random_seed', derived)
     emulator.loadROM(self.game_path)
     return [primary, derived]
Exemplo n.º 14
0
Arquivo: hex.py Projeto: Aishunk/gym
    def _seed(self, seed=None):
        """Reseed the environment and refresh the opponent policy.

        A non-string ``self.opponent`` is used as the policy directly.  The
        string ``'random'`` builds a random policy on the new generator; any
        other string raises ``error.Error``.  Returns the seed reported by
        ``seeding.np_random`` in a one-element list.
        """
        self.np_random, used_seed = seeding.np_random(seed)

        # Update the random policy if needed
        opponent = self.opponent
        if not isinstance(opponent, str):
            self.opponent_policy = opponent
        elif opponent == 'random':
            self.opponent_policy = make_random_policy(self.np_random)
        else:
            raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))

        return [used_seed]
Exemplo n.º 15
0
Arquivo: hex.py Projeto: cdingding/gym
    def _seed(self, seed=None):
        """Reseed the environment, rebuild its spaces and refresh the opponent.

        The observation box takes its shape from the observation returned by
        ``self.reset()``, which runs before the opponent policy is updated.
        A non-string ``self.opponent`` is used as the policy directly,
        ``'random'`` builds a random policy on the new generator, and any
        other string raises ``error.Error``.  Returns the seed reported by
        ``seeding.np_random`` in a one-element list.
        """
        self.np_random, used_seed = seeding.np_random(seed)

        # One action for each board position and resign
        action_count = self.board_size ** 2 + 1
        self.action_space = spaces.Discrete(action_count, np_random=self.np_random)
        first_obs = self.reset()
        self.observation_space = spaces.Box(
            np.zeros(first_obs.shape),
            np.ones(first_obs.shape),
            np_random=self.np_random,
        )

        # Update the random policy if needed
        opponent = self.opponent
        if not isinstance(opponent, str):
            self.opponent_policy = opponent
        elif opponent == 'random':
            self.opponent_policy = make_random_policy(self.np_random)
        else:
            raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))

        return [used_seed]
Exemplo n.º 16
0
def main():
    """Train the exploiting agent from episodes simulated on the cluster engines.

    Pushes the shared configuration to every engine.  For each epoch it
    scatters fresh per-engine base seeds, runs ``simulate_episodes`` remotely
    and replays every returned episode into the local agent.  The agent is
    rendered and saved at the configured epoch intervals, and the
    environment is closed even if training fails part-way.
    """
    np_random, seed = seeding.np_random(BASE_SEED)
    print("Base seed: {}, derived seed: {}".format(BASE_SEED, seed))

    rc = Client()
    dview = rc[:]
    nb_nodes = len(rc.ids)
    dview.push({
        "simulate_episode": simulate_episode,
        "SIZE": SIZE,
        "AGENT_STARTS": AGENT_STARTS,
        "AGENT_TAG": AGENT_TAG,
    })

    env = PentagoEnv(SIZE, None, agent_starts=AGENT_STARTS, to_win=SIZE)
    try:
        env.seed(np_random.randint(maxint))
        exploiting_agent = TabularQAgent(env, AGENT_TAG, None, load_model=True, userconfig={"eps": 1.0})
        exploiting_agent.seed(np_random.randint(maxint))

        # Floor division keeps the per-engine episode count an integer even
        # under true division; the remote side passes it to range().
        episodes_pr_node = EPISODES_PR_EPOCH // nb_nodes

        # NOTE(review): range(1, EPOCHS) runs EPOCHS - 1 epochs; confirm
        # that this is intended.
        for epoch in range(1, EPOCHS):
            verbose = epoch >= EPOCHS_VERBOSE_INTERVAL and epoch % EPOCHS_VERBOSE_INTERVAL == 0
            if verbose:
                print("\nEpoch {}".format(epoch))

            # Hand every engine its own base seed for this epoch.
            dview.scatter('base_seeds', np_random.randint(maxint, size=nb_nodes))
            result = dview.apply_async(simulate_episodes, episodes_pr_node)

            for episodes in result:
                for episode in episodes:
                    train_agent(exploiting_agent, episode.reset_obs, episode.feedbacks)

            if verbose:
                exploiting_agent.render()

            save = epoch >= EPOCHS_SAVE_MODEL_INTERVAL and epoch % EPOCHS_SAVE_MODEL_INTERVAL == 0
            if save:
                # The placeholder used to be printed literally; fill it with
                # the epoch being saved.
                print("\n Saving model... {}".format(epoch))
                exploiting_agent.save()
    finally:
        env.close()
Exemplo n.º 17
0
    def _seed(self, seed=None):
        """Reseed the emulator, reload the ROM and rebuild the spaces.

        The observation space depends on ``self._obs_type``: ``'ram'`` gives
        a 128-element box over [0, 255], ``'image'`` a (height, width, 3) box
        over [0, 255], and any other value raises ``error.Error``.  Returns
        ``[seed1, seed2]``: the seed reported by ``seeding.np_random`` and the
        derived emulator seed.
        """
        rng, primary = seeding.np_random(seed)
        self.np_random = rng
        # The emulator receives the seed as a uint, but it is checked as an
        # int elsewhere, hence the reduction modulo 2**31.
        derived = seeding.hash_seed(primary + 1) % 2**31
        # Empirically, seeding has to happen before the ROM is loaded.
        self.ale.setInt(b'random_seed', derived)
        self.ale.loadROM(self.game_path)
        self._action_set = self.ale.getMinimalActionSet()

        self.action_space = spaces.Discrete(len(self._action_set), np_random=rng)

        width, height = self.ale.getScreenDims()
        obs_type = self._obs_type
        if obs_type == 'ram':
            ram_floor = np.zeros(128)
            self.observation_space = spaces.Box(low=ram_floor, high=np.zeros(128)+255, np_random=rng)
            return [primary, derived]
        if obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255, shape=(height, width, 3), np_random=rng)
            return [primary, derived]
        raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
Exemplo n.º 18
0
def simulate_episodes(nb_episodes):
    """Simulate ``nb_episodes`` episodes and return them as a list.

    The imports live inside the function body.  All seeds are drawn, in a
    fixed order, from a generator seeded with the global ``base_seeds[0]``.
    """
    from sys import maxint
    from gym.utils import seeding
    from pentago_ai import RandomAgent, PentagoEnv, TabularQAgent, Episode

    rng, _ = seeding.np_random(base_seeds[0])

    def next_seed():
        # Draw the next seed from the shared generator.
        return rng.randint(maxint)

    # The draw order below (opponent, env, exploration policy, agent)
    # determines which seed each component receives.
    opponent_policy = RandomAgent("Player 2 Random")
    opponent_policy.seed(next_seed())
    env = PentagoEnv(SIZE, opponent_policy, agent_starts=AGENT_STARTS, to_win=SIZE)
    env.seed(next_seed())

    exploration_policy = RandomAgent("Player 1 Random")
    exploration_policy.seed(next_seed())
    exploring_agent = TabularQAgent(env, AGENT_TAG, exploration_policy, load_model=True, userconfig={"eps": 0.3})
    exploring_agent.seed(next_seed())

    return [simulate_episode(env, exploring_agent) for _ in range(nb_episodes)]
Exemplo n.º 19
0
 def seed(self, seed=None):
     """Seed this object's random number generator.

     Returns the seed reported by ``seeding.np_random`` in a one-element
     list.
     """
     # Produce the seed needed for randomization together with an np_random
     # object that backs all later random draws.
     rng, used_seed = seeding.np_random(seed)
     self.np_random = rng
     return [used_seed]
Exemplo n.º 20
0
 def seed(self, seed=None):
     """Seed both ``gym.spaces.prng`` and this object's own generator.

     Returns the seed reported by ``seeding.np_random`` in a one-element
     list.
     """
     # The spaces' shared PRNG is seeded with the raw argument first.
     gym.spaces.prng.seed(seed)
     rng, used_seed = seeding.np_random(seed)
     self.np_random = rng
     return [used_seed]
Exemplo n.º 21
0
 def _seed(self, seed=None):
     """Reseed the environment RNG and pachi.

     Returns ``[seed1, seed2]``: the seed reported by ``seeding.np_random``
     and the derived value given to ``pachi_py.pachi_srand``.
     """
     rng, primary = seeding.np_random(seed)
     self.np_random = rng
     # Derive a second seed for pachi, reduced to 32 bits.
     derived = seeding.hash_seed(primary + 1) % 2 ** 32
     pachi_py.pachi_srand(derived)
     return [primary, derived]
Exemplo n.º 22
0
 def _seed(self, seed=0):
     """Seed this object's random number generator (default seed is 0).

     Returns the seed reported by ``seeding.np_random`` in a one-element
     list.
     """
     rng, used_seed = seeding.np_random(seed)
     self.np_random = rng
     return [used_seed]
Exemplo n.º 23
0
def test_valid_seeds():
    """Check that ``seeding.np_random`` reports valid integer seeds unchanged."""
    for requested in (0, 1):
        # Only the reported seed matters here; the generator is discarded.
        _, reported = seeding.np_random(requested)
        assert requested == reported
Exemplo n.º 24
0
 def _seed(self, seed=None):
     """Reseed the environment and pass the new generator to the game.

     Returns the seed reported by ``seeding.np_random`` in a one-element
     list.
     """
     rng, used_seed = seeding.np_random(seed)
     self.np_random = rng
     # The game receives the generator object itself, not the integer seed.
     self.game._seed(self.np_random)
     return [used_seed]
 def seed(self, seed=None):
     """Seed this object's generator and derive a secondary seed.

     Returns ``[seed1, seed2]``: the seed reported by ``seeding.np_random``
     and a hash-derived value kept below 2**31.  The derived seed is only
     returned; nothing in this method consumes it.
     """
     rng, primary = seeding.np_random(seed)
     self.np_random = rng
     derived = seeding.hash_seed(primary + 1) % 2 ** 31
     return [primary, derived]
Exemplo n.º 26
0
 def seed(self, seed=1337):
     """Seed this object's random number generator.

     Returns a one-element list with the seed reported by
     ``seeding.np_random`` rather than the raw argument.  For the integer
     seeds the result is presumably unchanged, since ``seeding.np_random``
     is expected to echo valid integer seeds.  For ``seed=None`` the caller
     now gets whatever seed ``seeding.np_random`` reports instead of
     ``[None]``.
     """
     # Seed the random number generator and keep the seed it reports.
     self.np_random, used_seed = seeding.np_random(seed)
     return [used_seed]
Exemplo n.º 27
0
 def _seed(self, seed=None):
     """Reseed the environment and rebuild its spaces.

     Actions are a 4-dimensional box over [-1, +1]; observations are an
     unbounded 24-dimensional box.  Returns the seed reported by
     ``seeding.np_random`` in a one-element list.
     """
     rng, used_seed = seeding.np_random(seed)
     self.np_random = rng
     obs_limit = np.array([np.inf] * 24)
     act_low = np.array([-1, -1, -1, -1])
     act_high = np.array([+1, +1, +1, +1])
     self.action_space = spaces.Box(act_low, act_high, np_random=rng)
     self.observation_space = spaces.Box(-obs_limit, obs_limit, np_random=rng)
     return [used_seed]
Exemplo n.º 28
0
 def seed(self, seed=None):
     """Seed this object's random number generator.

     Returns the seed reported by ``seeding.np_random`` in a one-element
     list.
     """
     rng, used_seed = seeding.np_random(seed)
     self.np_random = rng
     return [used_seed]
Exemplo n.º 29
0
 def _seed(self, seed=None):
     """Reseed the environment and rebuild its spaces.

     There are three discrete actions; observations are bounded by
     ``self.low`` and ``self.high``.  Returns the seed reported by
     ``seeding.np_random`` in a one-element list.
     """
     rng, used_seed = seeding.np_random(seed)
     self.np_random = rng
     self.action_space = spaces.Discrete(3, np_random=rng)
     self.observation_space = spaces.Box(self.low, self.high, np_random=rng)
     return [used_seed]
Exemplo n.º 30
0
 def _seed(self, seed=None):
     """Reseed the environment and rebuild its spaces.

     Observations are (STATE_H, STATE_W, 3) boxes over [0, 255].  Returns
     the seed reported by ``seeding.np_random`` in a one-element list.
     """
     rng, used_seed = seeding.np_random(seed)
     self.np_random = rng
     # Action components: steer in [-1, +1], gas in [0, +1], brake in [0, +1].
     act_low = np.array([-1, 0, 0])
     act_high = np.array([+1, +1, +1])
     self.action_space = spaces.Box(act_low, act_high, np_random=rng)
     frame_shape = (STATE_H, STATE_W, 3)
     self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape, np_random=rng)
     return [used_seed]
Exemplo n.º 31
0
 def seed(self, seed=None):
     """Seed this object's random number generator.

     Returns the seed reported by ``seeding.np_random`` in a one-element
     list.
     """
     rng, used_seed = seeding.np_random(seed)
     self.np_random = rng
     return [used_seed]
Exemplo n.º 32
0
 def seed(self, seed=None):
     """Seed this object's random number generator.

     Returns the seed reported by ``seeding.np_random`` in a one-element
     list.
     """
     # seeding.np_random returns a (generator, seed) pair; unpack it so that
     # self.np_random holds the generator itself rather than the whole tuple.
     self.np_random, seed = seeding.np_random(seed)
     return [seed]