Example #1
    def __init__(self, initial):
        """ Remplit la grille avec des valeurs qui respectent les carrés."""

        # Empty cells (0) get the full candidate set {1..9}; givens become singletons.
        state = np.array([frozenset(range(1, 10) if k == 0 else [k])
                          for k in initial]).reshape((9, 9))

        state = normalize_state(state)

        self.initial = tuple(state.flatten())
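
For context, initial is expected to be a flat sequence of 81 integers in
row-major order, with 0 marking an empty cell. A minimal usage sketch (the
class name Sudoku is an assumption; the snippet does not show it):

    # Hypothetical usage: 81 integers, 0 = empty cell (class name assumed).
    puzzle = [0] * 81
    puzzle[0] = 5  # give the top-left cell the value 5
    problem = Sudoku(puzzle)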
Example #2
    def result(self, state, action):
        """
        Calcule la configuration résultante à appliquer une action sur une
        configuration.

        Le nouvel état est une copie modifiée de l'état passé en argument.
        """
        state = numpify_state(state)

        i, j, k = action

        # Assign the single value k to cell (i, j).
        # (ndarray.itemset was removed in NumPy 2.0; plain indexing is equivalent.)
        state[i, j] = frozenset([k])

        # Re-normalize the grid after the assignment.
        normalized_state = normalize_state(state)

        return tuple(normalized_state.flatten())
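
Flattening the grid to a tuple of frozensets makes states hashable, so search
code can keep them in explored sets or use them as dictionary keys. The helper
numpify_state is not shown; a plausible minimal sketch (an assumption, not the
project's confirmed code) just rebuilds the 9x9 object array from the flat
tuple:

    import numpy as np

    def numpify_state(state):
        # Hypothetical helper: turn the flat tuple of 81 frozensets back
        # into a 9x9 NumPy object array, mirroring tuple(state.flatten()).
        return np.array(state, dtype=object).reshape((9, 9))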
Example #3
    env_name = "Boxing-ram-v0"

try:
    model_filename = sys.argv[2]
except IndexError:
    model_filename = "Boxing-ram-v0_10000e.h5"

try:
    num_episodes = int(sys.argv[3])
except IndexError:
    num_episodes = 10

print("Playing agent {} for {} episodes.".format(model_filename, num_episodes))

env = gym.make(env_name)
agent = DQNAgent(env, epsilon=1.0, model_filename=model_filename)

for i_episode in range(num_episodes):
    state = normalize_state(env.reset())
    done = False
    total_reward = 0
    while not done:
        env.render()
        action = agent.act(state)
        state, reward, done, info = env.step(action)
        state = normalize_state(state)
        total_reward += reward
    print("Episode {} reward: {}".format(i_episode + 1, total_reward))

env.close()
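
normalize_state is also project-local. For "-ram-v0" environments the
observation is a 128-byte RAM vector with values 0-255, so a plausible sketch
of the helper (an assumption, not the project's confirmed code) rescales the
bytes to [0, 1]:

    import numpy as np

    def normalize_state(state):
        # Hypothetical helper: scale raw RAM bytes (0-255) to floats in [0, 1].
        return np.asarray(state, dtype=np.float32) / 255.0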
Example #4
File: train.py Project: peustr/OpenAI-RL
import sys
from datetime import datetime

import gym

# DQNAgent and normalize_state come from the project's own modules;
# env_name is assumed to be set earlier in the script (e.g. from sys.argv[1]).

try:
    num_episodes = int(sys.argv[2])
except IndexError:
    num_episodes = 2000

env = gym.make(env_name)
agent = DQNAgent(env)

for i_episode in range(num_episodes):
    # For timing every episode.
    ts_start = datetime.now()
    # For tracking cumulative reward.
    total_reward = 0

    # Track remaining ALE lives so a lost life can be treated as terminal.
    lives = env.env.ale.lives()

    state = normalize_state(env.reset())
    done = False
    while not done:
        # Comment out env.render() for faster training.
        # env.render()
        action = agent.act(state)
        next_state, reward, done, info = env.step(action)
        next_state = normalize_state(next_state)
        # If a life is lost, mark the transition as terminal so the agent
        # learns that losing a life ends the episode.
        if info["ale.lives"] < lives:
            lives = info["ale.lives"]
            done = True
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward
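
The listing is truncated before the learning step. In a typical DQN training
loop, the episode would end with an experience-replay update and a timing
report; a hedged sketch of what usually follows (agent.replay and its batch
size are assumptions, not taken from this project):

    # Hypothetical end-of-episode step: sample a minibatch from replay
    # memory, fit the Q-network, then report the episode's duration.
    agent.replay(batch_size=32)
    print("Episode {} took {} with reward {}.".format(
        i_episode + 1, datetime.now() - ts_start, total_reward))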