Example #1
def train(game):
    # tqdm, DQN, TRAIN_GAMES, and calculate_additional_reward are assumed to be
    # imported/defined at module level (see the sketch after this example).
    agent = DQN(game)

    for i in tqdm(range(TRAIN_GAMES)):
        game.new_episode()
        previous_variables = None
        previous_img = None
        done = False
        local_history = []
        total_reward = 0
        while not done:
            state = game.get_state()

            img = state.screen_buffer
            variables = state.game_variables
            if previous_variables is None:
                previous_variables = variables
            if previous_img is None:
                previous_img = img

            action = agent.act(img)
            reward = game.make_action(action)
            done = game.is_episode_finished()
            # Shape the reward with the change in game variables, then scale it
            reward = (reward + calculate_additional_reward(previous_variables, variables)) / 100
            total_reward += reward
            local_history.append([previous_img, img, reward, action, done])
            previous_variables = variables
            previous_img = img

        # Only store and learn from episodes with a non-negative return
        if total_reward >= 0:
            for previous_state, state, reward, action, done in local_history:
                agent.remember(previous_state, state, reward, action, done)
            agent.train()
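
The shaped reward depends on a calculate_additional_reward helper defined elsewhere in the repository. A minimal sketch of what it could look like, assuming the ViZDoom game variables are ordered as (health, ammo, kill count); the ordering and weights below are assumptions, not the original code:

def calculate_additional_reward(previous_variables, variables):
    # Hypothetical reward shaping from the deltas of the game variables;
    # the (health, ammo, kills) ordering and the weights are assumptions.
    prev_health, prev_ammo, prev_kills = previous_variables[:3]
    health, ammo, kills = variables[:3]
    shaped = 0.0
    shaped += (kills - prev_kills) * 100.0   # reward each new kill
    shaped += (health - prev_health) * 1.0   # penalize damage taken
    shaped += (ammo - prev_ammo) * 0.5       # penalize wasted ammo
    return shaped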
Example #2
def run(ep, train=False):
    # pygame, numpy (as np), DQN, and pongGame are assumed to be imported at
    # module level; DQN(3, 5) is an agent with 3 actions and a 5-dim state.
    pygame.init()
    loss = []  # despite the name, this collects per-episode scores
    agent = DQN(3, 5)
    env = pongGame()
    weights_filepath = 'PongGame.h5'
    if not train:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")
    for e in range(ep):
        # Drain the pygame event queue so the window stays responsive
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        state = env.reset()
        state = np.reshape(state, (1, 5))
        score = 0
        max_steps = 1000
        for i in range(max_steps):
            action = agent.act(state)
            reward, next_state, done = env.step(action)
            score += reward
            next_state = np.reshape(next_state, (1, 5))
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if train:
                agent.replay()
            if done:
                print("episode: {}/{}, score: {}".format(e, ep, score))
                break
        loss.append(score)
    if train:
        agent.model.save_weights("PongGame.h5")
    return loss
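
The example assumes a DQN class exposing act/remember/replay and a Keras model attribute. A minimal sketch consistent with how it is called above (DQN(3, 5), i.e. 3 actions and a 5-dimensional state); the network size and hyperparameters are assumptions, not the original implementation:

import random
from collections import deque

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

class DQN:
    def __init__(self, action_space, state_space):
        self.action_space = action_space
        self.state_space = state_space
        self.gamma = 0.95            # discount factor
        self.epsilon = 1.0           # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = 64
        self.memory = deque(maxlen=100_000)
        self.model = Sequential([
            Dense(64, input_dim=state_space, activation='relu'),
            Dense(64, activation='relu'),
            Dense(action_space, activation='linear'),
        ])
        self.model.compile(loss='mse', optimizer=Adam(learning_rate=1e-3))

    def act(self, state):
        # Epsilon-greedy: explore with probability epsilon, otherwise exploit
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_space)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self):
        # Train on a random minibatch of stored transitions
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        states = np.vstack([b[0] for b in batch])
        next_states = np.vstack([b[3] for b in batch])
        targets = self.model.predict(states, verbose=0)
        next_q = self.model.predict(next_states, verbose=0)
        for idx, (_, action, reward, _, done) in enumerate(batch):
            targets[idx][action] = reward if done else \
                reward + self.gamma * np.max(next_q[idx])
        self.model.fit(states, targets, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay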
Example #3
def train_dqn(episodes, env, render_frequency=0):
    # DQN, params, Renderer, HeadlessSnake, and save_model are assumed to be
    # defined elsewhere in the module (see the sketch after this example).
    now = datetime.datetime.now()
    run_id = f'{now.hour}{now.minute}'  # avoids shadowing the builtin id()
    episode_rewards = []
    agent = DQN(env, params)
    best_score = 0
    for episode in range(episodes):
        rendering = render_frequency and episode % render_frequency == 0 and isinstance(
            env, HeadlessSnake)

        state = env.reset()  # Reset the environment before each episode

        if rendering:
            renderer = Renderer(env, episode + 1)

        env.update_episode(episode + 1)
        # state = np.reshape(state, (1, env.state_space))
        total_reward = 0
        max_steps = 10000
        for step in range(max_steps):
            # 1. Choose the next action using the epsilon-greedy exploration strategy
            action = agent.get_action(state)

            # 2. Perform the action in the environment
            next_state, reward, done, _ = env.step(action)
            total_reward += reward
            # next_state = np.reshape(next_state, (1, env.state_space))

            if rendering:
                renderer.update()

            # 3. Update the Q-function (train model)
            agent.remember(state, action, reward, next_state, done)
            agent.train_with_experience_replay()

            # 4. Anneal the exploration vs. exploitation probability
            agent.update_exploration_strategy(episode)
            state = next_state

            if done:
                print(
                    f'episode: {episode+1}/{episodes}, score: {total_reward}, steps: {step}, '
                    f'epsilon: {agent.epsilon}, highscore: {env.maximum}')
                break

        if rendering:
            renderer.bye()

        save_model(run_id, agent, best_score, total_reward)
        best_score = max(best_score, total_reward)  # track the best episode return
        episode_rewards.append(total_reward)
    return episode_rewards
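
Checkpointing is delegated to a save_model helper defined elsewhere. A minimal sketch consistent with the call site above, assuming the agent wraps a Keras model and that weights should only be written when the episode beats the best score so far; both the policy and the file layout are assumptions:

import os

def save_model(run_id, agent, best_score, total_reward):
    # Hypothetical checkpoint policy: save only on a new best episode return.
    # The agent.model attribute is also an assumption about the DQN class.
    if total_reward > best_score:
        os.makedirs('models', exist_ok=True)
        agent.model.save_weights(f'models/snake_dqn_{run_id}.h5')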
Example #4
        # Fragment of a training loop: env, agent, scores, the episode index i,
        # and the initial pre_ob/ob_stack (a stack of four preprocessed frames)
        # are set up in surrounding code that is not shown here.
        while True:
            action = agent.act(pre_ob, step=i)

            ob, reward, done, _ = env.step(action)
            reward = max(reward, -1)  # clip large negative rewards to -1

            next_pre_ob = preprocess(ob)

            # Stack observations: drop the oldest frame and append the newest
            next_pre_ob = next_pre_ob.reshape(1, 100, 100)
            ob_stack = np.insert(ob_stack, ob_stack.shape[3], next_pre_ob, axis=3)
            ob_stack = np.delete(ob_stack, 0, axis=3)
            next_pre_ob = ob_stack

            agent.remember(pre_ob, action, reward, next_pre_ob, done)
            agent.replay()
            pre_ob = next_pre_ob
            score += reward

            if done:
                break

        scores.append(score)
        print("Episode {} score: {}".format(i + 1, score))
        mean_score = np.mean(scores[-100:])  # mean over the last 100 episodes

        if (i + 1) % 5 == 0:
            print(
                "Episode {}, score: {}, exploration at {}%, mean of last 100 episodes was {}"
                .format(i + 1, score, agent.epsilon * 100, mean_score))
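
The fragment calls a preprocess function and relies on pre_ob/ob_stack being initialized before the loop. A minimal sketch of both, assuming 100x100 grayscale frames stacked four deep; the exact pipeline is not shown in the original, so everything here is an assumption:

import numpy as np

def preprocess(ob):
    # Hypothetical preprocessing: grayscale, downsample to 100x100, scale to [0, 1]
    gray = ob.mean(axis=2)                              # RGB -> grayscale
    h, w = gray.shape
    small = gray[::max(h // 100, 1), ::max(w // 100, 1)][:100, :100]
    return (small / 255.0).astype(np.float32)

# Per-episode setup the fragment assumes: seed the stack with four copies of
# the first preprocessed frame, giving ob_stack shape (1, 100, 100, 4).
ob = np.zeros((200, 200, 3), dtype=np.uint8)            # stand-in for env.reset()
frame = preprocess(ob).reshape(1, 100, 100)
ob_stack = np.stack([frame] * 4, axis=3)
pre_ob = ob_stack
score = 0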