Пример #1
0
def train():
    env = gym.make('CartPole-v0')
    agent = DQNAgent(env=env)
    num_episodes = 200
    for i_episode in range(num_episodes):
        state = env.reset()
        total_reward = 0
        while True:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_reward += reward
            update_array = [state, action, reward, next_state, done]
            agent.update(update_array)
            state = next_state
            if done:
                print("Episode ", i_episode, ": ", total_reward, "  epsilon: ", agent.epsilon)
                break
    agent.save('myClassModel')
    env.close()
    agent = DQNAgent(env, log_dir=log_dir)
    # agent.load(log_dir)
    episode_rewards = []

    ep = 0
    obs = env.reset()
    episode_reward = 0
    for frame in range(Config.MAX_FRAMES):
        # print("frame", frame)
        # env.render()
        epsilon = Config.epsilon_by_frame(frame)
        action = agent.get_action(obs, epsilon)
        prev_obs = obs
        obs, reward, done, _ = env.step(action)
        episode_reward += reward
        agent.update(prev_obs, action, reward, obs, frame)
        if done:
            episode_rewards.append(episode_reward)
            agent.writer.add_scalar("data/reward", episode_reward, ep)
            print("episode", ep, "reward:", episode_reward)
            ep += 1
            obs = env.reset()
            episode_reward = 0
        if ep % 50 == 0:
            agent.save(log_dir)

    agent.save(log_dir)
    env.close()
    agent.writer.close()