Example No. 1
import gym
import minerl  # importing minerl registers the MineRL environments with gym

from tensorforce.agents import Agent
from tensorforce.environments import OpenAIGym


def main():
    # Enable debug logging for MineRL if needed (requires `import logging`)
    # logging.basicConfig(level=logging.DEBUG)

    # Create the environment
    ENV_NAME = "MineRLTreechop-v0"

    # Pre-defined or custom Gym environment
    env = gym.make(ENV_NAME)

    # Wrap the Gym environment for Tensorforce
    environment = OpenAIGym(env)

    # Actor-critic agent whose critic is trained with Tensorforce's
    # evolutionary optimizer
    agent = Agent.create(agent='ac',
                         environment=environment,
                         max_episode_timesteps=8000,
                         exploration=0.03,
                         critic_optimizer='evolutionary')

    sum_rewards = 0.0
    rewards_by_episode = []
    for episode in range(200):
        states = environment.reset()
        terminal = False
        print("Running episode " + str(episode))
        while not terminal:
            # evaluation=True acts deterministically and skips learning,
            # so no agent.observe() call is needed
            actions = agent.act(states=states, evaluation=True)
            states, terminal, reward = environment.execute(actions=actions)
            sum_rewards += reward
        print("Sum reward so far: " + str(sum_rewards))
        # Record the cumulative reward reached by the end of this episode
        rewards_by_episode.append((episode, sum_rewards))
        print("Ending episode", episode)
    print(rewards_by_episode)
    print('Mean episode reward:', sum_rewards / 200)

    agent.close()
    environment.close()
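
The loop above only evaluates the policy (`evaluation=True`, no `observe` call). For reference, here is a minimal sketch of Tensorforce's standard act-observe training loop; it assumes the same `agent` and `environment` objects created in `main()` above:

# Minimal training-loop sketch (assumes the same agent/environment objects
# as above). Without evaluation=True the agent samples actions
# stochastically, and agent.observe() feeds rewards back for learning.
for episode in range(200):
    states = environment.reset()
    terminal = False
    while not terminal:
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)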
Example No. 2
# NOTE: this example starts mid-snippet; the lines below close an agent
# configuration call whose opening lines are not shown.
    learning_rate=1e-3,
    name='agent_loader'
)
# Restore the previously saved agent
agent = agent.load()

running_score = 0.0
# Run for 50000 evaluation episodes
for i_epoch in range(50000):
    game_score = 0.0
    # Initialize episode
    states = environment.reset()
    terminal = False

    while not terminal:
        # Episode timestep; evaluation=True means the policy is only
        # evaluated, so agent.observe() is not called
        actions = agent.act(states=states, evaluation=True)
        states, terminal, reward = environment.execute(actions=actions)
        game_score += reward

    # Exponentially weighted moving average of the per-game score
    running_score = 0.95 * running_score + 0.05 * game_score
    if i_epoch % 5 == 0:
        print("Game %d       game score %.2f       running score %.2f"
              % (i_epoch, game_score, running_score))



agent.close()
environment.close()
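
Since this example restores a pretrained agent, here is a minimal save/load sketch using Tensorforce's checkpoint API; the directory name 'checkpoints' is an assumption, not taken from the example:

from tensorforce import Agent

# Save the trained agent (directory name 'checkpoints' is an assumption)
agent.save(directory='checkpoints', format='checkpoint')
agent.close()

# Later: restore the agent, re-attaching the environment
agent = Agent.load(directory='checkpoints', format='checkpoint',
                   environment=environment)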