import gym
import minerl  # importing minerl registers the MineRL environments (e.g. MineRLTreechop-v0) with gym
from tensorforce.agents import Agent
from tensorforce.environments import OpenAIGym


def main():
    # Creates a log for MineRL
    # logging.basicConfig(level=logging.DEBUG)

    # Create the environment
    ENV_NAME = "MineRLTreechop-v0"  # pre-defined or custom environment
    env = gym.make(ENV_NAME)
    environment = OpenAIGym(env)

    # Actor-critic agent with a small exploration rate and an
    # evolutionary optimizer for the critic
    agent = Agent.create(
        agent='ac',
        environment=environment,
        max_episode_timesteps=8000,
        exploration=0.03,
        critic_optimizer='evolutionary',
    )

    sum_rewards = 0.0
    rewards_by_episode = []
    for episode in range(200):
        states = environment.reset()
        terminal = False
        print("Training episode " + str(episode))
        while not terminal:
            # Note: despite the print above, evaluation=True acts
            # deterministically and performs no training updates
            # (agent.observe is never called); see the training
            # sketch after this block
            actions = agent.act(states=states, evaluation=True)
            states, terminal, reward = environment.execute(actions=actions)
            sum_rewards += reward
            # print(actions)
            print("Sum reward so far: " + str(sum_rewards))
        rewards_by_episode.append((episode, sum_rewards))
        print("Ending episode ", episode)

    print(rewards_by_episode)
    print('Mean episode reward:', sum_rewards / 200)

    agent.close()
    environment.close()


if __name__ == '__main__':
    main()
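
# The loop above never learns: act(evaluation=True) is deterministic and
# agent.observe() is never called. Below is a minimal sketch of
# Tensorforce's standard act/observe training loop for contrast, assuming
# the same `agent` and `environment` objects as above (the episode count
# is illustrative, not from the source):
for episode in range(200):
    states = environment.reset()
    terminal = False
    while not terminal:
        actions = agent.act(states=states)  # exploratory, trainable action
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)  # triggers learning updates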
# The opening of this call is truncated in the source; Agent.create is
# assumed here, and only the surviving trailing arguments are shown.
agent = Agent.create(
    # ... (earlier arguments missing from the source)
    learning_rate=1e-3,
    name='agent_loader'
)
# import ipdb; ipdb.set_trace()
agent = agent.load()  # restore previously saved parameters

running_score = 0.0

# Run 50000 evaluation episodes (agent.observe is commented out below,
# so no learning occurs)
for i_epoch in range(50000):
    game_score = 0.0

    # Initialize episode
    states = environment.reset()
    terminal = False
    while not terminal:
        # Episode timestep
        actions = agent.act(states=states, evaluation=True)
        states, terminal, reward = environment.execute(actions=actions)
        game_score += reward
        # agent.observe(terminal=terminal, reward=reward)

    # Exponential moving average of the per-episode score
    running_score = 0.95 * running_score + 0.05 * game_score
    if i_epoch % 5 == 0:
        print("Game ", i_epoch, " game score %.2f" % game_score,
              " running score %.2f" % running_score)

agent.close()
environment.close()
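
# For completeness, a hedged sketch of the save/load round trip that the
# snippet above relies on. The directory name 'saved_agent' is a
# placeholder, not taken from the source, and the exact keyword arguments
# of save/load vary between Tensorforce versions:
agent.save(directory='saved_agent')  # write a checkpoint to disk
agent.close()

agent = Agent.load(directory='saved_agent', environment=environment)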