# Imports for this snippet: GymEnvironment and TFEnvironment come from
# FruitAPI, OpenAIGym from Tensorforce. The FruitAPI module paths below are
# assumptions; adjust them to match your installation's layout.
from fruit.envs.gym import GymEnvironment
from fruit.plugins.tf_plugin import TFEnvironment
from tensorforce.environments import OpenAIGym


def compatible_2():
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
    # FruitAPI wrapper: query the state/action spaces, reset, read the state
    fruit_env = GymEnvironment(env_name='CartPole-v1')
    state = fruit_env.get_state_space()
    print(state.get_range())
    print(tuple(state.get_shape()))
    print(fruit_env.get_action_space().get_range())
    print(fruit_env.reset())
    print(fruit_env.get_state())
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')

    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
    # Tensorforce wrapper around the same Gym level
    env = OpenAIGym(level='CartPole-v1')
    state = env.states()
    print(state)
    print(env.actions())
    print(env.reset())
    print(env.execute(0))
    print(env.max_episode_timesteps())
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')

    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
    # FruitAPI-to-Tensorforce adapter: exposes the Tensorforce interface
    # on top of the FruitAPI environment created above
    env = TFEnvironment(fruit_environment=fruit_env)
    print(env.states())
    print(env.actions())
    print(env.reset())  # was the garbled call `getrobotics_states()`;
                        # reset() matches the parallel structure above
    print(env.execute(0))
    print(env.max_episode_timesteps())
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
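
# Sketch (addition, not in the original): the check above relies on all three
# wrappers answering the same five questions once wrapped. A hypothetical
# helper, exercise_interface, that works on any Tensorforce-style environment
# and uses only the calls already exercised above:
def exercise_interface(env):
    """Print the Tensorforce-style interface of env: specs, reset, one step."""
    print(env.states())                 # state space specification
    print(env.actions())                # action space specification
    print(env.reset())                  # initial observation
    print(env.execute(0))               # (states, terminal, reward) for action 0
    print(env.max_episode_timesteps())  # episode length limit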
import gym
from tensorforce.agents import Agent
from tensorforce.environments import OpenAIGym


def main():
    # Creates a log for MineRL (uncomment to enable)
    # logging.basicConfig(level=logging.DEBUG)

    # Create the environment: pre-defined or custom
    ENV_NAME = "MineRLTreechop-v0"
    env = gym.make(ENV_NAME)
    environment = OpenAIGym(env)

    agent = Agent.create(
        agent='ac', environment=environment, max_episode_timesteps=8000,
        exploration=0.03, critic_optimizer='evolutionary'
    )

    sum_rewards = 0.0
    rewards_by_episode = []
    for episode in range(200):
        states = environment.reset()
        terminal = False
        print("Training episode " + str(episode))
        while not terminal:
            # evaluation=True acts deterministically and stores no experience,
            # so there is no matching agent.observe() call
            actions = agent.act(states=states, evaluation=True)
            states, terminal, reward = environment.execute(actions=actions)
            sum_rewards += reward
            # print(actions)
            print("Sum reward so far: " + str(sum_rewards))
        rewards_by_episode.append((episode, sum_rewards))
        print("Ending episode ", episode)

    print(rewards_by_episode)
    print('Mean episode reward:', sum_rewards / 200)

    agent.close()
    environment.close()
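
# Sketch (addition, not in the original): rewards_by_episode collects
# (episode, cumulative reward) pairs, so a quick learning curve can be drawn
# with matplotlib. The helper name plot_rewards is hypothetical.
import matplotlib.pyplot as plt


def plot_rewards(rewards_by_episode):
    episodes, cumulative = zip(*rewards_by_episode)
    plt.plot(episodes, cumulative)
    plt.xlabel('episode')
    plt.ylabel('cumulative reward')
    plt.title('Cumulative reward per episode')
    plt.show()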
# (Snippet begins mid-call: this is the tail of an Agent.create(...)
# specification whose leading arguments were cut off in the original.)
        dict(type='dense', size=64),
        dict(type='dense', size=64)
    ],
    learning_rate=1e-3,
    name='agent_loader'
)
# import ipdb; ipdb.set_trace()
# Restore a previously saved agent; Tensorforce's Agent.load() classmethod
# is normally given a directory (see the save/load sketch below)
agent = agent.load()

running_score = 0.0

# Run for 50000 episodes in evaluation mode; with the observe() call
# commented out below, no training takes place
for i_epoch in range(50000):
    game_score = 0.0

    # Initialize episode
    states = environment.reset()
    terminal = False

    while not terminal:
        # Episode timestep
        actions = agent.act(states=states, evaluation=True)
        states, terminal, reward = environment.execute(actions=actions)
        game_score += reward
        # agent.observe(terminal=terminal, reward=reward)

    # Exponential moving average of the per-episode score
    running_score = 0.95 * running_score + 0.05 * game_score
    if i_epoch % 5 == 0:
        print("Game ", i_epoch, " game score %.2f" % game_score,
              " running score %.2f" % running_score)
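
# Sketch (addition, not in the original) of the save/load round-trip the
# loader above depends on: Tensorforce agents are written out with
# agent.save(...) and restored with the Agent.load(...) classmethod. The
# directory name 'agent_loader_checkpoint' is an illustrative assumption.
from tensorforce.agents import Agent


def save_and_restore(agent, directory='agent_loader_checkpoint'):
    agent.save(directory=directory)         # write checkpoint files
    return Agent.load(directory=directory)  # rebuild the agent from them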