Example #1
# Imports used by this snippet; the FruitAPI plugin path below is an
# assumption and may differ between versions of the library
from fruit.envs.gym import GymEnvironment
from fruit.plugins.tf_plugin import TFEnvironment
from tensorforce.environments import OpenAIGym

def compatible_2():
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
    fruit_env = GymEnvironment(env_name='CartPole-v1')
    state = fruit_env.get_state_space()
    print(state.get_range())
    print(tuple(state.get_shape()))
    print(fruit_env.get_action_space().get_range())
    print(fruit_env.reset())
    print(fruit_env.get_state())
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')

    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
    env = OpenAIGym(level='CartPole-v1')
    state = env.states()
    print(state)
    print(env.actions())
    print(env.reset())
    print(env.execute(0))
    print(env.max_episode_timesteps())
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')

    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
    env = TFEnvironment(fruit_environment=fruit_env)
    print(env.states())
    print(env.actions())
    print(env.reset())
    print(env.execute(0))
    print(env.max_episode_timesteps())
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
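
The same CartPole environment can also be built through Tensorforce's generic factory instead of the OpenAIGym class used above. A minimal sketch, assuming a recent Tensorforce release (the max_episode_timesteps value is illustrative):

from tensorforce.environments import Environment

# Generic factory, equivalent to OpenAIGym(level='CartPole-v1')
environment = Environment.create(
    environment='gym', level='CartPole-v1', max_episode_timesteps=500
)
print(environment.states())
print(environment.actions())
environment.close()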
Example #2
import logging
import gym
import minerl  # importing minerl registers the MineRL environments with gym
from tensorforce import Agent
from tensorforce.environments import OpenAIGym

def main():
    # Create a log for MineRL (uncomment to enable debug logging)
    # logging.basicConfig(level=logging.DEBUG)

    # Create the environment
    ENV_NAME = "MineRLTreechop-v0"

    # Pre-defined or custom environment
    env = gym.make(ENV_NAME)

    # Wrap the gym environment instance in Tensorforce's OpenAIGym adapter
    environment = OpenAIGym(env)

    agent = Agent.create(agent='ac',
                         environment=environment,
                         max_episode_timesteps=8000,
                         exploration=.03,
                         critic_optimizer='evolutionary')

    sum_rewards = 0.0
    rewards_by_episode = []
    for episode in range(200):
        states = environment.reset()
        terminal = False
        print("Running episode " + str(episode))
        while not terminal:
            # evaluation=True selects actions deterministically and skips the
            # observe/update step, so this loop evaluates rather than trains
            actions = agent.act(states=states, evaluation=True)
            states, terminal, reward = environment.execute(actions=actions)
            sum_rewards += reward
            # print(actions)
        print("Sum reward so far: " + str(sum_rewards))
        rewards_by_episode.append((episode, sum_rewards))  # cumulative total, not per-episode reward
        print("Ending episode", episode)
    print(rewards_by_episode)
    print('Mean episode reward:', sum_rewards / 200)

    agent.close()
    environment.close()
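
As written, the loop above only evaluates the agent: with evaluation=True, act() runs independently of the training pipeline and no observe() call ever feeds rewards back. For reference, a minimal sketch of one actual training episode with the same agent and environment:

# One training episode: act() and observe() are paired so the agent updates
states = environment.reset()
terminal = False
while not terminal:
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)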
Example #3
        dict(type='dense', size=64),
        dict(type='dense', size=64)
    ],
    learning_rate=1e-3,
    name='agent_loader'

)
agent = agent.load()

running_score = 0.0
# Run for 50000 evaluation episodes
for i_epoch in range(50000):
    game_score = 0.0
    # Initialize episode
    states = environment.reset()
    terminal = False

    while not terminal:
        # Episode timestep; evaluation=True means the agent acts without learning
        actions = agent.act(states=states, evaluation=True)
        states, terminal, reward = environment.execute(actions=actions)
        game_score += reward
        # agent.observe(terminal=terminal, reward=reward)

    # Exponentially weighted moving average of the episode score
    running_score = 0.95 * running_score + 0.05 * game_score
    if i_epoch % 5 == 0:
        print("Game %d    game score %.2f    running score %.2f"
              % (i_epoch, game_score, running_score))