示例#1
0
def run(args):
    """Load the pretrained agent and evaluate it in the Unity Reacher environment.

    Args:
        args: Parsed options. Only ``args.machine`` is read: ``"Mac"`` selects
            ``./Reacher.app``; any other value selects
            ``./Reacher_Linux_NoVis/Reacher.x86_64``.

    The environment is closed in a ``finally`` clause, so it is released even
    if inspection, agent loading, or evaluation raises.
    """
    if args.machine == "Mac":
        env_path = './Reacher.app'
    else:
        env_path = './Reacher_Linux_NoVis/Reacher.x86_64'
    env = UnityEnvironment(file_name=env_path, seed=1)

    try:
        # Prefer the GPU when one is available, otherwise fall back to the CPU.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print("using device", device)

        # Get the default (first) brain.
        brain_name = env.brain_names[0]
        brain = env.brains[brain_name]

        # Reset the environment with train_mode=False.
        env_info = env.reset(train_mode=False)[brain_name]

        # Number of agents.
        num_agents = len(env_info.agents)
        print('Number of agents:', num_agents)

        # Size of each action.
        action_size = brain.vector_action_space_size
        print('Size of each action:', action_size)

        # Examine the state space.
        states = env_info.vector_observations
        state_size = states.shape[1]
        print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
        print('The state for the first agent looks like:', states[0])

        # NOTE(review): a_dim/s_dim are hard-coded instead of using the
        # action_size/state_size reported above; presumably they must match
        # the weights stored under ./pretrained/ -- confirm before changing.
        agent = Agent(a_dim=4, s_dim=33, clip_value=1, device=device)  # continuous action clip
        agent.load("./pretrained/")

        # `eval` here is the project's evaluation routine (it is called with
        # env, agent, brain_name), not the Python builtin.
        eval(env, agent, brain_name)
    finally:
        # Always release the Unity environment, including on failure.
        env.close()
示例#2
0
# Base of the exponential term in the per-episode noise denominator below.
NOISE_C = 1.1
# Index of the first episode the training loop runs.
first_ep = 0

# Everything below runs under the '/GPU:0' TensorFlow device scope.
with tf.device('/GPU:0'):
    # Build the environment from the data file, seeded with SEED.
    env = Environment("data/u20.txt", SEED)
    # env = gym.wrappers.Monitor(e.env, 'video/', video_callable=lambda episode_id: True,force = True)
    # video = VideoRecorder(env, "video.mp4"
    state_shape = env.state_shape
    # The first entry of the environment's action shape is used as the action length.
    action_len = env.action_shape[0]
    action_scale = None
    # Mean of the noise distribution sampled each episode (std is NOISE / 2).
    NOISE = 0.6
    # np.random.seed(SEED)

    agent = Agent(state_shape, action_len, action_scale)
    # Optionally restore a saved agent; `continued` and `path` are defined
    # outside this view.
    if continued:
        agent.load(path)
    agent.summary()

    for episode in range(first_ep, EPISODES):
        # Start a new episode and reshape the initial observation to state_shape.
        state = env.reset()
        state = np.reshape(state, state_shape)
        score = 0
        # print(state)
        # done = False
        # Draw 2 samples from N(NOISE, NOISE / 2) and divide them by
        # 1 + NOISE_C ** (episode + 10); with NOISE_C > 1 the divisor grows
        # with the episode index, so the samples shrink over time.
        # NOTE(review): where `noise` is consumed is not visible here --
        # presumably added to the action for exploration; confirm.
        noise = np.random.normal(NOISE, NOISE / 2,
                                 2) / (1 + pow(NOISE_C, episode + 10))
        # Step through the episode, for at most MAX_STEPS iterations.
        for st in range(MAX_STEPS):
            # while not done :
            #     env.render()
            # video.capture_frame()
            # Ask the agent for an action given the current state.
            action = agent.act(state)