Пример #1
0
def evaluate(actor_main, env, control_stepsize, state_dim, action_dim):
    """Roll out one evaluation episode (up to ~1000 env steps) and return its reward.

    The policy acts with a one-decision delay: the previously computed action
    is repeated for `control_stepsize` env steps while the new action (computed
    from the latest state) waits for the next segment.

    Args:
        actor_main: policy network exposing `forward` and `action_dim`.
        env: dm_control-style environment; `step`/`reset` return
            (steptype, reward, discount, observation) tuples.
        control_stepsize: number of env steps each action is held for.
        state_dim: flattened observation size fed to the actor.
        action_dim: unused on entry — overwritten by `actor_main.action_dim`.

    Returns:
        Accumulated episode reward, scaled by `control_stepsize` so a full
        episode sums to approximately 1000.
    """
    _, _, _, obs = env.reset()
    state = torch.FloatTensor(utils.state_1d_flat(obs)).to(device)
    action_dim = actor_main.action_dim  # trust the actor's own action size

    steps_done = 0
    total_reward = 0
    pending_action = np.zeros([action_dim])  # first segment executes a zero action

    while steps_done < 1000:
        with torch.no_grad():
            next_action = actor_main.forward(
                state.view(-1, state_dim)).cpu().numpy()[0]

        truncated = False
        for _ in range(control_stepsize):
            timestep = env.step(np.reshape(pending_action, (action_dim, )))
            steps_done += 1
            if steps_done > 1000:
                truncated = True
                break
        if truncated:
            # Episode limit hit mid-segment: the trailing partial reward is dropped.
            break

        _, reward, _, next_obs = timestep
        state = torch.FloatTensor(utils.state_1d_flat(next_obs)).to(device)
        total_reward += reward * control_stepsize  # normalize episode reward to ~1000
        pending_action = next_action

    return total_reward
def evaluate(actor_main, env, control_stepsize, state_dim, action_dim,
             actions_per_control, video_info=None):
    """Evaluate a multi-action actor for one episode, optionally recording video.

    The actor outputs `actions_per_control` sub-actions per decision; each
    sub-action is held for `control_stepsize / actions_per_control` env steps.
    Sub-actions are applied with a one-decision delay (the previous decision's
    actions execute while the new one is computed), and the actor is
    conditioned on both the observation and those previous actions.

    Args:
        actor_main: policy network mapping a flattened state+action vector
            to `actions_per_control * action_dim` outputs.
        env: dm_control-style environment with `physics.render` support.
        control_stepsize: env steps covered by one full decision.
        state_dim: size of the actor's input vector (state + prev actions).
        action_dim: size of a single sub-action.
        actions_per_control: sub-actions produced per decision.
        video_info: optional `(video_dir, epi_i)`; when given, every frame is
            rendered and written to an .avi file.

    Returns:
        Accumulated (un-normalized) episode reward.
    """
    action_stepsize = int(control_stepsize / actions_per_control)

    _, _, _, obs = env.reset()
    pending = np.zeros([actions_per_control, action_dim])

    flat_obs = utils.state_1d_flat(obs)
    state_action = torch.FloatTensor(
        np.append(flat_obs, pending.reshape([-1]))).to(device)

    steps_done = 0
    total_reward = 0

    recorder = None
    if video_info is not None:
        video_dir, epi_i = video_info
        video_filepath = os.path.join(video_dir,
                                      "training_" + str(epi_i) + ".avi")
        recorder = utils.VideoSaver(video_filepath,
                                    int(1.0 / env.control_timestep()),
                                    30, width=320, height=240)
        recorder.write(
            utils.RGB2BGR(env.physics.render(camera_id=0, width=320,
                                             height=240)))

    while steps_done < 1000:
        with torch.no_grad():
            raw = actor_main.forward(
                state_action.view(-1, state_dim)).cpu().numpy()[0]
            new_actions = raw.reshape([actions_per_control, action_dim])

        hit_limit = False
        for sub_i in range(actions_per_control):
            for _ in range(action_stepsize):
                # Execute the *previous* decision's sub-action (delayed control).
                timestep = env.step(pending[sub_i])
                steps_done += 1

                if recorder is not None:
                    recorder.write(
                        utils.RGB2BGR(env.physics.render(camera_id=0,
                                                         width=320,
                                                         height=240)))

                if steps_done > 1000:
                    hit_limit = True
                    break
            if hit_limit:
                break
        if hit_limit:
            # Episode limit hit mid-decision: skip the final bookkeeping.
            break

        _, reward, _, next_obs = timestep
        next_flat = utils.state_1d_flat(next_obs)
        state_action = torch.FloatTensor(
            np.append(next_flat, new_actions.reshape([-1]))).to(device)

        total_reward += reward
        pending = new_actions

    if recorder is not None:
        recorder.release()

    return total_reward
Пример #3
0
def evaluate(actor_main, env, control_stepsize, state_dim, action_dim,
             video_info=None):
    """Run one evaluation episode (up to ~1000 env steps) and return its reward.

    Each policy action is repeated for `control_stepsize` consecutive env
    steps. When `video_info = (video_dir, epi_i)` is provided, every frame is
    rendered and appended to an .avi file under `video_dir`.

    Args:
        actor_main: policy network exposing `forward`.
        env: dm_control-style environment with `physics.render` support.
        control_stepsize: env steps each action is held for.
        state_dim: flattened observation size fed to the actor.
        action_dim: size of a single action vector.
        video_info: optional `(video_dir, epi_i)` recording spec.

    Returns:
        Accumulated (un-normalized) episode reward.
    """
    _, _, _, obs = env.reset()
    state = torch.FloatTensor(utils.state_1d_flat(obs)).to(device)

    steps_done = 0
    total_reward = 0

    recorder = None
    if video_info is not None:
        video_dir, epi_i = video_info
        video_filepath = os.path.join(video_dir,
                                      "training_" + str(epi_i) + ".avi")
        recorder = utils.VideoSaver(video_filepath,
                                    int(1.0 / env.control_timestep()),
                                    30, width=320, height=240)
        recorder.write(
            utils.RGB2BGR(env.physics.render(camera_id=0, width=320,
                                             height=240)))

    while steps_done < 1000:
        with torch.no_grad():
            action = actor_main.forward(
                state.view(-1, state_dim)).cpu().numpy()[0]

        truncated = False
        for _ in range(control_stepsize):
            timestep = env.step(np.reshape(action, (action_dim, )))
            steps_done += 1

            if recorder is not None:
                recorder.write(
                    utils.RGB2BGR(env.physics.render(camera_id=0, width=320,
                                                     height=240)))

            if steps_done > 1000:
                truncated = True
                break
        if truncated:
            # Episode limit hit mid-segment: the trailing partial reward is dropped.
            break

        _, reward, _, next_obs = timestep
        state = torch.FloatTensor(utils.state_1d_flat(next_obs)).to(device)
        total_reward += reward

    if recorder is not None:
        recorder.release()

    return total_reward
Пример #4
0
        # --- Fragment: per-episode exploration setup (the enclosing function /
        # training loop header is outside this view; `noise_type`, `sigma`,
        # `action_dim`, `epi_i`, `video_save_period`, `record_dir`, `env`,
        # `Noise`, `utils`, and `device` are defined there).
        assert noise_type in ["ou", "gaussian"]
        if noise_type == "ou":
            # Temporally correlated Ornstein-Uhlenbeck noise.
            noise = Noise.OrnsteinUhlenbeckActionNoise(
                mu=np.zeros([action_dim]), sigma=sigma * np.ones([action_dim]))
        else:
            # Uncorrelated per-step Gaussian noise.
            noise = Noise.GaussianNoise(action_dim=action_dim, sigma=sigma)

        noise.reset()
        timestep = env.reset()
        ep_reward = 0.0
        prev_action = np.zeros([action_dim])  # no action applied before the first decision

        # timestep, reward, discount, observation
        _, _, _, s = timestep
        s = utils.state_1d_flat(s)

        # This actor variant is conditioned on the previously applied action
        # concatenated onto the flattened observation.
        s_a = np.append(s, prev_action)
        s_a = torch.FloatTensor(s_a).to(device)

        # for recording
        # NOTE(review): `% video_save_period == 1` records episodes 1,
        # period+1, ... rather than every period-th episode — confirm intended.
        if epi_i % video_save_period == 1:
            video_filepath = os.path.join(
                record_dir, "training_noise_" + str(epi_i) + ".avi")
            video_saver = utils.VideoSaver(video_filepath,
                                           int(1.0 / env.control_timestep()),
                                           30,
                                           width=320,
                                           height=240)

            # Initial frame render; the corresponding write call is outside
            # this visible fragment.
            frame = env.physics.render(camera_id=0, width=320, height=240)
Пример #5
0
        # --- Fragment: per-episode exploration setup, state-only actor variant
        # (the enclosing function / training loop header is outside this view;
        # `noise_type`, `sigma`, `action_dim`, `epi_i`, `video_save_period`,
        # `record_dir`, `env`, `Noise`, `utils`, and `device` are defined there).
        assert noise_type in ["ou", "gaussian"]
        if noise_type == "ou":
            # Temporally correlated Ornstein-Uhlenbeck noise.
            noise = Noise.OrnsteinUhlenbeckActionNoise(
                mu=np.zeros([action_dim]), sigma=sigma * np.ones([action_dim]))
        else:
            # Uncorrelated per-step Gaussian noise.
            noise = Noise.GaussianNoise(action_dim=action_dim, sigma=sigma)

        noise.reset()
        timestep = env.reset()
        ep_reward = 0.0
        prev_action = np.zeros([action_dim])

        # timestep, reward, discount, observation
        _, _, _, s = timestep

        # Unlike the state+action variant, the actor input here is just the
        # flattened observation.
        s = torch.FloatTensor(utils.state_1d_flat(s)).to(device)

        # NOTE(review): `% video_save_period == 1` records episodes 1,
        # period+1, ... rather than every period-th episode — confirm intended.
        if epi_i % video_save_period == 1:
            video_filepath = os.path.join(
                record_dir, "training_noise_" + str(epi_i) + ".avi")
            video_saver = utils.VideoSaver(video_filepath,
                                           int(1.0 / env.control_timestep()),
                                           30,
                                           width=320,
                                           height=240)

            frame = env.physics.render(camera_id=0, width=320, height=240)
            video_saver.write(utils.RGB2BGR(frame))

        # Episode rollout begins here; the loop body is truncated in this view.
        step_i = 0
        while step_i < 1000: