Example No. 1
    print("Loaded weight from {}\n".format(weight_path))
    start = time.time()
    env.reset()
    reward_ll_sum = 0
    done_sum = 0
    average_dones = 0.
    n_steps = math.floor(cfg['environment']['max_time'] /
                         cfg['environment']['control_dt'])
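    # presumably a single environment is evaluated here, hence n_steps * 1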
    total_steps = n_steps * 1
    start_step_id = 0

    print("Visualizing and evaluating the policy", weight_path + ".pt")
    loaded_graph = torch.jit.load(weight_path + '.pt')

    # weight_path has the form "<dir>/<name>_<iteration>"; split it once and reuse
    weight_dir, weight_name = weight_path.rsplit('/', 1)
    iteration_number = int(weight_name.split('_', 1)[1])
    env.load_scaling(weight_dir, iteration_number)
    print("Load observation scaling from", weight_dir + ":",
          "mean" + str(iteration_number) + ".csv",
          "and", "var" + str(iteration_number) + ".csv")
    env.turn_on_visualization()

    # max_steps = 1000000
    max_steps = 1000  # 10 secs
    for step in range(max_steps):
        time.sleep(0.01)
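        # observe(False) presumably freezes the running observation statistics during evaluation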
        obs = env.observe(False)
        action_ll = loaded_graph(torch.from_numpy(obs).cpu())
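        # (snippet truncated here; Example No. 3 suggests it continues by stepping the env)
        reward_ll, dones = env.step(action_ll.cpu().detach().numpy())
        reward_ll_sum = reward_ll_sum + reward_ll[0]
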
Example No. 2
        # (snippet begins mid-epilogue; the label on this print is an assumption)
        print('{:<40} {:>6}'.format(
            "time elapsed in this iteration: ",
            '{:6.4f}'.format(end - start)))
        print('{:<40} {:>6}'.format(
            "fps: ", '{:6.0f}'.format(total_steps / (end - start))))
        print('std: ')
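        # the stored value is presumably the log-std, hence exp() to report the std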
        print(np.exp(actor.distribution.std.cpu().detach().numpy()))
        print('----------------------------------------------------\n')

if test_mode:
    curriculum_setting = True
    save_dir = os.environ['WORKSPACE'] + "/ME491TermProject/data/~~~~~~~~"
    test_policy = 1000
    env.turn_on_visualization()
    env.start_video_recording(
        datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + "policy_" +
        str(test_policy) + '.mp4')
    env.load_scaling(save_dir, test_policy)
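    # replay the curriculum callbacks so the environment difficulty presumably
    # matches the stage the tested policy was trained at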
    if curriculum_setting:
        if test_policy > curriculum_start:
            for i in range(test_policy - curriculum_start):
                env.curriculum_callback()

    loaded_graph = torch.jit.load(save_dir + "/policy_" + str(test_policy) +
                                  '.pt')
    print("load_graph")
    dones = False
    steps = 0
    env.reset()
    time.sleep(1)
    while (not dones) and steps < 2 * n_steps:
        obs = env.observe(False)
        action_ll = loaded_graph(torch.from_numpy(obs).cpu())
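        # (snippet truncated here; presumably the loop steps the env and advances
        # the `dones` / `steps` variables in its condition, as in Example No. 3)
        reward_ll, dones = env.step(action_ll.cpu().detach().numpy())
        steps += 1
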
Example No. 3
    env.reset()
    reward_ll_sum = 0
    done_sum = 0
    average_dones = 0.
    n_steps = math.floor(cfg['environment']['max_time'] /
                         cfg['environment']['control_dt'])
    total_steps = n_steps * 1
    start_step_id = 0

    print("Visualizing and evaluating the policy: ", weight_path)
    loaded_graph = ppo_module.MLP(cfg['architecture']['policy_net'],
                                  torch.nn.LeakyReLU, ob_dim, act_dim)
    loaded_graph.load_state_dict(
        torch.load(weight_path)['actor_architecture_state_dict'])

    env.load_scaling(weight_dir, int(iteration_number))
    env.turn_on_visualization()

    # max_steps = 1000000
    max_steps = 1000  # 10 secs

    for step in range(max_steps):
        time.sleep(0.01)
        obs = env.observe(False)
        action_ll = loaded_graph.architecture(torch.from_numpy(obs).cpu())
        reward_ll, dones = env.step(action_ll.cpu().detach().numpy())
        reward_ll_sum = reward_ll_sum + reward_ll[0]
        if dones or step == max_steps - 1:
            print('----------------------------------------------------')
            print('{:<40} {:>6}'.format(
                "average ll reward: ",