def main(args):
    """Train the high-level dynamics model from a previously saved replay buffer.

    A Redis connection is created (used only by the commented-out remote
    data-collection path in the original workflow), the buffer is restored
    from disk, the high-level planner is built, and ``train_model`` runs.
    """
    redis_client = redis.Redis(host='10.10.1.2', port=6379, db=0)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # NOTE(review): model input/output sizes are hard-coded here instead of
    # being derived from an observation sample — confirm they match the data.
    model_obs_dim, model_output_dim = 4, 6

    utils.make_dir(args.save_dir)

    buffer_capacity = args.num_iters * args.num_latent_action_per_iteration
    HL_replay_buffer = utils.ReplayBuffer(model_obs_dim, args.z_dim,
                                          model_output_dim, device,
                                          buffer_capacity)
    # Transitions come from an earlier collection run saved under trial_4.
    HL_replay_buffer.load_buffer('./save_data/trial_4')

    planner_config = dict(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=args.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size,
        model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency,
    )
    high_level_planning = HLPM.high_level_planning(**planner_config)

    train_model(args, HL_replay_buffer, high_level_planning)
def main(args):
    """Evaluate a trained high-level planner against the robot Redis server.

    Restores the planner's model weights and normalisation statistics from
    the per-seed trial directory, then delegates to ``evaluation``.
    """
    redis_client = redis.Redis(host='10.10.1.2', port=6379, db=0)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # NOTE(review): model input/output sizes are hard-coded — confirm they
    # match the checkpoint being loaded.
    model_obs_dim, model_output_dim = 4, 6

    utils.make_dir(args.save_dir)
    # NOTE(review): when args.save is falsy, save_dir stays None and the
    # load_data/load_mean_var calls below will fail — presumably this entry
    # point is always run with saving enabled; confirm.
    save_dir = None
    if args.save:
        trial_path = os.path.join(args.save_dir + '/trial_%s' % str(args.seed))
        save_dir = utils.make_dir(trial_path)

    planner_config = dict(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=args.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size,
        model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency,
    )
    high_level_planning = HLPM.high_level_planning(**planner_config)
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir + '/buffer_data')

    evaluation(args, redis_client, high_level_planning)
def main(cfg):
    """Collect simulated rollout data, then fit the high-level dynamics model.

    Builds the environment, the high-level planner, and the low-level
    trajectory generator, fills a replay buffer via ``collect_data``, and
    finally calls ``train_model`` on the collected transitions.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()

    # NOTE(review): init_state is only bound in simulation; with cfg.sim
    # falsy the low_level_TG construction below raises NameError — confirm
    # this entry point is sim-only.
    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # Derive the model's input/output sizes from a real observation sample.
    model_obs_dim = np.size(com_utils.HL_obs(state))
    model_output_dim = np.size(com_utils.HL_delta_obs(state, state))

    buffer_capacity = cfg.num_iters * cfg.num_latent_action_per_iteration
    HL_replay_buffer = utils.ReplayBuffer(model_obs_dim, cfg.z_dim,
                                          model_output_dim, device,
                                          buffer_capacity)

    planner_config = dict(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=cfg.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency,
    )
    high_level_planning = HLPM.high_level_planning(**planner_config)

    tg_config = dict(
        device=device,
        z_dim=cfg.z_dim,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )
    low_level_TG = LLTG.low_level_TG(**tg_config)

    # NN-based low-level policies load pretrained weights from the cwd.
    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # Roll out latent actions in the environment to fill the buffer.
    collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer,
                 com_utils)

    # Fit the forward model on the collected transitions.
    train_model(cfg, HL_replay_buffer, high_level_planning)
# Example No. 4 (score: 0)
def main(cfg):
    """Collect rollout data using a fixed Raibert planner and IK trajectory generator.

    Variant of the data-collection entry point with several hyperparameters
    pinned inline (latent dim 3, 50 timesteps per footstep, 'raibert'
    high-level policy, 'IK' low-level policy) rather than read from cfg.
    """
    print(cfg.pretty())

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()

    # NOTE(review): init_state is only bound when cfg.sim is truthy; the
    # low_level_TG construction below needs it — confirm sim-only usage.
    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # NOTE(review): dimensions hard-coded for this variant.
    model_obs_dim, model_output_dim = 2, 5

    buffer_capacity = cfg.num_iters * cfg.num_latent_action_per_iteration
    HL_replay_buffer = utils.ReplayBuffer(model_obs_dim, cfg.z_dim,
                                          model_output_dim, device,
                                          buffer_capacity)

    planner_config = dict(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=3,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=2,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type='raibert',
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=50,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency,
    )
    high_level_planning = HLPM.high_level_planning(**planner_config)

    tg_config = dict(
        device=device,
        z_dim=3,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=50,
        batch_size=cfg.batch_size,
        low_level_policy_type='IK',
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )
    low_level_TG = LLTG.low_level_TG(**tg_config)

    # Fill the replay buffer by rolling out in the environment.
    collect_data(cfg, env, high_level_planning, low_level_TG, HL_replay_buffer)
def evaluate_model(cfg):
    """Evaluate velocity tracking of the trained high-level model in simulation.

    Steps through a fixed schedule of target velocities (changed every 5
    latent actions), records commanded vs. model-predicted vs. measured base
    velocity for each latent action, and saves the result to
    ``./<policy_type>_velocity_tracking_fin.npy``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=cfg.render, logger = False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()

    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # Model I/O sizes are derived from an actual observation sample.
    model_obs_dim, model_output_dim = np.size(com_utils.HL_obs(state)), np.size(com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device = device,
        model_obs_dim = model_obs_dim,
        z_dim = cfg.z_dim,
        model_output_dim = model_output_dim,
        model_hidden_num = cfg.model_hidden_num,
        model_layer_num = cfg.model_layer_num,
        batch_size = cfg.batch_size,
        model_lr = cfg.model_lr,
        high_level_policy_type = cfg.high_level_policy_type,
        update_sample_policy = cfg.update_sample_policy,
        update_sample_policy_lr = cfg.update_sample_policy_lr,
        low_level_policy_type = cfg.low_level_policy_type,
        num_timestep_per_footstep = cfg.num_timestep_per_footstep,
        model_update_steps = cfg.model_update_steps,
        control_frequency=cfg.control_frequency
    )
    # Load trained model weights and input normalisation stats from the cwd.
    high_level_planning.load_data('.')
    high_level_planning.load_mean_var('.'+'/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device = device,
        z_dim = cfg.z_dim,
        a_dim = cfg.a_dim,
        num_timestep_per_footstep = cfg.num_timestep_per_footstep,
        batch_size = cfg.batch_size,
        low_level_policy_type = cfg.low_level_policy_type, 
        update_low_level_policy = cfg.update_low_level_policy,
        update_low_level_policy_lr = cfg.update_low_level_policy_lr,
        init_state = init_state,
    )

    if cfg.low_level_policy_type =='NN':
        low_level_TG.load_model('.')

    # NOTE(review): prediction_evaluation is allocated (here and per-iteration
    # below) but never written to or saved — appears to be dead code.
    prediction_evaluation = np.empty((2* model_output_dim,cfg.num_latent_action_per_iteration ))
    velocity_tracking = np.empty((cfg.num_iters,6,cfg.num_latent_action_per_iteration))

    # Target schedule: [vx, vy, yaw_rate, flag]; one entry per 5 latent actions.
    target_velocity_test = [ 
                        np.array([0.0, 0.2, 0.0,1]),
                        np.array([0.0, 0.2, 0.0,1]),
                        np.array([-0.0, 0.2, 0.0,1]),
                        np.array([-0.2, 0.0,0.0,1]),
                        np.array([-0.2, 0.0, 0.0,1]),
                        np.array([-0.2, 0.0,0.0,1]),
                        np.array([0.0, -0.2,0.0,1]),                   
                        np.array([0.0, -0.2,0.0,1]),
                        np.array([0.0, -0.2, 0.0,1]),
                        np.array([0.0, -0.2, 0.0,1]),
                        np.array([0.2, -0.0, 0.0,1]),
                        np.array([0.2, -0.0, 0.0,1]),
                        np.array([0.2, -0.0, 0.0,1]),
                        np.array([0.2, -0.0, 0.0,1]),
                        np.array([0.2, -0.0,0.0,1]),
                      ]

    # Rows per iteration: [tgt_vx, tgt_vy, pred_vx, pred_vy, meas_vx, meas_vy].
    velocity_tracking = np.empty((cfg.num_iters, 6,cfg.num_latent_action_per_iteration))
    for iter in range(cfg.num_iters):
        prediction_evaluation = np.empty((2* model_output_dim,cfg.num_latent_action_per_iteration ))

        # NOTE(review): total_cost is initialised but never updated or used.
        total_cost = 0
        # Reset the robot to a standing pose at the start of every iteration.
        init_state = motion_library.exp_standing(env)

        for j in range(cfg.num_latent_action_per_iteration):
            # Advance to the next target every 5 latent actions (j % 5 == 0).
            if not j%5:
                target = target_velocity_test[int((j+1)/5)]

            pre_com_state = state
            # Plan one latent action via MPC sampling, then execute it through
            # the low-level trajectory generator for a full footstep.
            latent_action = high_level_planning.plan_latent_action(state, target, com_utils, cfg.mpc_sample_num, cfg.mpc_horizon)
            low_level_TG.update_latent_action(state,latent_action)

            for step in range(1, cfg.num_timestep_per_footstep+1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)

            post_com_state = state

            # Check if robot still alive
            high_level_obs , high_level_delta_obs = com_utils.HL_obs(pre_com_state), com_utils.HL_delta_obs(pre_com_state, post_com_state)
            predict_delta_state = high_level_planning.model_predict(high_level_obs, latent_action)
            # Convert the model's CoM-frame prediction back to world frame.
            predict_state_world = com_utils.predict2world(pre_com_state, predict_delta_state)

            # collect data
            velocity_tracking[iter][0][j] = target[0]
            velocity_tracking[iter][1][j] = target[1]
            velocity_tracking[iter][2][j] = predict_state_world[3]
            velocity_tracking[iter][3][j] = predict_state_world[4]
            velocity_tracking[iter][4][j] = post_com_state['base_velocity'][0]
            velocity_tracking[iter][5][j] = post_com_state['base_velocity'][1]

    np.save('./' + cfg.high_level_policy_type + '_velocity_tracking_fin.npy', velocity_tracking) 
    return 
def evaluate_model(args):
    """Evaluate velocity tracking and foot-contact gait patterns of the planner.

    Runs a fixed target-velocity schedule, records per-foot contact (gait)
    at every control timestep, compares model predictions against measured
    state deltas, and saves prediction/velocity/gait arrays to disk.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=args.sim, render=False, logger=False)
    env.set_control_mode(args.control_mode)
    state = env.reset()
    utils.make_dir(args.save_dir)
    # NOTE(review): save_dir is None when args.save is falsy, which would
    # break load_data/np.save below — presumably always run with saving on.
    save_dir = utils.make_dir(
        os.path.join(args.save_dir +
                     '/trial_%s' % str(args.seed))) if args.save else None
    total_timestep, total_latent_action = 0, 0

    if args.sim:
        # NN low-level policies use a different standing pose.
        if args.low_level_policy_type == 'NN':
            init_state = motion_library.exp_standing(env,
                                                     shoulder=0.3,
                                                     elbow=1.3)
        else:
            init_state = motion_library.exp_standing(env)
    # Model I/O sizes derived from an actual observation sample.
    model_obs_dim, model_output_dim = np.size(utils.HL_obs(state)), np.size(
        utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=args.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=args.model_hidden_num,
        batch_size=args.batch_size,
        model_lr=args.model_lr,
        high_level_policy_type=args.high_level_policy_type,
        update_sample_policy=args.update_sample_policy,
        update_sample_policy_lr=args.update_sample_policy_lr,
        low_level_policy_type=args.low_level_policy_type,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        model_update_steps=args.model_update_steps,
        control_frequency=args.control_frequency)
    # Load trained model weights and normalisation statistics.
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=args.z_dim,
        a_dim=args.a_dim,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        batch_size=args.batch_size,
        low_level_policy_type=args.low_level_policy_type,
        update_low_level_policy=args.update_low_level_policy,
        update_low_level_policy_lr=args.update_low_level_policy_lr,
        init_state=init_state,
    )
    if args.low_level_policy_type == 'NN':
        low_level_TG.load_model('./save_data/trial_2')

    # Per-latent-action comparison: measured deltas vs. model predictions.
    prediction_evaluation = np.empty(
        (2 * model_output_dim, args.num_latent_action_per_iteration))
    # Rows: [tgt_vx, tgt_vy, pred_vx, pred_vy, meas_dvx, meas_dvy].
    velocity_tracking = np.empty((6, args.num_latent_action_per_iteration))

    # Target schedule; one entry consumed per 5 latent actions.
    target_velocity_test = [
        np.array([0.0, -0.1, 0.0]),
        np.array([0.0, -0.1, 0.0]),
        np.array([-0.00, -0.1, 0.0]),
        np.array([-0.0, -0.1, 0.0]),
        np.array([-0.0, -0.1, 0.0]),
        np.array([-0.00, -0.4, 0.0]),
        np.array([-0.00, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
        np.array([-0.0, -0.4, 0.0]),
    ]

    # Binary contact record per iteration, foot (6 legs), and timestep.
    gait_record = np.empty(
        (args.num_iters, 6, args.num_latent_action_per_iteration *
         args.num_timestep_per_footstep))
    for iter in range(args.num_iters):
        # reset robot to stand
        if args.sim:
            state = motion_library.exp_standing(env, shoulder=0.3, elbow=1.3)
            low_level_TG.reset(state)

        for j in range(args.num_latent_action_per_iteration):
            # Advance to the next target every 5 latent actions.
            if not j % 5:
                target = target_velocity_test[int((j + 1) / 5)]

            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(
                state, target)
            low_level_TG.update_latent_action(state, latent_action)

            for step in range(1, args.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state, total_timestep = env.step(action), total_timestep + 1
                # Record foot contact: z below -5 mm counts as ground contact.
                for q in range(6):
                    if state['foot_pos'][q][2] <= -0.005:
                        gait_record[iter][q][j * args.num_timestep_per_footstep
                                             + step - 1] = 1
                    else:
                        gait_record[iter][q][j * args.num_timestep_per_footstep
                                             + step - 1] = 0

            post_com_state = state

            # Check if robot still alive
            # NOTE(review): if check_data_useful is falsy on the first latent
            # action, predict_state / high_level_delta_obs are unbound when
            # used below — confirm the guard can never fail on iteration 0.
            if utils.check_data_useful(state):
                high_level_obs, high_level_delta_obs = utils.HL_obs(
                    pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                       post_com_state)
                predict_state = high_level_planning.model_predict(
                    high_level_obs, latent_action)

            # collect data

            velocity_tracking[0][j] = target[0]
            velocity_tracking[1][j] = target[1]
            velocity_tracking[2][j] = predict_state[4]
            velocity_tracking[3][j] = predict_state[5]
            velocity_tracking[4][j] = high_level_delta_obs[4]
            velocity_tracking[5][j] = high_level_delta_obs[5]
            for q in range(model_output_dim):
                prediction_evaluation[q][j] = high_level_delta_obs[q]
                prediction_evaluation[q +
                                      model_output_dim][j] = predict_state[q]

            total_latent_action += 1

            # Stop this iteration early if the robot has fallen.
            if utils.check_robot_dead(state):
                break

    np.save(save_dir + '/prediction_evaluation.npy',
            np.array(prediction_evaluation))
    np.save(save_dir + '/velocity_tracking.npy', velocity_tracking)
    np.save('./save_data/trial_2/gait_record.npy', gait_record)

    return
# Example No. 7 (score: 0)
def evaluate_model(cfg):
    """Evaluate goal-reaching along a square-ish double-arc waypoint course.

    Builds a closed course of waypoints from two semicircle arcs, drives the
    robot waypoint-to-waypoint with MPC-planned latent actions, and saves the
    traced base trajectory per iteration when all waypoints are reached.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=False, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    com_utils = utils.CoM_frame_MPC()

    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # Model I/O sizes derived from an actual observation sample.
    model_obs_dim, model_output_dim = np.size(
        com_utils.HL_obs(state)), np.size(com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=cfg.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)
    high_level_planning.load_data('.')
    # SAC policies manage their own normalisation; others load saved stats.
    if cfg.high_level_policy_type != 'SAC':
        high_level_planning.load_mean_var('.' + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )

    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # Waypoints: [x, y, yaw, flag]; first arc centred at x=1, second at x=3.
    square_circle_test = []
    total_num = 6
    for i in range(1, total_num + 1):
        theta = i * math.pi / float(total_num)
        square_circle_test.append(
            np.array([1 - math.cos(theta), 1.5 * math.sin(theta), 0, 1]))

    for i in range(1, total_num + 1):
        theta = i * math.pi / float(total_num)
        square_circle_test.append(
            np.array([3 - math.cos(theta), -1.5 * math.sin(theta), 0, 1]))

    # NOTE(review): square_cost is created but never appended to or saved.
    square_cost = []

    for iter in range(cfg.num_iters):
        # Traced base pose: [x positions, y positions, yaw angles].
        position_tracking = [[], [], []]
        # reset robot to stand
        state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])

        total_latent_action = 0
        total_cost = 0
        target_index = 0
        while True:
            target = square_circle_test[target_index]

            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(
                state, target, com_utils, cfg.mpc_sample_num, cfg.mpc_horizon)
            low_level_TG.update_latent_action(state, latent_action)

            for step in range(1, cfg.num_timestep_per_footstep + 1):
                action = low_level_TG.get_action(state, step)
                state = env.step(action)
                # collect data
                position_tracking[0].append(state['base_pos_x'][0])
                position_tracking[1].append(state['base_pos_y'][0])
                position_tracking[2].append(state['base_ori_euler'][2])
                # Advance to the next waypoint once within 0.15 m.
                if np.linalg.norm(target[0:2] - np.array(
                    [state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.15:
                    print("Reach Goal %s!!!!" % str(target_index))
                    target_index += 1
                    if target_index >= np.shape(square_circle_test)[0]:
                        break
                    target = square_circle_test[target_index]

            post_com_state = state

            total_latent_action += 1

            # Course complete: save the traced trajectory for this iteration.
            if target_index >= np.shape(square_circle_test)[0]:
                np.save('./square_circle_test_' + str(iter) + '.npy',
                        np.array(position_tracking))
                break

            # Give up (and discard the trace) after 200 latent actions.
            if total_latent_action > 200:
                print('Did not reach goal')
                break

    return
# Example No. 8 (score: 0)
def evaluate_model(cfg):
    """Evaluate goal-reaching to 8 fixed target positions around the robot.

    For each of ``cfg.num_iters`` rounds, drives the robot from a standing
    reset toward each of 8 relative targets in turn, recording the base (x, y)
    trace per target, and saves all traces per round to a ``.npy`` file.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=cfg.sim, render=cfg.render, logger=False)
    env.set_control_mode(cfg.control_mode)
    state = env.reset()
    # SAC policies use the RL observation frame; others use the MPC frame.
    com_utils = utils.CoM_frame_MPC()
    if cfg.high_level_policy_type == 'SAC':
        com_utils = utils.CoM_frame_RL()

    if cfg.sim:
        init_state = motion_library.exp_standing(env)

    # SAC's HL_obs takes (state, goal); both dims come from the same call here.
    if cfg.high_level_policy_type == 'SAC':
        model_obs_dim, model_output_dim = np.size(
            com_utils.HL_obs(state, np.zeros(3))), np.size(
                com_utils.HL_obs(state, np.zeros(3)))
    else:
        model_obs_dim, model_output_dim = np.size(
            com_utils.HL_obs(state)), np.size(
                com_utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device=device,
        model_obs_dim=model_obs_dim,
        z_dim=cfg.z_dim,
        model_output_dim=model_output_dim,
        model_hidden_num=cfg.model_hidden_num,
        model_layer_num=cfg.model_layer_num,
        batch_size=cfg.batch_size,
        model_lr=cfg.model_lr,
        high_level_policy_type=cfg.high_level_policy_type,
        update_sample_policy=cfg.update_sample_policy,
        update_sample_policy_lr=cfg.update_sample_policy_lr,
        low_level_policy_type=cfg.low_level_policy_type,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        model_update_steps=cfg.model_update_steps,
        control_frequency=cfg.control_frequency)
    high_level_planning.load_data('.')
    if cfg.high_level_policy_type != 'SAC':
        high_level_planning.load_mean_var('.' + '/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=cfg.z_dim,
        a_dim=cfg.a_dim,
        num_timestep_per_footstep=cfg.num_timestep_per_footstep,
        batch_size=cfg.batch_size,
        low_level_policy_type=cfg.low_level_policy_type,
        update_low_level_policy=cfg.update_low_level_policy,
        update_low_level_policy_lr=cfg.update_low_level_policy_lr,
        init_state=init_state,
    )

    if cfg.low_level_policy_type == 'NN':
        low_level_TG.load_model('.')

    # Relative targets [dx, dy, yaw, flag], offset by start pose per round.
    target_position_test = [
        np.array([0.0, 2.0, 0.0, 1]),
        np.array([2.0, 2.0, 0.0, 1]),
        np.array([-2.0, 2.0, 0.0, 1]),
        np.array([2.0, 0.0, 0.0, 1]),
        np.array([-2.0, 0.0, 0.0, 1]),
        np.array([0.0, -2.0, 0.0, 1]),
        np.array([-2.0, -2.0, 0.0, 1]),
        np.array([2.0, -2.0, 0.0, 1]),
    ]

    for q in range(cfg.num_iters):
        # 16 traces: x and y lists (2*i and 2*i+1) for each of the 8 targets.
        position_tracking = [
            [],
            [],
            [],
            [],
            [],
            [],
            [],
            [],
            [],
            [],
            [],
            [],
            [],
            [],
            [],
            [],
        ]
        for target_index in range(8):

            # reset robot to stand
            state = motion_library.exp_standing(env)
            low_level_TG.reset(state)

            position_tracking[2 * target_index].append(state['base_pos_x'][0])
            position_tracking[2 * target_index + 1].append(
                state['base_pos_y'][0])
            total_latent_action = 0
            # Convert the relative target into world coordinates.
            target = target_position_test[target_index]
            target[0] = target[0] + state['base_pos_x'][0]
            target[1] = target[1] + state['base_pos_y'][0]

            while True:
                pre_com_state = state
                latent_action = high_level_planning.plan_latent_action(
                    state, target, com_utils, cfg.mpc_sample_num,
                    cfg.mpc_horizon)
                low_level_TG.update_latent_action(state, latent_action)

                for step in range(1, cfg.num_timestep_per_footstep + 1):
                    action = low_level_TG.get_action(state, step)
                    state = env.step(action)
                    position_tracking[2 * target_index].append(
                        state['base_pos_x'][0])
                    position_tracking[2 * target_index + 1].append(
                        state['base_pos_y'][0])
                    # Goal counts as reached within 0.2 m.
                    if np.linalg.norm(target[0:2] - np.array(
                        [state['base_pos_x'][0], state['base_pos_y'][0]])
                                      ) < 0.2:
                        print("ReachGoal" + str(target_index) + "!!!!")
                        break

                # collect data
                post_com_state = state
                total_latent_action += 1

                # Re-check goal distance to exit the outer while-loop too.
                if np.linalg.norm(target[0:2] - np.array(
                    [state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.2:
                    break
                # Give up on this target after 30 latent actions.
                if total_latent_action > 30:
                    break

        for i in range(8):
            position_tracking[i] = np.array(position_tracking[i])
        np.save(
            './' + cfg.high_level_policy_type + '_multi_tgt_test_' + str(q) +
            '.npy', np.array(position_tracking))

    return
# Example No. 9 (score: 0)
def evaluate_model(args):
    """Evaluate tracking cost for a single constant velocity target.

    Repeatedly commands the (module-level) ``target_velocity``, accumulates
    ``utils.easy_cost`` per iteration, and saves the cost record and base
    trajectory to the trial directory.

    NOTE(review): ``target_velocity`` and ``cost_index`` are not defined in
    this function — they must be module-level globals; confirm they exist
    where this example is used.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = daisy_API(sim=args.sim, render=False, logger = False)
    env.set_control_mode(args.control_mode)
    state = env.reset()
    utils.make_dir(args.save_dir)
    # NOTE(review): save_dir is None when args.save is falsy, which breaks
    # load_data/np.save below — presumably always run with saving enabled.
    save_dir = utils.make_dir(os.path.join(args.save_dir + '/trial_%s' % str(args.seed))) if args.save else None
    total_timestep, total_latent_action = 0, 0 

    if args.sim:
        # NN low-level policies use a different standing pose.
        if args.low_level_policy_type =='NN':
            init_state = motion_library.exp_standing(env, shoulder = 0.3, elbow = 1.3)
        else:
            init_state = motion_library.exp_standing(env)
    # Model I/O sizes derived from an actual observation sample.
    model_obs_dim, model_output_dim = np.size(utils.HL_obs(state)), np.size(utils.HL_delta_obs(state, state))

    high_level_planning = HLPM.high_level_planning(
        device = device,
        model_obs_dim = model_obs_dim,
        z_dim = args.z_dim,
        model_output_dim = model_output_dim,
        model_hidden_num = args.model_hidden_num,
        batch_size = args.batch_size,
        model_lr = args.model_lr,
        high_level_policy_type = args.high_level_policy_type,
        update_sample_policy = args.update_sample_policy,
        update_sample_policy_lr = args.update_sample_policy_lr,
        low_level_policy_type = args.low_level_policy_type,
        num_timestep_per_footstep = args.num_timestep_per_footstep,
        model_update_steps = args.model_update_steps,
        control_frequency= args.control_frequency
    )
    # Load trained model weights and normalisation statistics.
    high_level_planning.load_data(save_dir)
    high_level_planning.load_mean_var(save_dir+'/buffer_data')

    low_level_TG = LLTG.low_level_TG(
        device = device,
        z_dim = args.z_dim,
        a_dim = args.a_dim,
        num_timestep_per_footstep = args.num_timestep_per_footstep,
        batch_size = args.batch_size,
        low_level_policy_type = args.low_level_policy_type,
        update_low_level_policy = args.update_low_level_policy,
        update_low_level_policy_lr = args.update_low_level_policy_lr,
        init_state = init_state,
    )

    if args.low_level_policy_type =='NN':
        low_level_TG.load_model('./save_data/trial_2')

    # Per-iteration accumulated cost, and base trace [x, y, yaw, speed].
    cost_record = []
    position_tracking = [[],[],[],[]]

    for iter in range(args.num_iters):
        # reset robot to stand 
        if args.sim:
            if args.low_level_policy_type =='NN':
                init_state = motion_library.exp_standing(env, shoulder = 0.3, elbow = 1.3)
            else:
                init_state = motion_library.exp_standing(env)
        low_level_TG.reset(state)
        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])
        position_tracking[3].append(np.linalg.norm(state['base_velocity'][0:2]))

        j = 0
        total_cost = 0
        # while True:
        for i in range(args.num_latent_action_per_iteration):
            # Constant command for the whole run (module-level global).
            target = target_velocity

            pre_com_state = state
            latent_action = high_level_planning.plan_latent_action(state, target)
            low_level_TG.update_latent_action(state,latent_action)
            for step in range(1, args.num_timestep_per_footstep+1):
                action = low_level_TG.get_action(state, step)
                state, total_timestep = env.step(action), total_timestep + 1
                # collect data
                position_tracking[0].append(state['base_pos_x'][0])
                position_tracking[1].append(state['base_pos_y'][0])
                position_tracking[2].append(state['base_ori_euler'][2])
                position_tracking[3].append(np.linalg.norm(state['base_velocity'][0:2]))

            post_com_state = state
            # Accumulate tracking cost over this footstep.
            cost = utils.easy_cost(target,pre_com_state, post_com_state)
            total_cost += cost

            # Check if robot still alive
            # NOTE(review): predict_state is computed but never used here.
            if utils.check_data_useful(state):
                high_level_obs , high_level_delta_obs = utils.HL_obs(pre_com_state), utils.HL_delta_obs(pre_com_state, post_com_state)
                predict_state = high_level_planning.model_predict(high_level_obs, latent_action)

            total_latent_action += 1
            j+=1

        cost_record.append(total_cost)
        print(cost_record)
    # cost_index is a module-level global naming this test configuration.
    np.save(save_dir + '/'+args.high_level_policy_type+'_' +args.low_level_policy_type +'_different_cost_test'+ str(cost_index)+'.npy', np.array(cost_record) )
    np.save(save_dir + '/'+args.high_level_policy_type+'_' +args.low_level_policy_type +'_com.npy', np.array(position_tracking) )
    return