Example #1
def experiment(env_id, traj_id, verbose=False):
    obs_list = get_obs('acrobot_obs', env_id)
    data = load_data('acrobot_obs', env_id, traj_id)
    ref_path = data['path']
    start_goal = data['start_goal']
    env_vox = torch.from_numpy(np.load('mpnet/sst_envs/acrobot_obs_env_vox.npy')[env_id]).unsqueeze(0).float()
    
    tic = time.perf_counter()
    final_start = ref_path[0]
    final_goal = ref_path[-1]
    ## initialize planner
    params = get_params(final_goal)
    mpc_mpnet = MPCMPNetPlanner(
        params,
        final_start, 
        final_goal,
        env_vox,
        system="acrobot_obs",
        setup="default_norm",
        #setup="default_norm_aug",
        #setup="norm_nodiff_noaug_20step2e-2",
        ep=5000,
        obs_list=obs_list[env_id], 
        verbose=verbose)
    mpc_mpnet.mpnet.train()
    
    ## start experiment
    it = 0
    while it < params['max_plan_it'] and not mpc_mpnet.reached:
        mpc_mpnet.reset()
        for i in range(20):
            if verbose:
                print('iteration:', i)
            mpc_mpnet.step()
            if mpc_mpnet.reached:
                break
            it += 1
    
    toc = time.perf_counter()
    # print(mpc_mpnet.costs)
    costs = np.sum(mpc_mpnet.costs) if len(mpc_mpnet.costs) > 0 else np.inf
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': toc-tic,
        'successful': mpc_mpnet.reached,
        'costs': costs
    }
    
    print("env {}, traj {}, {}, time: {} seconds, {}(ref:{}) costs".format(
            env_id, 
            traj_id,
            result['successful'],
            result['planning_time'],
            result['costs'],
             data['cost'].sum()))
    return result
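Each example in this file exposes the same `experiment(env_id, traj_id, ...)` entry point and returns a result dict. A small driver like the sketch below (not part of the original code; the function name, id ranges, and aggregation are illustrative) can sweep trajectories and summarize the returned dicts:

import numpy as np

def run_batch(env_ids, traj_ids):
    # Collect one result dict per (env_id, traj_id) pair.
    results = [experiment(env_id, traj_id)
               for env_id in env_ids for traj_id in traj_ids]
    success = np.array([r['successful'] for r in results], dtype=bool)
    times = np.array([r['planning_time'] for r in results])
    print("success rate: {:.2%}".format(success.mean()))
    if success.any():
        print("mean planning time on success: {:.2f} s".format(times[success].mean()))
    return results

# e.g. results = run_batch(range(10), range(100))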
Example #2
def experiment(env_id,
               traj_id,
               verbose=False,
               system='acrobot_obs',
               params=None):
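    # This variant does no planning: it only loads the reference trajectory
    # from the dataset and reports its cost with a zero planning time.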
    print("env {}, traj {}".format(env_id, traj_id))
    data = load_data(system, env_id, traj_id)
    ref_path = data['path']
    start_goal = data['start_goal']
    costs = data['cost'].sum()
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': 0,
        'successful': 1,
        'costs': costs
    }

    print("\t{}, time: {} seconds, {}(ref:{}) costs".format(
        result['successful'], result['planning_time'], result['costs'],
        data['cost'].sum()))
    return result
Example #3
def experiment(env_id,
               traj_id,
               verbose=False,
               system='cartpole_obs',
               params=None):
    print("env {}, traj {}".format(env_id, traj_id))
    # print(params)
    if system in ['cartpole_obs', 'acrobot_obs', 'car_obs']:
        obs_list = get_obs(system, env_id)[env_id].reshape(-1, 2)
    elif system in ['quadrotor_obs']:
        obs_list = get_obs_3d(system, env_id)[env_id].reshape(-1, 3)
    data = load_data(system, env_id, traj_id)
    ref_path = data['path']
    start_goal = data['start_goal']
    # print(start_goal)
    env_vox = np.load('mpnet/sst_envs/data/{}_env_vox.npy'.format(system))
    obc = env_vox[env_id]
    # print(obc.reshape(-1), obc.reshape(-1).shape)
    # print(obs_list)
    number_of_iterations = params['number_of_iterations']
    min_time_steps = params.get('min_time_steps', 0)
    max_time_steps = params.get('max_time_steps', 800)
    integration_step = params['dt']
    planner = _deep_smp_module.DSSTMPCWrapper(
        system_type=system,
        start_state=np.array(data['start_goal'][0]),
        # goal_state=np.array(ref_path[-1]),
        goal_state=np.array(data['start_goal'][1]),
        goal_radius=params['goal_radius'],
        random_seed=0,
        sst_delta_near=params['sst_delta_near'],
        sst_delta_drain=params['sst_delta_drain'],
        obs_list=obs_list,
        width=params['width'],
        verbose=params['verbose'],
        mpnet_weight_path=params['mpnet_weight_path'],
        cost_predictor_weight_path=params['cost_predictor_weight_path'],
        cost_to_go_predictor_weight_path=params[
            'cost_to_go_predictor_weight_path'],
        num_sample=params['cost_samples'],
        shm_max_step=params['shm_max_steps'],
        np=params['n_problem'],
        ns=params['n_sample'],
        nt=params['n_t'],
        ne=params['n_elite'],
        max_it=params['max_it'],
        converge_r=params['converge_r'],
        mu_u=params['mu_u'],
        std_u=params['sigma_u'],
        mu_t=params['mu_t'],
        std_t=params['sigma_t'],
        t_max=params['t_max'],
        step_size=params['step_size'],
        integration_step=params['dt'],
        device_id=params['device_id'],
        refine_lr=params['refine_lr'],
        weights_array=params['weights_array'],
        obs_voxel_array=obc.reshape(-1))
    solution = planner.get_solution()

    data_cost = np.sum(data['cost'])
    # th = 1.2 * data_cost
    # start experiment
    tic = time.perf_counter()
    for iteration in tqdm(range(number_of_iterations)):
        planner.deep_smp_step(
            params['refine'],
            refine_threshold=params['refine_threshold'],
            using_one_step_cost=params['using_one_step_cost'],
            cost_reselection=params['cost_reselection'],
            goal_bias=params['goal_bias'],
            NP=params['n_problem'])
        solution = planner.get_solution()
        # and np.sum(solution[2]) < th:
        if (solution is not None
                or time.perf_counter() - tic > params['max_planning_time']):
            break
    toc = time.perf_counter()
    # if solution is not None:
    #     print(solution[0], solution[2])
    costs = solution[2].sum() if solution is not None else np.inf
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': toc - tic,
        'successful': solution is not None,
        'costs': costs,
        'traj': solution[0] if solution is not None else None
    }

    print("\t{}, time: {} seconds, {}(ref:{}) costs".format(
        result['successful'], result['planning_time'], result['costs'],
        np.sum(data['cost'])))
    return result
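Everything the planner needs in this example comes from the single `params` dictionary supplied by the caller. The sketch below enumerates the keys the function reads; the values shown are placeholders of mine, not the tuned settings.

# Placeholder values only: the keys mirror what experiment() above reads from
# `params`; the actual tuned values are not reproduced here.
params = {
    'number_of_iterations': 3000,
    'max_planning_time': 60.0,            # seconds
    'dt': 2e-3,                           # integration step
    'goal_radius': 1.5,
    'goal_bias': 0.1,
    'sst_delta_near': 0.1,
    'sst_delta_drain': 0.05,
    'width': 4.0,                         # obstacle width
    'verbose': False,
    'mpnet_weight_path': 'path/to/mpnet_weights.pt',
    'cost_predictor_weight_path': 'path/to/cost_predictor.pt',
    'cost_to_go_predictor_weight_path': 'path/to/cost_to_go_predictor.pt',
    'cost_samples': 1,
    'shm_max_steps': 1,
    'n_problem': 1,
    'n_sample': 32,
    'n_t': 1,
    'n_elite': 4,
    'max_it': 5,
    'converge_r': 1e-1,
    'mu_u': 0.0, 'sigma_u': 1.0,          # control sampling distribution
    'mu_t': 0.1, 'sigma_t': 0.2,          # duration sampling distribution
    't_max': 0.5,
    'step_size': 0.75,
    'device_id': 0,
    'refine': False,
    'refine_threshold': 0.0,
    'refine_lr': 0.0,
    'using_one_step_cost': False,
    'cost_reselection': False,
    'weights_array': [1.0] * 4,           # per-state-dimension distance weights
    # optional: 'min_time_steps', 'max_time_steps'
}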
Example #4
def experiment(env_id,
               traj_id,
               verbose=False,
               system='cartpole_obs',
               params_module=None):
    print("env {}, traj {}".format(env_id, traj_id))
    obs_list = get_obs(system, env_id)[env_id].reshape(-1, 2)
    data = load_data(system, env_id, traj_id)
    ref_path = data['path']
    start_goal = data['start_goal']
    # print(start_goal)
    env_vox = np.load('mpnet/sst_envs/{}_env_vox.npy'.format(system))
    obc = env_vox[env_id, 0]
    # print(obs_list)
    params = params_module.get_params()
    number_of_iterations = params['number_of_iterations']
    min_time_steps = params.get('min_time_steps', 80)
    max_time_steps = params.get('max_time_steps', 400)
    integration_step = params['dt']

    planner = _deep_smp_module.DSSTMPCWrapper(
        system,
        start_state=np.array(ref_path[0]),
        goal_state=np.array(ref_path[-1]),
        # goal_state=np.array(data['start_goal'][-1]),
        goal_radius=params['goal_radius'],
        random_seed=0,
        sst_delta_near=params['sst_delta_near'],
        sst_delta_drain=params['sst_delta_drain'],
        goal_bias=params['goal_bias'],
        obs_list=obs_list,
        width=params['width'],
        verbose=params['verbose'],
        mpnet_weight_path=params['mpnet_weight_path'],
        cost_predictor_weight_path=params['cost_predictor_weight_path'],
        cost_to_go_predictor_weight_path=params[
            'cost_to_go_predictor_weight_path'],
        num_sample=params['cost_samples'],
        ns=params['n_sample'],
        nt=params['n_t'],
        ne=params['n_elite'],
        max_it=params['max_it'],
        converge_r=params['converge_r'],
        mu_u=params['mu_u'],
        std_u=params['sigma_u'],
        mu_t=params['mu_t'],
        std_t=params['sigma_t'],
        t_max=params['t_max'],
        step_size=params['step_size'],
        integration_step=params['dt'],
        device_id=params['device_id'],
        refine_lr=params['refine_lr'],
    )
    solution = planner.get_solution()

    data_cost = np.sum(data['cost'])
    th = 1.2 * data_cost
    ## start experiment
    tic = time.perf_counter()
    for iteration in tqdm(range(number_of_iterations)):
        # planner.step(min_time_steps, max_time_steps, params['dt'])
        if params['hybrid']:
            if np.random.rand() < params['hybrid_p']:
                planner.step(min_time_steps, max_time_steps, integration_step)
            else:
                planner.neural_step(
                    obc.reshape(-1),
                    params['refine'],
                    refine_threshold=params['refine_threshold'],
                    using_one_step_cost=params['using_one_step_cost'],
                    cost_reselection=params['cost_reselection'])
        else:
            # planner.step(min_time_steps, max_time_steps, integration_step)
            planner.neural_step(
                obc.reshape(-1),
                params['refine'],
                refine_threshold=params['refine_threshold'],
                using_one_step_cost=params['using_one_step_cost'],
                cost_reselection=params['cost_reselection'])
        # planner.mpc_step(integration_step)
        solution = planner.get_solution()
        if solution is not None:  #and np.sum(solution[2]) < th:
            break
    toc = time.perf_counter()
    if solution is not None:
        print(solution[0], solution[2])
    costs = solution[2].sum() if solution is not None else np.inf
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': toc - tic,
        'successful': solution is not None,
        'costs': costs
    }

    print("\t{}, time: {} seconds, {}(ref:{}) costs".format(
        result['successful'], result['planning_time'], result['costs'],
        np.sum(data['cost'])))
    return result
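In this variant, `params['hybrid']` toggles a mixed expansion strategy: each iteration draws a uniform random number and, with probability `hybrid_p`, takes a classical SST propagation step (`planner.step`), otherwise a neural expansion step (`planner.neural_step`). With `hybrid` set to False, every iteration is neural.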
Example #5
import importlib
import numpy as np
from matplotlib import pyplot as plt
from sparse_rrt import _deep_smp_module, _sst_module

#[(0, 932), (1, 935), (2, 923), (8, 141), (5,931), (7, 927)]
# (5,931), (6, 286)
from mpnet.sst_envs.utils import load_data, get_obs_3d
system = "quadrotor_obs"

import sys
env_id = int(sys.argv[1])
traj_id = int(sys.argv[2])

data = load_data(system, env_id, traj_id)
print(data)
cost = data['cost'].sum()
print(cost)

obs_list = get_obs_3d('quadrotor_obs', "obs")[env_id]
obc_list = np.load(
    '/media/arclabdl1/HD1/YLmiao/YLmiao_from_unicorn/YLmiao/mpc-mpnet-cuda-yinglong/mpnet/sst_envs/{}_env_vox.npy'
    .format(system))
obc_list = obc_list[env_id].reshape(-1)

path_filename = '/media/arclabdl1/HD1/Linjun/mpc-mpnet-py/results/traj/quadrotor_obs/shm/%d/env_%d_id_%d.npy' % (
    env_id, env_id, traj_id)
mpc_path_path = np.load(path_filename, allow_pickle=True)

mpc_tree_path, mpc_tree_control, mpc_tree_time = np.load(
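The script above is cut off in the middle of the final `np.load(` call. The lines below are a hypothetical plotting continuation only: the 3D axes setup and the assumption that `mpc_path_path` is an (N, state_dim) array with x, y, z in the first three columns are mine, not from the original.

# Hypothetical continuation for visualization; assumes mpc_path_path is an
# (N, state_dim) array whose first three columns are the x, y, z position.
from mpl_toolkits.mplot3d import Axes3D  # registers the 3d projection on older matplotlib

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
path = np.asarray(mpc_path_path)
ax.plot(path[:, 0], path[:, 1], path[:, 2], label='MPC-MPNet path')
ax.scatter(path[0, 0], path[0, 1], path[0, 2], c='g', label='start')
ax.scatter(path[-1, 0], path[-1, 1], path[-1, 2], c='r', label='goal')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
ax.legend()
plt.show()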
Example #6
def experiment(env_id,
               traj_id,
               verbose=False,
               system='cartpole_obs',
               params=None):
    print("env {}, traj {}".format(env_id, traj_id))
    if system in ['cartpole_obs', 'acrobot_obs', 'car_obs']:
        obs_list = get_obs(system, env_id)[env_id]
    elif system in ['quadrotor_obs']:
        obs_list = get_obs_3d(system, env_id)[env_id].reshape(-1, 3)
    data = load_data(system, env_id, traj_id)
    ref_path = data['path']
    ref_cost = data['cost'].sum()
    start_goal = data['start_goal']
    min_time_steps = params['min_time_steps']
    max_time_steps = params['max_time_steps']
    integration_step = params['integration_step']

    if system == 'quadrotor_obs':
        env_constr = standard_cpp_systems.RectangleObs3D
        env = env_constr(obs_list, params['width'], 'quadrotor')
    else:
        print("unkown system")
        exit(-1)
    planner = _sst_module.SSTWrapper(state_bounds=env.get_state_bounds(),
                                     control_bounds=env.get_control_bounds(),
                                     distance=env.distance_computer(),
                                     start_state=start_goal[0],
                                     goal_state=start_goal[-1],
                                     goal_radius=params['goal_radius'],
                                     random_seed=params['random_seed'],
                                     sst_delta_near=params['sst_delta_near'],
                                     sst_delta_drain=params['sst_delta_drain'])
    solution = planner.get_solution()

    data_cost = np.sum(data['cost'])
    # th = 1.2 * data_cost
    ## start experiment
    tic = time.perf_counter()
    for iteration in tqdm(range(params['number_of_iterations'])):
        planner.step(env, min_time_steps, max_time_steps, integration_step)
        if iteration % 100 == 0:
            solution = planner.get_solution()
            # and np.sum(solution[2]) < th:
            if ((solution is not None and solution[2].sum() <= ref_cost * 1.4)
                    or time.perf_counter() - tic > params['max_planning_time']):
                break
    toc = time.perf_counter()

    costs = solution[2].sum() if solution is not None else np.inf
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': toc - tic,
        'successful': solution is not None,
        'costs': costs
    }

    print("\t{}, time: {} seconds, {}(ref:{}) costs".format(
        result['successful'], result['planning_time'], result['costs'],
        np.sum(data['cost'])))
    return result
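This SST baseline reads only classical-planner settings from `params`. The keys below match what the function uses; the values are placeholders, not the actual experiment settings.

# Placeholder values; keys match what the SST baseline above reads from `params`.
sst_params = {
    'number_of_iterations': 600000,
    'max_planning_time': 300.0,   # seconds
    'min_time_steps': 1,
    'max_time_steps': 100,
    'integration_step': 2e-3,
    'width': 1.0,                 # obstacle size passed to RectangleObs3D
    'goal_radius': 2.0,
    'random_seed': 0,
    'sst_delta_near': 0.5,
    'sst_delta_drain': 0.1,
}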
Example #7
def experiment(env_id, traj_id, verbose=False, model='acrobot_obs', params_module=None):
    print("env {}, traj {}".format(env_id, traj_id))
    obs_list = get_obs(model, env_id)[env_id].reshape(-1, 2)
    data = load_data(model, env_id, traj_id)
    ref_path = data['path']
    start_goal = data['start_goal']
    env_vox = np.load('mpnet/sst_envs/data/acrobot_obs_env_vox.npy')
    obc = env_vox[env_id, 0]
    width = 6
    number_of_iterations = 60000
    number_of_refine = 120000
    min_time_steps, max_time_steps = 1, 200
    integration_step = 1e-2
    params = params_module.get_params()
    planner = _deep_smp_module.DSSTMPCWrapper(
            start_state=np.array(ref_path[0]),
            goal_state=np.array(ref_path[-1]),
            goal_radius=params['goal_radius'],
            random_seed=0,
            sst_delta_near=params['sst_delta_near'],
            sst_delta_drain=params['sst_delta_drain'],
            obs_list=obs_list,
            width=params['width'],
            verbose=params['verbose'],
            mpnet_weight_path=params['mpnet_weight_path'], 
            cost_predictor_weight_path=params['cost_predictor_weight_path'],
            cost_to_go_predictor_weight_path=params['cost_to_go_predictor_weight_path'],
            num_sample=params['cost_samples'],
            ns=params['n_sample'], nt=params['n_t'], ne=params['n_elite'], max_it=params['max_it'],
            converge_r=params['converge_r'], mu_u=params['mu_u'], std_u=params['sigma_u'], mu_t=params['mu_t'], 
            std_t=params['sigma_t'], t_max=params['t_max'], step_size=params['step_size'], integration_step=params['dt'], 
            device_id=params['device_id'], refine_lr=params['refine_lr']
        )
    solution = planner.get_solution()

    ## start experiment
    tic = time.perf_counter()
    for iteration in tqdm(range(number_of_iterations)):
        planner.mpc_step(integration_step)
        # planner.step(min_time_steps, max_time_steps, integration_step)
        solution = planner.get_solution()
        if solution is not None:            
            break
    for iteration in tqdm(range(number_of_refine)):
        solution = planner.get_solution()
        # if iteration % 1000 == 0:
        #     print(solution[2].sum(), data['cost'].sum())
        # guard: the search loop above may have ended without finding a solution
        if solution is not None and solution[2].sum() < data['cost'].sum() * 1.2:
            break
        planner.step(min_time_steps, max_time_steps, integration_step)
    toc = time.perf_counter()
    costs = solution[2].sum() if solution is not None else np.inf
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': toc-tic,
        'successful': solution is not None,
        'costs': costs
    }
    
    print("\t{}, time: {} seconds, {}(ref:{}) costs".format(
            result['successful'],
            result['planning_time'],
            result['costs'],
            data['cost'].sum()))
    return result
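Unlike the earlier examples, this variant splits planning into two phases: a search phase that calls `planner.mpc_step` until any solution is found, followed by a refinement phase that keeps taking classical SST steps until the solution cost drops below 1.2× the reference cost or the refinement budget `number_of_refine` is exhausted.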