Пример #1
0
#ctypes.cdll.LoadLibrary('')
lib1 = CDLL("/home/yinglong/Documents/kinodynamic/sparse_rrt/deps/trajopt/build/lib/libsco.so")
lib2 = CDLL("/home/yinglong/Documents/kinodynamic/sparse_rrt/deps/trajopt/build/lib/libutils.so")
#from env.cartpole import CartPole
import sparse_rrt
from sparse_rrt.systems import standard_cpp_systems
from sparse_rrt import _sst_module
import numpy as np
import time
import matplotlib.pyplot as plt
from sparse_rrt.systems.pendulum import Pendulum
import pickle
#obs_list = np.array(obs_list)
#system = standard_cpp_systems.PSOPTCartPole()
_system = sparse_rrt._sst_module.PSOPTPendulum()
bvp_solver = _sst_module.PSOPTBVPWrapper(_system, 2, 1, 0)
#start = np.array([0., 0.])
#end = np.array([np.pi/2, 0.])
low = []
high = []
state_bounds = _system.get_state_bounds()
for i in range(len(state_bounds)):
    low.append(state_bounds[i][0])
    high.append(state_bounds[i][1])

for i in range(10):
    f = open('data/pendulum/0/path_%d.pkl' % (i), 'rb')
    state = pickle.load(f)

    f = open('data/pendulum/0/control_%d.pkl' % (i), 'rb')
    control = pickle.load(f)
Пример #2
0
def main(args):
    # set seed
    print(args.model_path)
    torch_seed = np.random.randint(low=0, high=1000)
    np_seed = np.random.randint(low=0, high=1000)
    py_seed = np.random.randint(low=0, high=1000)
    torch.manual_seed(torch_seed)
    np.random.seed(np_seed)
    random.seed(py_seed)
    # Build the models
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)

    # setup evaluation function and load function
    if args.env_type == 'pendulum':
        IsInCollision = pendulum.IsInCollision
        normalize = pendulum.normalize
        unnormalize = pendulum.unnormalize
        obs_file = None
        obc_file = None
        dynamics = pendulum.dynamics
        jax_dynamics = pendulum.jax_dynamics
        enforce_bounds = pendulum.enforce_bounds
        cae = cae_identity
        mlp = MLP
        obs_f = False
        #system = standard_cpp_systems.PSOPTPendulum()
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
    elif args.env_type == 'cartpole_obs':
        IsInCollision = cartpole.IsInCollision
        normalize = cartpole.normalize
        unnormalize = cartpole.unnormalize
        obs_file = None
        obc_file = None
        dynamics = cartpole.dynamics
        jax_dynamics = cartpole.jax_dynamics
        enforce_bounds = cartpole.enforce_bounds
        cae = CAE_acrobot_voxel_2d
        mlp = mlp_acrobot.MLP
        obs_f = True
        #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole')
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
    elif args.env_type == 'acrobot_obs':
        IsInCollision = acrobot_obs.IsInCollision
        #IsInCollision = lambda x, obs: False
        normalize = acrobot_obs.normalize
        unnormalize = acrobot_obs.unnormalize
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        xdot = acrobot_obs.dynamics
        jax_dynamics = acrobot_obs.jax_dynamics
        enforce_bounds = acrobot_obs.enforce_bounds
        cae = CAE_acrobot_voxel_2d
        mlp = mlp_acrobot.MLP
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 50, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0

    elif args.env_type == 'acrobot_obs_2':
        IsInCollision = acrobot_obs.IsInCollision
        normalize = acrobot_obs.normalize
        unnormalize = acrobot_obs.unnormalize
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        xdot = acrobot_obs.dynamics
        jax_dynamics = acrobot_obs.jax_dynamics
        enforce_bounds = acrobot_obs.enforce_bounds
        cae = CAE_acrobot_voxel_2d_2
        mlp = mlp_acrobot.MLP2
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0

    elif args.env_type == 'acrobot_obs_3':
        IsInCollision = acrobot_obs.IsInCollision
        normalize = acrobot_obs.normalize
        unnormalize = acrobot_obs.unnormalize
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        xdot = acrobot_obs.dynamics
        jax_dynamics = acrobot_obs.jax_dynamics
        enforce_bounds = acrobot_obs.enforce_bounds
        mlp = mlp_acrobot.MLP3
        cae = CAE_acrobot_voxel_2d_2
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0

    elif args.env_type == 'acrobot_obs_5':
        IsInCollision = acrobot_obs.IsInCollision
        normalize = acrobot_obs.normalize
        unnormalize = acrobot_obs.unnormalize
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        xdot = acrobot_obs.dynamics
        jax_dynamics = acrobot_obs.jax_dynamics
        enforce_bounds = acrobot_obs.enforce_bounds
        cae = CAE_acrobot_voxel_2d_3
        mlp = mlp_acrobot.MLP
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0
    elif args.env_type == 'acrobot_obs_6':
        IsInCollision = acrobot_obs.IsInCollision
        normalize = acrobot_obs.normalize
        unnormalize = acrobot_obs.unnormalize
        obs_file = None
        obc_file = None
        xdot = acrobot_obs.dynamics
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        jax_dynamics = acrobot_obs.jax_dynamics
        enforce_bounds = acrobot_obs.enforce_bounds
        cae = CAE_acrobot_voxel_2d_3
        mlp = mlp_acrobot.MLP4
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0

    elif args.env_type == 'acrobot_obs_6':
        IsInCollision = acrobot_obs.IsInCollision
        normalize = acrobot_obs.normalize
        unnormalize = acrobot_obs.unnormalize
        obs_file = None
        obc_file = None
        xdot = acrobot_obs.dynamics
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        jax_dynamics = acrobot_obs.jax_dynamics
        enforce_bounds = acrobot_obs.enforce_bounds
        mlp = mlp_acrobot.MLP5
        cae = CAE_acrobot_voxel_2d_3
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0

    elif args.env_type == 'acrobot_obs_8':
        IsInCollision = acrobot_obs.IsInCollision
        #IsInCollision = lambda x, obs: False
        normalize = acrobot_obs.normalize
        unnormalize = acrobot_obs.unnormalize
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        xdot = acrobot_obs.dynamics
        jax_dynamics = acrobot_obs.jax_dynamics
        enforce_bounds = acrobot_obs.enforce_bounds
        cae = CAE_acrobot_voxel_2d_3
        mlp = mlp_acrobot.MLP6
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        #num_steps = 21
        num_steps = 21  #args.num_steps*2
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        #traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init:
        #def cem_trajopt(x0, x1, step_sz, num_steps, x_init, u_init, t_init):
        #    u, t = acrobot_obs.trajopt(x0, x1, 500, num_steps, step_sz*1, step_sz*(num_steps-1), x_init, u_init, t_init)
        #    xs, us, dts, valid = propagate(x0, u, t, dynamics=dynamics, enforce_bounds=enforce_bounds, IsInCollision=lambda x: False, system=system, step_sz=step_sz)
        #    return xs, us, dts
        #traj_opt = cem_trajopt
        goal_S0 = np.diag([1., 1., 0, 0])
        goal_rho0 = 1.0

    mpNet0 = KMPNet(args.total_input_size, args.AE_input_size,
                    args.mlp_input_size, args.output_size, cae, mlp)
    mpNet1 = KMPNet(args.total_input_size, args.AE_input_size,
                    args.mlp_input_size, args.output_size, cae, mlp)

    # load previously trained model if start epoch > 0
    #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps)
    model_path = 'kmpnet_epoch_%d_direction_0.pkl' % (args.start_epoch)
    if args.start_epoch > 0:
        load_net_state(mpNet0, os.path.join(args.model_path, model_path))
        torch_seed, np_seed, py_seed = load_seed(
            os.path.join(args.model_path, model_path))
        # set seed after loading
        torch.manual_seed(torch_seed)
        np.random.seed(np_seed)
        random.seed(py_seed)
    if torch.cuda.is_available():
        mpNet0.cuda()
        mpNet0.mlp.cuda()
        mpNet0.encoder.cuda()
        if args.opt == 'Adagrad':
            mpNet0.set_opt(torch.optim.Adagrad, lr=args.learning_rate)
        elif args.opt == 'Adam':
            mpNet0.set_opt(torch.optim.Adam, lr=args.learning_rate)
        elif args.opt == 'SGD':
            mpNet0.set_opt(torch.optim.SGD,
                           lr=args.learning_rate,
                           momentum=0.9)
    if args.start_epoch > 0:
        load_opt_state(mpNet0, os.path.join(args.model_path, model_path))

    # load previously trained model if start epoch > 0
    #model_path='kmpnet_epoch_%d_direction_1_step_%d.pkl' %(args.start_epoch, args.num_steps)
    model_path = 'kmpnet_epoch_%d_direction_1.pkl' % (args.start_epoch)
    if args.start_epoch > 0:
        load_net_state(mpNet1, os.path.join(args.model_path, model_path))
        torch_seed, np_seed, py_seed = load_seed(
            os.path.join(args.model_path, model_path))
        # set seed after loading
        torch.manual_seed(torch_seed)
        np.random.seed(np_seed)
        random.seed(py_seed)
    if torch.cuda.is_available():
        mpNet1.cuda()
        mpNet1.mlp.cuda()
        mpNet1.encoder.cuda()
        if args.opt == 'Adagrad':
            mpNet1.set_opt(torch.optim.Adagrad, lr=args.learning_rate)
        elif args.opt == 'Adam':
            mpNet1.set_opt(torch.optim.Adam, lr=args.learning_rate)
        elif args.opt == 'SGD':
            mpNet1.set_opt(torch.optim.SGD,
                           lr=args.learning_rate,
                           momentum=0.9)
    if args.start_epoch > 0:
        load_opt_state(mpNet1, os.path.join(args.model_path, model_path))

    # define informer
    circular = system.is_circular_topology()

    def informer(env, x0, xG, direction):
        x0_x = torch.from_numpy(x0.x).type(torch.FloatTensor)
        xG_x = torch.from_numpy(xG.x).type(torch.FloatTensor)
        x0_x = normalize_func(x0_x)
        xG_x = normalize_func(xG_x)
        if torch.cuda.is_available():
            x0_x = x0_x.cuda()
            xG_x = xG_x.cuda()
        if direction == 0:
            x = torch.cat([x0_x, xG_x], dim=0)
            mpNet = mpNet0
            if torch.cuda.is_available():
                x = x.cuda()
            next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data
            next_state = unnormalize_func(next_state).numpy()[0]
            delta_x = next_state - x0.x
            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(
                        delta_x[i] / (2 * np.pi)) * (2 * np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2 * np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.5:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2 * np.pi
                        if delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2 * np.pi

            res = Node(x0.x + delta_x)
            cov = np.diag([0.02, 0.02, 0.02, 0.02])
            #mean = next_state
            #next_state = np.random.multivariate_normal(mean=next_state,cov=cov)
            mean = np.zeros(next_state.shape)
            rand_x_init = np.random.multivariate_normal(mean=mean,
                                                        cov=cov,
                                                        size=num_steps)
            rand_x_init[0] = rand_x_init[0] * 0.
            rand_x_init[-1] = rand_x_init[-1] * 0.

            x_init = np.linspace(x0.x, x0.x + delta_x, num_steps) + rand_x_init
            ## TODO: : change this to general case
            u_init_i = np.random.uniform(low=[-4.],
                                         high=[4],
                                         size=(num_steps, 1))
            u_init = u_init_i
            #u_init_i = control[max_d_i]
            cost_i = (num_steps - 1) * step_sz  #TOEDIT
            #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i))
            #u_init = u_init + np.random.normal(scale=1., size=u_init.shape)
            t_init = np.linspace(0, cost_i, num_steps)
            """
            print('init:')
            print('x_init:')
            print(x_init)
            print('u_init:')
            print(u_init)
            print('t_init:')
            print(t_init)
            print('xw:')
            print(next_state)
            """
        else:
            x = torch.cat([x0_x, xG_x], dim=0)
            mpNet = mpNet1
            next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data
            next_state = unnormalize_func(next_state).numpy()[0]
            delta_x = next_state - x0.x
            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(
                        delta_x[i] / (2 * np.pi)) * (2 * np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2 * np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.5:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2 * np.pi
                        elif delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2 * np.pi
            #next_state = state[max_d_i] + delta_x
            next_state = x0.x + delta_x
            res = Node(next_state)
            # initial: from max_d_i to max_d_i+1
            x_init = np.linspace(next_state, x0.x, num_steps) + rand_x_init
            # action: copy over to number of steps
            u_init_i = np.random.uniform(low=[-4.],
                                         high=[4],
                                         size=(num_steps, 1))
            u_init = u_init_i
            cost_i = (num_steps - 1) * step_sz
            #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i))
            #u_init = u_init + np.random.normal(scale=1., size=u_init.shape)
            t_init = np.linspace(0, cost_i, num_steps)
        return res, x_init, u_init, t_init

    def init_informer(env, x0, xG, direction):
        if direction == 0:
            next_state = xG.x
            delta_x = next_state - x0.x

            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(
                        delta_x[i] / (2 * np.pi)) * (2 * np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2 * np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    #print('inside init_informer')
                    #print('delta_x[%d]: %f' % (i, delta_x[i]))
                    if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.9:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2 * np.pi
                        if delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2 * np.pi
            res = Node(next_state)
            cov = np.diag([0.02, 0.02, 0.02, 0.02])
            #mean = next_state
            #next_state = np.random.multivariate_normal(mean=next_state,cov=cov)
            mean = np.zeros(next_state.shape)
            rand_x_init = np.random.multivariate_normal(mean=mean,
                                                        cov=cov,
                                                        size=num_steps)
            rand_x_init[0] = rand_x_init[0] * 0.
            rand_x_init[-1] = rand_x_init[-1] * 0.

            x_init = np.linspace(x0.x, x0.x + delta_x, num_steps) + rand_x_init
            ## TODO: : change this to general case
            u_init_i = np.random.uniform(low=[-4.],
                                         high=[4],
                                         size=(num_steps, 1))
            u_init = u_init_i
            #u_init_i = control[max_d_i]
            #cost_i = 10*step_sz
            cost_i = (num_steps - 1) * step_sz

            #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i))
            #u_init = u_init + np.random.normal(scale=1., size=u_init.shape)
            t_init = np.linspace(0, cost_i, num_steps)

        else:
            next_state = xG.x
            delta_x = x0.x - next_state
            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(
                        delta_x[i] / (2 * np.pi)) * (2 * np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2 * np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    if rand_d < 1 and np.abs(delta_x[i]) >= np.pi * 0.5:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2 * np.pi
                        elif delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2 * np.pi
            #next_state = state[max_d_i] + delta_x
            res = Node(next_state)
            # initial: from max_d_i to max_d_i+1
            x_init = np.linspace(next_state, next_state + delta_x,
                                 num_steps) + rand_x_init
            # action: copy over to number of steps
            u_init_i = np.random.uniform(low=[-4.],
                                         high=[4],
                                         size=(num_steps, 1))
            u_init = u_init_i
            cost_i = (num_steps - 1) * step_sz
            #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i))
            #u_init = u_init + np.random.normal(scale=1., size=u_init.shape)
            t_init = np.linspace(0, cost_i, num_steps)
        return x_init, u_init, t_init

    # load data
    print('loading...')
    if args.seen_N > 0:
        seen_test_data = data_loader.load_test_dataset(args.seen_N,
                                                       args.seen_NP,
                                                       args.data_folder, obs_f,
                                                       args.seen_s,
                                                       args.seen_sp)
    if args.unseen_N > 0:
        unseen_test_data = data_loader.load_test_dataset(
            args.unseen_N, args.unseen_NP, args.data_folder, obs_f,
            args.unseen_s, args.unseen_sp)
    # test
    # testing

    print('testing...')
    seen_test_suc_rate = 0.
    unseen_test_suc_rate = 0.
    T = 1
    for _ in range(T):
        # unnormalize function
        normalize_func = lambda x: normalize(x, args.world_size)
        unnormalize_func = lambda x: unnormalize(x, args.world_size)
        # seen
        if args.seen_N > 0:
            time_file = os.path.join(
                args.model_path,
                'time_seen_epoch_%d_mlp.p' % (args.start_epoch))
            fes_path_, valid_path_ = eval_tasks(
                mpNet0, mpNet1, seen_test_data, args.model_path, time_file,
                IsInCollision, normalize_func, unnormalize_func, informer,
                init_informer, system, dynamics, xdot, jax_dynamics,
                enforce_bounds, traj_opt, step_sz, num_steps)
            valid_path = valid_path_.flatten()
            fes_path = fes_path_.flatten(
            )  # notice different environments are involved
            seen_test_suc_rate += fes_path.sum() / valid_path.sum()
        # unseen
        if args.unseen_N > 0:
            time_file = os.path.join(
                args.model_path,
                'time_unseen_epoch_%d_mlp.p' % (args.start_epoch))
            fes_path_, valid_path_ = eval_tasks(
                mpNet0, mpNet1, unseen_test_data, args.model_path, time_file,
                IsInCollision, normalize_func, unnormalize_func, informer,
                init_informer, system, dynamics, xdot, jax_dynamics,
                enforce_bounds, traj_opt, step_sz, num_steps)
            valid_path = valid_path_.flatten()
            fes_path = fes_path_.flatten(
            )  # notice different environments are involved
            unseen_test_suc_rate += fes_path.sum() / valid_path.sum()
    if args.seen_N > 0:
        seen_test_suc_rate = seen_test_suc_rate / T
        f = open(
            os.path.join(args.model_path,
                         'seen_accuracy_epoch_%d.txt' % (args.start_epoch)),
            'w')
        f.write(str(seen_test_suc_rate))
        f.close()
    if args.unseen_N > 0:
        unseen_test_suc_rate = unseen_test_suc_rate / T  # Save the models
        f = open(
            os.path.join(args.model_path,
                         'unseen_accuracy_epoch_%d.txt' % (args.start_epoch)),
            'w')
        f.write(str(unseen_test_suc_rate))
        f.close()
Пример #3
0
def main(args):
    # set seed
    print(args.model_path)
    torch_seed = np.random.randint(low=0, high=1000)
    np_seed = np.random.randint(low=0, high=1000)
    py_seed = np.random.randint(low=0, high=1000)
    #torch.manual_seed(torch_seed)
    np.random.seed(np_seed)
    random.seed(py_seed)
    # Build the models
    #if torch.cuda.is_available():
    #    torch.cuda.set_device(args.device)

    # setup evaluation function and load function
    if args.env_type == 'pendulum':
        obs_file = None
        obc_file = None
        obs_f = False
        #system = standard_cpp_systems.PSOPTPendulum()
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
    elif args.env_type == 'cartpole_obs':
        step_sz = 0.002
        num_steps = 21
        goal_radius = 1.5
        random_seed = 0
        delta_near = 2.0
        delta_drain = 1.2
        cost_threshold = 1.2
        min_time_steps = 10
        max_time_steps = 200
        integration_step = 0.002
        obs_f = True
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTCartPole()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        obs_width = 4.0
        IsInCollision = cartpole_IsInCollision
        enforce_bounds = cartpole_enforce_bounds

    elif args.env_type == 'acrobot_obs':
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)

        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 200, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0
        IsInCollision = acrobot_IsInCollision
        enforce_bounds = acrobot_enforce_bounds

    if args.env_type == 'pendulum':
        step_sz = 0.002
        num_steps = 20

    elif args.env_type in [
            'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4',
            'acrobot_obs_8'
    ]:
        #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
        obs_width = 6.0
        step_sz = 0.02
        num_steps = 21
        goal_radius = 2.0
        random_seed = 0
        delta_near = 0.1
        delta_drain = 0.05

    # load previously trained model if start epoch > 0
    #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps)
    mlp_path = os.path.join(os.getcwd() + '/c++/',
                            'acrobot_mlp_annotated_test_gpu.pt')
    encoder_path = os.path.join(os.getcwd() + '/c++/',
                                'acrobot_encoder_annotated_test_cpu.pt')
    print('mlp_path:')
    print(mlp_path)

    #####################################################
    def plan_one_path(obs_i, obs, obc, detailed_data_path, data_path,
                      start_state, goal_state, goal_inform_state, cost_i,
                      max_iteration, out_queue_t, out_queue_cost, random_seed):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            obs_width = 4.0
            psopt_system = _sst_module.PSOPTCartPole()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 4., 'cartpole')
            #distance_computer = propagate_system.distance_computer()
            distance_computer = _sst_module.euclidean_distance(
                np.array(propagate_system.is_circular_topology()))
            step_sz = 0.002
            num_steps = 21
            goal_radius = 1.5
            random_seed = 0
            delta_near = 2.0
            delta_drain = 1.2
            #delta_near=.2
            #delta_drain=.1
            cost_threshold = 1.05
            min_time_steps = 10
            max_time_steps = 200
            #min_time_steps = 5
            #max_time_steps = 400
            integration_step = 0.002
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius = 2.0
            random_seed = 0
            delta_near = 1.0
            delta_drain = 0.5
            cost_threshold = 1.05
            min_time_steps = 5
            max_time_steps = 100
            integration_step = 0.02
        planner = _sst_module.SSTWrapper(
            state_bounds=propagate_system.get_state_bounds(),
            control_bounds=propagate_system.get_control_bounds(),
            distance=distance_computer,
            start_state=start_state,
            goal_state=goal_state,
            goal_radius=goal_radius,
            random_seed=random_seed,
            sst_delta_near=delta_near,
            sst_delta_drain=delta_drain)
        #print('creating planner...')
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()

        for i in range(max_iteration):
            planner.step(propagate_system, min_time_steps, max_time_steps,
                         integration_step)

            # early break for initial path
            solution = planner.get_solution()
            if solution is not None:
                #print('solution found already.')
                # based on cost break
                xs, us, ts = solution
                t_sst = np.sum(ts)
                #print(t_sst)
                #print(cost_i)
                if t_sst <= cost_i * cost_threshold:
                    print('solved in %d iterations' % (i))
                    break
        plan_time = time.time() - time0
        solution = planner.get_solution()
        xs, us, ts = solution
        print(np.linalg.norm(np.array(xs[-1]) - goal_state))
        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        #ax.set_xlim(-30, 30)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()
            
        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dx = 1
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        #imax = int(2*30./dx)
        imax = int(2*np.pi/dtheta)
        jmin = 0
        jmax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(jmin, jmax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        draw_update_line(ax)
        #state_t = start_state

        xs_to_plot = np.array(detailed_data_path)
        for i in range(len(xs_to_plot)):
            xs_to_plot[i] = wrap_angle(detailed_data_path[i], propagate_system)
        ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='lightgreen', s=0.5)
        # draw start and goal
        #ax.scatter(start_state[0], goal_state[0], marker='X')
        draw_update_line(ax)
        #plt.waitforbuttonpress()

        
        if solution is not None:
            xs, us, ts = solution
            
            # propagate data
            p_start = xs[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(us)):
                #state_i.append(len(detail_paths)-1)
                print(ts[k])
                max_steps = int(np.round(ts[k]/step_sz))
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, us[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            #state[-1] = xs[-1]
            
            
            
            xs_to_plot = np.array(state)
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
            ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green', s=0.5)
            start_state_np = np.array(start_state)
            goal_state_np = np.array(goal_state)
            ax.scatter([start_state_np[0]], [start_state_np[1]], c='blue', marker='*')
            ax.scatter([goal_state_np[0]], [goal_state_np[1]], c='red', marker='*')

            # draw start and goal
            #ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            plt.waitforbuttonpress()
        """

        # validate if the path contains collision
        if solution is not None:
            res_x, res_u, res_t = solution

            print('solution_x:')
            print(res_x)
            print('path_x:')
            print(np.array(data_path))

            # propagate data
            p_start = res_x[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(res_u)):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(np.round(res_t[k] / step_sz))
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                #p_start = res_x[k]
                #state[-1] = res_x[k]
                for step in range(1, max_steps + 1):
                    p_start = dynamics(p_start, res_u[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
                        # check collision for the new state
                        if IsInCollision(p_start, obs_i):
                            print(
                                'collision happens at u_index: %d, step: %d' %
                                (k, step))
                        assert not IsInCollision(p_start, obs_i)

            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            #state[-1] = res_x[-1]
        # validation end

        print('plan time: %fs' % (plan_time))
        if solution is None:
            print('failed.')
            out_queue_t.put(-1)
            out_queue_cost.put(-1)
        else:
            print('path succeeded.')
            out_queue_t.put(plan_time)
            out_queue_cost.put(t_sst)

    ####################################################################################

    # load data
    print('loading...')
    if args.seen_N > 0:
        seen_test_data = data_loader.load_test_dataset(args.seen_N,
                                                       args.seen_NP,
                                                       args.data_folder, obs_f,
                                                       args.seen_s,
                                                       args.seen_sp)
    if args.unseen_N > 0:
        unseen_test_data = data_loader.load_test_dataset(
            args.unseen_N, args.unseen_NP, args.data_folder, obs_f,
            args.unseen_s, args.unseen_sp)
    # test
    # testing

    queue_t = Queue(1)
    queue_cost = Queue(1)
    print('testing...')
    seen_test_suc_rate = 0.
    unseen_test_suc_rate = 0.

    obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data

    obc = obc.astype(np.float32)
    # for all planning, use a flattened vector to store
    plan_times = []
    plan_res_all = []
    plan_costs = []
    data_costs = []

    # store in a 2d vector, for env and path
    plan_res_env = []
    plan_time_env = []
    plan_cost_env = []
    data_cost_env = []

    # directory to save the results
    res_path = args.res_path
    res_path = res_path + args.env_type + "_sst_compare_with_mpnet/"

    if args.env_type == 'acrobot_obs':
        res_path = '/media/arclabdl1/HD1/YLmiao/mpc-mpnet-cuda-yinglong/results/cpp_full/acrobot_obs/default_small_model_batch/'
    elif args.env_type == 'cartpole_obs':
        res_path = '/media/arclabdl1/HD1/YLmiao/mpc-mpnet-cuda-yinglong/results/cpp_full/cartpole_obs/default_small_model_batch/'

    mpnet_tree_time = np.load(res_path + 'time_10_100.npy', allow_pickle=True)
    mpnet_tree_sr = np.load(res_path + 'sr_10_100.npy', allow_pickle=True)
    mpnet_tree_cost = np.load(res_path + 'costs_10_100.npy', allow_pickle=True)
    if not os.path.exists(res_path):
        os.makedirs(res_path)

    for i in range(len(paths)):
        new_obs_i = []
        obs_i = obs[i]
        plan_res_path = []
        plan_time_path = []
        plan_cost_path = []
        data_cost_path = []
        for k in range(len(obs_i)):
            obs_pt = []
            obs_pt.append(obs_i[k][0] - obs_width / 2)
            obs_pt.append(obs_i[k][1] - obs_width / 2)
            obs_pt.append(obs_i[k][0] - obs_width / 2)
            obs_pt.append(obs_i[k][1] + obs_width / 2)
            obs_pt.append(obs_i[k][0] + obs_width / 2)
            obs_pt.append(obs_i[k][1] + obs_width / 2)
            obs_pt.append(obs_i[k][0] + obs_width / 2)
            obs_pt.append(obs_i[k][1] - obs_width / 2)
            new_obs_i.append(obs_pt)
        obs_i = new_obs_i
        #print(obs_i)
        for j in range(len(paths[i])):
            start_state = sgs[i][j][0]
            #goal_inform_state = paths[i][j][-1]
            goal_inform_state = sgs[i][j][1]
            goal_state = sgs[i][j][1]
            # propagate data
            p_start = paths[i][j][0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(controls[i][j])):
                #state_i.append(len(detail_paths)-1)
                #max_steps = int(costs[i][j][k]/step_sz)
                max_steps = 1000000
                accum_cost = 0.
                for step in range(1, max_steps + 1):
                    p_start = dynamics(p_start, controls[i][j][k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
                        # check collision for the new state
                        if IsInCollision(p_start, obs_i):
                            print(
                                'collision happens at u_index: %d, step: %d' %
                                (k, step))
                        assert not IsInCollision(p_start, obs_i)
                    if np.linalg.norm(p_start - paths[i][j][k + 1]) <= 1e-3:
                        break
        # validation end

            cost_i = np.sum(cost)
            print('data cost:', cost_i)
            if mpnet_tree_sr[i][j] != 0:
                # use MPNet tree cost
                cost_i = mpnet_tree_cost[i][j]
                print('using mpnet cost: ', cost_i)

            #cost_i = 100000000.
            # acrobot: 300000
            # cartpole: 500000
            print('environment: %d/%d, path: %d/%d' %
                  (i + 1, len(paths), j + 1, len(paths[i])))
            plan_t_trials = []
            plan_cost_trials = []
            for trial in range(1):
                random_seed = random.randint(0, 100)
                #random_seed = 0
                p = Process(target=plan_one_path,
                            args=(obs_i, obs[i], obc[i], state, paths[i][j],
                                  start_state, goal_state, goal_inform_state,
                                  cost_i, args.num_iter, queue_t, queue_cost,
                                  random_seed))
                #plan_one_path(obs_i, obs[i], obc[i], state, paths[i][j], start_state, goal_state, goal_inform_state, cost_i, args.num_iter, queue_t, queue_cost, random_seed)
                p.start()
                p.join()
                plan_t = queue_t.get()
                plan_cost = queue_cost.get()
                if plan_t != -1:
                    plan_t_trials.append(plan_t)
                    plan_cost_trials.append(plan_cost)
            #assert len(plan_ts) == 10

            plan_t = np.mean(plan_t_trials)
            plan_cost = np.mean(plan_cost_trials)

            if plan_t == -1:
                # failed, do not record in the flattened list
                plan_res_all.append(0)
                # record in the 2d list
                plan_res_path.append(0)
                plan_time_path.append(plan_t)
                plan_cost_path.append(plan_cost)
                data_cost_path.append(-1.0)
            else:
                # record in the flattened list
                plan_res_all.append(1)
                plan_times.append(plan_t)
                plan_costs.append(plan_cost)
                data_costs.append(cost_i)
                # record in the 2d list
                plan_res_path.append(1)
                plan_time_path.append(plan_t)
                plan_cost_path.append(plan_cost)
                data_cost_path.append(cost_i)
            print('plan costs:')
            print(plan_costs)
            print('average accuracy up to now: %f' %
                  (np.array(plan_res_all).flatten().mean()))
            print('plan average time: %f' % (np.array(plan_times).mean()))
            print('plan time std: %f' % (np.array(plan_times).std()))
            print('plan average cost: %f' % (np.array(plan_costs).mean()))
            print('plan cost std: %f' % (np.array(plan_costs).std()))
            print('data average cost: %f' % (np.array(data_costs).mean()))
            print('data cost std: %f' % (np.array(data_costs).std()))

        # store in the 2d list
        plan_res_env.append(plan_res_path)
        plan_time_env.append(plan_time_path)
        plan_cost_env.append(plan_cost_path)
        data_cost_env.append(data_cost_path)

        # for every environment planned, save
        # save the 2d list
        # save as numpy array
        #np.save(res_path+"plan_res.npy", np.array(plan_res_env))
        #np.save(res_path+"plan_time.npy", np.array(plan_time_env))
        #np.save(res_path+"plan_cost.npy", np.array(plan_cost_env))
        #np.save(res_path+"data_cost.npy", np.array(data_cost_env))

    print('plan accuracy: %f' % (np.array(plan_res_all).flatten().mean()))
    print('plan average time: %f' % (np.array(plan_times).mean()))
    print('plan time std: %f' % (np.array(plan_times).std()))
    print('plan average cost: %f' % (np.array(plan_costs).mean()))
    print('plan cost std: %f' % (np.array(plan_costs).std()))
    print('data average cost: %f' % (np.array(data_costs).mean()))
    print('data cost std: %f' % (np.array(data_costs).std()))

    # save the 2d list
    # save as numpy array
    plan_res_env = np.array(plan_res_env)
    plan_time_env = np.array(plan_time_env)
    plan_cost_env = np.array(plan_cost_env)
    data_cost_env = np.array(data_cost_env)

    np.save(res_path + "plan_res.npy", plan_res_env)
    np.save(res_path + "plan_time.npy", plan_time_env)
    np.save(res_path + "plan_cost.npy", plan_cost_env)
    np.save(res_path + "data_cost.npy", data_cost_env)
Пример #4
0
    def plan_one_path(obs_i, obs, obc, detailed_data_path, data_path,
                      start_state, goal_state, goal_inform_state, cost_i,
                      max_iteration, out_queue_t, out_queue_cost, random_seed):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            obs_width = 4.0
            psopt_system = _sst_module.PSOPTCartPole()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 4., 'cartpole')
            #distance_computer = propagate_system.distance_computer()
            distance_computer = _sst_module.euclidean_distance(
                np.array(propagate_system.is_circular_topology()))
            step_sz = 0.002
            num_steps = 21
            goal_radius = 1.5
            random_seed = 0
            delta_near = 2.0
            delta_drain = 1.2
            #delta_near=.2
            #delta_drain=.1
            cost_threshold = 1.05
            min_time_steps = 10
            max_time_steps = 200
            #min_time_steps = 5
            #max_time_steps = 400
            integration_step = 0.002
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius = 2.0
            random_seed = 0
            delta_near = 1.0
            delta_drain = 0.5
            cost_threshold = 1.05
            min_time_steps = 5
            max_time_steps = 100
            integration_step = 0.02
        planner = _sst_module.SSTWrapper(
            state_bounds=propagate_system.get_state_bounds(),
            control_bounds=propagate_system.get_control_bounds(),
            distance=distance_computer,
            start_state=start_state,
            goal_state=goal_state,
            goal_radius=goal_radius,
            random_seed=random_seed,
            sst_delta_near=delta_near,
            sst_delta_drain=delta_drain)
        #print('creating planner...')
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()

        for i in range(max_iteration):
            planner.step(propagate_system, min_time_steps, max_time_steps,
                         integration_step)

            # early break for initial path
            solution = planner.get_solution()
            if solution is not None:
                #print('solution found already.')
                # based on cost break
                xs, us, ts = solution
                t_sst = np.sum(ts)
                #print(t_sst)
                #print(cost_i)
                if t_sst <= cost_i * cost_threshold:
                    print('solved in %d iterations' % (i))
                    break
        plan_time = time.time() - time0
        solution = planner.get_solution()
        xs, us, ts = solution
        print(np.linalg.norm(np.array(xs[-1]) - goal_state))
        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        #ax.set_xlim(-30, 30)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()
            
        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dx = 1
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        #imax = int(2*30./dx)
        imax = int(2*np.pi/dtheta)
        jmin = 0
        jmax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(jmin, jmax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        draw_update_line(ax)
        #state_t = start_state

        xs_to_plot = np.array(detailed_data_path)
        for i in range(len(xs_to_plot)):
            xs_to_plot[i] = wrap_angle(detailed_data_path[i], propagate_system)
        ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='lightgreen', s=0.5)
        # draw start and goal
        #ax.scatter(start_state[0], goal_state[0], marker='X')
        draw_update_line(ax)
        #plt.waitforbuttonpress()

        
        if solution is not None:
            xs, us, ts = solution
            
            # propagate data
            p_start = xs[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(us)):
                #state_i.append(len(detail_paths)-1)
                print(ts[k])
                max_steps = int(np.round(ts[k]/step_sz))
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, us[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            #state[-1] = xs[-1]
            
            
            
            xs_to_plot = np.array(state)
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
            ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green', s=0.5)
            start_state_np = np.array(start_state)
            goal_state_np = np.array(goal_state)
            ax.scatter([start_state_np[0]], [start_state_np[1]], c='blue', marker='*')
            ax.scatter([goal_state_np[0]], [goal_state_np[1]], c='red', marker='*')

            # draw start and goal
            #ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            plt.waitforbuttonpress()
        """

        # validate if the path contains collision
        if solution is not None:
            res_x, res_u, res_t = solution

            print('solution_x:')
            print(res_x)
            print('path_x:')
            print(np.array(data_path))

            # propagate data
            p_start = res_x[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(res_u)):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(np.round(res_t[k] / step_sz))
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                #p_start = res_x[k]
                #state[-1] = res_x[k]
                for step in range(1, max_steps + 1):
                    p_start = dynamics(p_start, res_u[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
                        # check collision for the new state
                        if IsInCollision(p_start, obs_i):
                            print(
                                'collision happens at u_index: %d, step: %d' %
                                (k, step))
                        assert not IsInCollision(p_start, obs_i)

            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            #state[-1] = res_x[-1]
        # validation end

        print('plan time: %fs' % (plan_time))
        if solution is None:
            print('failed.')
            out_queue_t.put(-1)
            out_queue_cost.put(-1)
        else:
            print('path succeeded.')
            out_queue_t.put(plan_time)
            out_queue_cost.put(t_sst)
Пример #5
0
def main(args):
    # set seed
    print(args.model_path)
    torch_seed = np.random.randint(low=0, high=1000)
    np_seed = np.random.randint(low=0, high=1000)
    py_seed = np.random.randint(low=0, high=1000)
    #torch.manual_seed(torch_seed)
    np.random.seed(np_seed)
    random.seed(py_seed)
    # Build the models
    #if torch.cuda.is_available():
    #    torch.cuda.set_device(args.device)

    # setup evaluation function and load function
    if args.env_type == 'pendulum':
        obs_file = None
        obc_file = None
        obs_f = False
        #system = standard_cpp_systems.PSOPTPendulum()
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
    elif args.env_type == 'cartpole_obs':
        normalize = cartpole.normalize
        unnormalize = cartpole.unnormalize
        obs_file = None
        obc_file = None
        #dynamics = cartpole.dynamics
        #jax_dynamics = cartpole.jax_dynamics
        #enforce_bounds = cartpole.enforce_bounds
        cae = CAE_acrobot_voxel_2d
        mlp = mlp_acrobot.MLP
        obs_f = True
        #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole')
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
    elif args.env_type == 'acrobot_obs':
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)

        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 20
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 200, num_steps, step_sz*1, step_sz*(num_steps-1), x_init, u_init, t_init)
        obs_width = 6.0
        step_sz = 0.02
        num_steps = 20
        goal_radius=2.0
        random_seed=0
        delta_near=0.1
        delta_drain=0.05


    elif args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']:
        #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
        obs_width = 6.0
        step_sz = 0.02
        num_steps = 20
        goal_radius=2.0
        random_seed=0
        delta_near=0.1
        delta_drain=0.05

    # load previously trained model if start epoch > 0
    #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps)
    mlp_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_MLP_lr0.010000_epoch_2850_step_20.pt')
    encoder_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_encoder_lr0.010000_epoch_2850_step_20.pt')
    cost_mlp_path = os.path.join(os.getcwd()+'/c++/','costnet_acrobot_obs_8_MLP_epoch_300_step_20.pt')
    cost_encoder_path = os.path.join(os.getcwd()+'/c++/','costnet_acrobot_obs_8_encoder_epoch_300_step_20.pt')

    print('mlp_path:')
    print(mlp_path)
    #####################################################
    def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, max_iteration, data, out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            system = _sst_module.CartPole()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 200, num_steps, step_sz*1, step_sz*50, x_init, u_init, t_init)
            goal_S0 = np.identity(4)
            goal_rho0 = 1.0
        elif args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            bvp_wrapper = _sst_module.PSOPTBVPWrapper(psopt_system, 4, 1, 0)
            step_sz = 0.02
            num_steps = 20
            psopt_num_steps = 20
            psopt_step_sz = 0.02
            goal_radius=2
            random_seed=0
            #delta_near=1.0
            #delta_drain=0.5
            delta_near=0.1
            delta_drain=0.05
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, 
                                              cost_mlp_path, cost_encoder_path, 
                                              20, psopt_num_steps+1, psopt_step_sz, step_sz, propagate_system, args.device)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obc:')
        #print(obc.shape)
        #print(delta_near)
        #print(delta_drain)
        #print('start_state:')
        #print(start_state)
        #print('goal_state:')
        #print(goal_state)

        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')
        
        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)
        circular = psopt_system.is_circular_topology()


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        
        data = np.array(data)
        ax.scatter(data[:,0], data[:,1], c='lightblue', s=10)
        ax.scatter(data[-1,0], data[-1,1], c='red', s=10, marker='*')

        draw_update_line(ax)
        state_t = start_state

        state_t = data[0]
        for data_i in range(0,len(data),num_steps):
            print('iteration: %d' % (data_i))
            print('state_t:')
            print(state_t)    

            
            min_dis_to_goal = 100000.
            min_xs_to_plot = []
            for trials in range(10):
                x_init, u_init, t_init = init_informer(propagate_system, state_t, data[data_i], psopt_num_steps+1, psopt_step_sz)
                print('x_init:')
                print(x_init)

                bvp_x, bvp_u, bvp_t = bvp_wrapper.solve(state_t, x_init[-1], 20, psopt_num_steps+1, 0.8*psopt_step_sz*psopt_num_steps, 2*psopt_step_sz*psopt_num_steps, \
                                                        x_init, u_init, t_init)
                print('bvp_x:')
                print(bvp_x)
                print('bvp_u:')
                print(bvp_u)
                print('bvp_t:')
                print(bvp_t)
                if len(bvp_u) != 0:# and bvp_t[0] > 0.01:  # turn bvp_t off if want to use step_bvp
                    # propagate data
                    #p_start = bvp_x[0]
                    p_start = state_t
                    detail_paths = [p_start]
                    detail_controls = []
                    detail_costs = []
                    state = [p_start]
                    control = []
                    cost = []
                    for k in range(len(bvp_t)):
                        #state_i.append(len(detail_paths)-1)
                        max_steps = int(np.round(bvp_t[k]/step_sz))
                        accum_cost = 0.
                        for step in range(1,max_steps+1):
                            p_start = dynamics(p_start, bvp_u[k], step_sz)
                            p_start = enforce_bounds(p_start)
                            detail_paths.append(p_start)
                            accum_cost += step_sz
                            if (step % 1 == 0) or (step == max_steps):
                                state.append(p_start)
                                cost.append(accum_cost)
                                accum_cost = 0.

                    xs_to_plot = np.array(state)
                    
                    for i in range(len(xs_to_plot)):
                        xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
                    delta_x = xs_to_plot[-1] - data[data_i]
                    for i in range(len(delta_x)):
                        if circular[i]:
                            delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi)
                            if delta_x[i] > np.pi:
                                delta_x[i] = delta_x[i] - 2*np.pi
                    dis = np.linalg.norm(delta_x)
                    if dis <= min_dis_to_goal:
                        min_dis_to_goal = dis
                        min_xs_to_plot = xs_to_plot

            #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green')
            ax.scatter(min_xs_to_plot[:,0], min_xs_to_plot[:,1], c='green', s=10.0)

            # draw start and goal
            #ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            #state_t = min_xs_to_plot[-1]
            # try using mpnet_res as new start

            state_t = data[data_i]




            #state_t = min_xs_to_plot[-1]
            print('data_i:')

            print(data[data_i])
            #else:
            #    # in incollision
            #    state_t = data[data_i]
        #if len(res_x) == 0:
        #    print('failed.')
        out_queue.put(0)
        #else:
        #    print('path succeeded.')
        #    out_queue.put(1)
    ####################################################################################



    # load data
    print('loading...')
    if args.seen_N > 0:
        seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP,
                                  args.data_folder, obs_f, args.seen_s, args.seen_sp)
    if args.unseen_N > 0:
        unseen_test_data = data_loader.load_test_dataset(args.unseen_N, args.unseen_NP,
                                  args.data_folder, obs_f, args.unseen_s, args.unseen_sp)
    # test
    # testing

    queue = Queue(1)
    print('testing...')
    seen_test_suc_rate = 0.
    unseen_test_suc_rate = 0.

    obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data
    obc = obc.astype(np.float32)
    #obc = torch.from_numpy(obc)
    #if torch.cuda.is_available():
    #    obc = obc.cuda()
    for i in range(len(paths)):
        new_obs_i = []
        obs_i = obs[i]
        for k in range(len(obs_i)):
            obs_pt = []
            obs_pt.append(obs_i[k][0]-obs_width/2)
            obs_pt.append(obs_i[k][1]-obs_width/2)
            obs_pt.append(obs_i[k][0]-obs_width/2)
            obs_pt.append(obs_i[k][1]+obs_width/2)
            obs_pt.append(obs_i[k][0]+obs_width/2)
            obs_pt.append(obs_i[k][1]+obs_width/2)
            obs_pt.append(obs_i[k][0]+obs_width/2)
            obs_pt.append(obs_i[k][1]-obs_width/2)
            new_obs_i.append(obs_pt)
        obs_i = new_obs_i
        #print(obs_i)
        for j in range(len(paths[i])):
            start_state = sgs[i][j][0]
            goal_inform_state = paths[i][j][-1]
            goal_state = sgs[i][j][1]
            #p = Process(target=plan_one_path, args=(obs[i], obc[i], start_state, goal_state, 500, queue))
            
            # propagate data
            p_start = paths[i][j][0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(controls[i][j])):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(costs[i][j][k]/step_sz)
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                #p_start = paths[i][j][k]
                #state[-1] = paths[i][j][k]
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, controls[i][j][k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    detail_controls.append(controls[i][j])
                    detail_costs.append(step_sz)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        control.append(controls[i][j][k])
                        cost.append(accum_cost)
                        accum_cost = 0.
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            state[-1] = paths[i][j][-1]
            data = state

            plan_one_path(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, 1000, data, queue)
Пример #6
0
    def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, max_iteration, data, out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            system = _sst_module.CartPole()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 200, num_steps, step_sz*1, step_sz*50, x_init, u_init, t_init)
            goal_S0 = np.identity(4)
            goal_rho0 = 1.0
        elif args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            bvp_wrapper = _sst_module.PSOPTBVPWrapper(psopt_system, 4, 1, 0)
            step_sz = 0.02
            num_steps = 20
            psopt_num_steps = 20
            psopt_step_sz = 0.02
            goal_radius=2
            random_seed=0
            #delta_near=1.0
            #delta_drain=0.5
            delta_near=0.1
            delta_drain=0.05
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, 
                                              cost_mlp_path, cost_encoder_path, 
                                              20, psopt_num_steps+1, psopt_step_sz, step_sz, propagate_system, args.device)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obc:')
        #print(obc.shape)
        #print(delta_near)
        #print(delta_drain)
        #print('start_state:')
        #print(start_state)
        #print('goal_state:')
        #print(goal_state)

        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')
        
        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)
        circular = psopt_system.is_circular_topology()


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        
        data = np.array(data)
        ax.scatter(data[:,0], data[:,1], c='lightblue', s=10)
        ax.scatter(data[-1,0], data[-1,1], c='red', s=10, marker='*')

        draw_update_line(ax)
        state_t = start_state

        state_t = data[0]
        for data_i in range(0,len(data),num_steps):
            print('iteration: %d' % (data_i))
            print('state_t:')
            print(state_t)    

            
            min_dis_to_goal = 100000.
            min_xs_to_plot = []
            for trials in range(10):
                x_init, u_init, t_init = init_informer(propagate_system, state_t, data[data_i], psopt_num_steps+1, psopt_step_sz)
                print('x_init:')
                print(x_init)

                bvp_x, bvp_u, bvp_t = bvp_wrapper.solve(state_t, x_init[-1], 20, psopt_num_steps+1, 0.8*psopt_step_sz*psopt_num_steps, 2*psopt_step_sz*psopt_num_steps, \
                                                        x_init, u_init, t_init)
                print('bvp_x:')
                print(bvp_x)
                print('bvp_u:')
                print(bvp_u)
                print('bvp_t:')
                print(bvp_t)
                if len(bvp_u) != 0:# and bvp_t[0] > 0.01:  # turn bvp_t off if want to use step_bvp
                    # propagate data
                    #p_start = bvp_x[0]
                    p_start = state_t
                    detail_paths = [p_start]
                    detail_controls = []
                    detail_costs = []
                    state = [p_start]
                    control = []
                    cost = []
                    for k in range(len(bvp_t)):
                        #state_i.append(len(detail_paths)-1)
                        max_steps = int(np.round(bvp_t[k]/step_sz))
                        accum_cost = 0.
                        for step in range(1,max_steps+1):
                            p_start = dynamics(p_start, bvp_u[k], step_sz)
                            p_start = enforce_bounds(p_start)
                            detail_paths.append(p_start)
                            accum_cost += step_sz
                            if (step % 1 == 0) or (step == max_steps):
                                state.append(p_start)
                                cost.append(accum_cost)
                                accum_cost = 0.

                    xs_to_plot = np.array(state)
                    
                    for i in range(len(xs_to_plot)):
                        xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
                    delta_x = xs_to_plot[-1] - data[data_i]
                    for i in range(len(delta_x)):
                        if circular[i]:
                            delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi)
                            if delta_x[i] > np.pi:
                                delta_x[i] = delta_x[i] - 2*np.pi
                    dis = np.linalg.norm(delta_x)
                    if dis <= min_dis_to_goal:
                        min_dis_to_goal = dis
                        min_xs_to_plot = xs_to_plot

            #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green')
            ax.scatter(min_xs_to_plot[:,0], min_xs_to_plot[:,1], c='green', s=10.0)

            # draw start and goal
            #ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            #state_t = min_xs_to_plot[-1]
            # try using mpnet_res as new start

            state_t = data[data_i]




            #state_t = min_xs_to_plot[-1]
            print('data_i:')

            print(data[data_i])
            #else:
            #    # in incollision
            #    state_t = data[data_i]
        #if len(res_x) == 0:
        #    print('failed.')
        out_queue.put(0)
Пример #7
0
def eval_tasks(mpNet0, mpNet1, env_type, test_data, save_dir, data_type, normalize_func = lambda x:x, unnormalize_func=lambda x: x, dynamics=None, jac_A=None, jac_B=None, enforce_bounds=None, IsInCollision=None):
    # data_type: seen or unseen
    obc, obs, paths, sgs, path_lengths, controls, costs = test_data
    if obs is not None:
        obc = obc.astype(np.float32)
        obc = torch.from_numpy(obc)
    if torch.cuda.is_available():
        obc = obc.cuda()

    if env_type == 'pendulum':
        system = standard_cpp_systems.PSOPTPendulum()
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
        step_sz = 0.002
        num_steps = 20
        traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 500, num_steps, 1, 20, step_sz)

    elif env_type == 'cartpole_obs':
        #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
        system = _sst_module.CartPole()
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.002
        num_steps = 20
        traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 500, num_steps, step_sz*1, step_sz*50, x_init, u_init, t_init)
        goal_S0 = np.identity(4)
        goal_rho0 = 1.0
    elif env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']:
        #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
        obs_width = 6.0
        system = _sst_module.PSOPTAcrobot()
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 500, num_steps, step_sz*1, step_sz*(num_steps-1), x_init, u_init, t_init)
        #step_sz = 0.002
        goal_S0 = np.diag([1.,1.,0,0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0


    circular = system.is_circular_topology()
    def informer(env, x0, xG, direction):
        x0_x = torch.from_numpy(x0.x).type(torch.FloatTensor)
        xG_x = torch.from_numpy(xG.x).type(torch.FloatTensor)
        x0_x = normalize_func(x0_x)
        xG_x = normalize_func(xG_x)
        if torch.cuda.is_available():
            x0_x = x0_x.cuda()
            xG_x = xG_x.cuda()
        if direction == 0:
            x = torch.cat([x0_x,xG_x], dim=0)
            mpNet = mpNet0
            if torch.cuda.is_available():
                x = x.cuda()
            next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data
            next_state = unnormalize_func(next_state).numpy()[0]
            cov = np.diag([0.01,0.01,0.01,0.01])
            #mean = next_state
            #next_state = np.random.multivariate_normal(mean=next_state,cov=cov)
            mean = np.zeros(next_state.shape)
            rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps)
            rand_x_init[0] = rand_x_init[0]*0.
            rand_x_init[-1] = rand_x_init[-1]*0.
            delta_x = next_state - x0.x
            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2*np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    if rand_d < 1 and np.abs(delta_x[i]) >= np.pi*0.5:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2*np.pi
                        if delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2*np.pi
            res = Node(next_state)
            x_init = np.linspace(x0.x, x0.x+delta_x, num_steps) + rand_x_init
            ## TODO: : change this to general case
            u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps,1))
            u_init = u_init_i
            #u_init_i = control[max_d_i]
            cost_i = (num_steps-1)*step_sz  #TOEDIT
            #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i))
            #u_init = u_init + np.random.normal(scale=1., size=u_init.shape)
            t_init = np.linspace(0, cost_i, num_steps)
            #t_init = np.append(t_init, 0.)
        else:
            x = torch.cat([x0_x,xG_x], dim=0)
            mpNet = mpNet1
            next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data
            next_state = unnormalize_func(next_state).numpy()[0]
            cov = np.diag([0.01,0.01,0.01,0.01])
            #mean = next_state
            #next_state = np.random.multivariate_normal(mean=next_state,cov=cov)
            mean = np.zeros(next_state.shape)
            rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps)
            rand_x_init[0] = rand_x_init[0]*0.
            rand_x_init[-1] = rand_x_init[-1]*0.
            delta_x = x0.x - next_state
            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2*np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    if rand_d < 1 and np.abs(delta_x[i]) >= np.pi*0.5:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2*np.pi
                        elif delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2*np.pi
            #next_state = state[max_d_i] + delta_x
            res = Node(next_state)
            # initial: from max_d_i to max_d_i+1
            x_init = np.linspace(next_state, next_state + delta_x, num_steps) + rand_x_init
            # action: copy over to number of steps
            u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps,1))
            u_init = u_init_i
            cost_i = (num_steps-1)*step_sz
            #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i))
            #u_init = u_init + np.random.normal(scale=1., size=u_init.shape)
            t_init = np.linspace(0, cost_i, num_steps)
        return res, x_init, u_init, t_init

    def init_informer(env, x0, xG, direction):
        if direction == 0:
            next_state = xG.x
            delta_x = next_state - x0.x
            cov = np.diag([0.01,0.01,0.01,0.01])
            #mean = next_state
            #next_state = np.random.multivariate_normal(mean=next_state,cov=cov)
            mean = np.zeros(next_state.shape)
            rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps)
            rand_x_init[0] = rand_x_init[0]*0.
            rand_x_init[-1] = rand_x_init[-1]*0.
            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2*np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    if rand_d < 1 and np.abs(delta_x[i]) >= np.pi*0.5:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2*np.pi
                        if delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2*np.pi
            res = Node(next_state)
            x_init = np.linspace(x0.x, x0.x+delta_x, num_steps) + rand_x_init
            ## TODO: : change this to general case
            u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps,1))
            u_init = u_init_i
            #u_init_i = control[max_d_i]
            cost_i = (num_steps-1)*step_sz
            #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i))
            #u_init = u_init + np.random.normal(scale=1., size=u_init.shape)
            t_init = np.linspace(0, cost_i, num_steps)
            #t_init = np.append(t_init, 0.)
        else:
            next_state = xG.x
            cov = np.diag([0.01,0.01,0.01,0.01])
            #mean = next_state
            #next_state = np.random.multivariate_normal(mean=next_state,cov=cov)
            mean = np.zeros(next_state.shape)
            rand_x_init = np.random.multivariate_normal(mean=mean, cov=cov, size=num_steps)
            rand_x_init[0] = rand_x_init[0]*0.
            rand_x_init[-1] = rand_x_init[-1]*0.
            delta_x = x0.x - next_state
            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2*np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    if rand_d < 1 and np.abs(delta_x[i]) >= np.pi*0.5:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2*np.pi
                        elif delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2*np.pi
            #next_state = state[max_d_i] + delta_x
            res = Node(next_state)
            # initial: from max_d_i to max_d_i+1
            x_init = np.linspace(next_state, next_state + delta_x, num_steps) + rand_x_init
            # action: copy over to number of steps
            u_init_i = np.random.uniform(low=[-4.], high=[4], size=(num_steps,1))
            u_init = u_init_i
            cost_i = (num_steps-1)*step_sz
            #u_init = np.repeat(u_init_i, num_steps, axis=0).reshape(-1,len(u_init_i))
            #u_init = u_init + np.random.normal(scale=1., size=u_init.shape)
            t_init = np.linspace(0, cost_i, num_steps)
        return x_init, u_init, t_init

    
    
    fes_env = []   # list of list
    valid_env = []
    time_env = []
    time_total = []
    for i in range(len(paths)):
        time_path = []
        fes_path = []   # 1 for feasible, 0 for not feasible
        valid_path = []      # if the feasibility is valid or not
        # save paths to different files, indicated by i
        #print(obs, flush=True)
        # feasible paths for each env
        for j in range(len(paths[0])):
            state_i = []
            state = paths[i][j]
            # obtain the sequence
            p_start = paths[i][j][0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(controls[i][j])):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(costs[i][j][k]/step_sz)
                accum_cost = 0.
                # modify it because of small difference between data and actual propagation
                p_start = paths[i][j][k]
                state[-1] = paths[i][j][k]
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, controls[i][j][k], step_sz)
                    p_start = enforce_bounds(p_start)          
                    detail_paths.append(p_start)
                    detail_controls.append(controls[i][j])
                    detail_costs.append(step_sz)
                    accum_cost += step_sz
                    if (step % 20 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        control.append(controls[i][j][k])
                        cost.append(accum_cost)
                        accum_cost = 0.
            state[-1] = paths[i][j][-1]
            #############################
            
            
            time0 = time.time()
            time_norm = 0.
            fp = 0 # indicator for feasibility
            print ("step: i="+str(i)+" j="+str(j))
            p1_ind=0
            p2_ind=0
            p_ind=0
            if path_lengths[i][j]==0:
                # invalid, feasible = 0, and path count = 0
                fp = 0
                valid_path.append(0)
            if path_lengths[i][j]>0:
                fp = 0
                valid_path.append(1)
                #paths[i][j][0][1] = 0.
                #paths[i][j][path_lengths[i][j]-1][1] = 0.
                path = [paths[i][j][0], paths[i][j][path_lengths[i][j]-1]]
                # plot the entire path
                #plt.plot(paths[i][j][:,0], paths[i][j][:,1])

                start = Node(path[0])
                goal = Node(path[-1])
                #goal = Node(sgs[i][j][1])
                goal.S0 = goal_S0
                goal.rho0 = goal_rho0    # change this later

                control = []
                time_step = []
                step_sz = step_sz
                MAX_NEURAL_REPLAN = 1
                if obs is None:
                    obs_i = None
                    obc_i = None
                else:
                    obs_i = obs[i]
                    obc_i = obc[i]
                    # convert obs_i center to points
                    new_obs_i = []
                    for k in range(len(obs_i)):
                        obs_pt = []
                        obs_pt.append(obs_i[k][0]-obs_width/2)
                        obs_pt.append(obs_i[k][1]-obs_width/2)
                        obs_pt.append(obs_i[k][0]-obs_width/2)
                        obs_pt.append(obs_i[k][1]+obs_width/2)
                        obs_pt.append(obs_i[k][0]+obs_width/2)
                        obs_pt.append(obs_i[k][1]+obs_width/2)
                        obs_pt.append(obs_i[k][0]+obs_width/2)
                        obs_pt.append(obs_i[k][1]-obs_width/2)
                        new_obs_i.append(obs_pt)
                    #obs_i = new_obs_i
                collision_check = lambda x: IsInCollision(x, new_obs_i)
                for t in range(MAX_NEURAL_REPLAN):
                    # adaptive step size on replanning attempts
                    res, path_list = plan(obs_i, obc_i, start, goal, detail_paths, informer, init_informer, system, dynamics, \
                               enforce_bounds, collision_check, traj_opt, jac_A, jac_B, step_sz=step_sz, MAX_LENGTH=300)
                    #print('after neural replan:')
                    #print(path)
                    #path = lvc(path, obc[i], IsInCollision, step_sz=step_sz)
                    #print('after lvc:')
                    #print(path)                    
                    if res:
                        fp = 1
                        print('feasible ok!')
                        break
                    #if feasibility_check(bvp_solver, path, obc_i, IsInCollision, step_sz=0.01):
                    #    fp = 1
                    #    print('feasible, ok!')
                    #    break
            if fp:
                # only for successful paths
                time1 = time.time() - time0
                time1 -= time_norm
                time_path.append(time1)
                print('test time: %f' % (time1))
                # write the path
                #print('planned path:')
                #print(path)
                #path = np.array(path)
                #np.savetxt('results/path_%d.txt' % (j), path)
                #np.savetxt('results/control_%d.txt' % (j), np.array(control))
                #np.savetxt('results/timestep_%d.txt' % (j), np.array(time_step))

            fes_path.append(fp)

        time_env.append(time_path)
        time_total += time_path
        print('average test time up to now: %f' % (np.mean(time_total)))
        fes_env.append(fes_path)
        valid_env.append(valid_path)
        print('accuracy up to now: %f' % (float(np.sum(fes_env)) / np.sum(valid_env)))
        time_path = save_dir + 'mpnet_%s_time.pkl' % (data_type)
        pickle.dump(time_env, open(time_path, "wb" ))
        #print(fp/tp)
    return np.array(fes_env), np.array(valid_env)
Пример #8
0
def main(args):
    # set seed
    print(args.model_path)
    torch_seed = np.random.randint(low=0, high=1000)
    np_seed = np.random.randint(low=0, high=1000)
    py_seed = np.random.randint(low=0, high=1000)
    torch.manual_seed(torch_seed)
    np.random.seed(np_seed)
    random.seed(py_seed)
    # Build the models
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)

    # setup evaluation function and load function
    if args.env_type == 'pendulum':
        IsInCollision = pendulum.IsInCollision
        normalize = pendulum.normalize
        unnormalize = pendulum.unnormalize
        obs_file = None
        obc_file = None
        cae = cae_identity
        mlp = MLP
        system = standard_cpp_systems.PSOPTPendulum()
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
        max_iter = 200
        min_time_steps = 20
        max_time_steps = 200
        integration_step = 0.002
        goal_radius = 0.1
        random_seed = 0
        sst_delta_near = 0.05
        sst_delta_drain = 0.02
        vel_idx = [1]

    mpNet = KMPNet(args.total_input_size, args.AE_input_size,
                   args.mlp_input_size, args.output_size, cae, mlp)
    # load previously trained model if start epoch > 0
    model_path = 'kmpnet_epoch_%d.pkl' % (args.start_epoch)
    if args.start_epoch > 0:
        load_net_state(mpNet, os.path.join(args.model_path, model_path))
        torch_seed, np_seed, py_seed = load_seed(
            os.path.join(args.model_path, model_path))
        # set seed after loading
        torch.manual_seed(torch_seed)
        np.random.seed(np_seed)
        random.seed(py_seed)
    if torch.cuda.is_available():
        mpNet.cuda()
        mpNet.mlp.cuda()
        mpNet.encoder.cuda()
        if args.opt == 'Adagrad':
            mpNet.set_opt(torch.optim.Adagrad, lr=args.learning_rate)
        elif args.opt == 'Adam':
            mpNet.set_opt(torch.optim.Adam, lr=args.learning_rate)
        elif args.opt == 'SGD':
            mpNet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9)
    if args.start_epoch > 0:
        load_opt_state(mpNet, os.path.join(args.model_path, model_path))

    # load train and test data
    print('loading...')
    if args.seen_N > 0:
        seen_test_data = data_loader.load_test_dataset(
            N=args.seen_N,
            NP=args.seen_NP,
            s=args.seen_s,
            sp=args.seen_sp,
            p_folder=args.path_folder,
            obs_f=obs_file,
            obc_f=obc_file)
    if args.unseen_N > 0:
        unseen_test_data = data_loader.load_test_dataset(
            N=args.unseen_N,
            NP=args.unseen_NP,
            s=args.unseen_s,
            sp=args.unseen_sp,
            p_folder=args.path_folder,
            obs_f=obs_file,
            obc_f=obc_file)
    # test
    # testing
    print('testing...')
    seen_test_suc_rate = 0.
    unseen_test_suc_rate = 0.
    T = 1

    obc, obs, paths, path_lengths = seen_test_data
    if obs is not None:
        obs = obs.astype(np.float32)
        obs = torch.from_numpy(obs)
    fes_env = []  # list of list
    valid_env = []
    time_env = []
    time_total = []
    normalize_func = lambda x: normalize(x, args.world_size)
    unnormalize_func = lambda x: unnormalize(x, args.world_size)

    low = []
    high = []
    state_bounds = system.get_state_bounds()
    for i in range(len(state_bounds)):
        low.append(state_bounds[i][0])
        high.append(state_bounds[i][1])

    for i in range(len(paths)):
        time_path = []
        fes_path = []  # 1 for feasible, 0 for not feasible
        valid_path = []  # if the feasibility is valid or not
        # save paths to different files, indicated by i
        # feasible paths for each env
        suc_n = 0
        sst_suc_n = 0
        for j in range(len(paths[0])):
            time0 = time.time()
            time_norm = 0.
            fp = 0  # indicator for feasibility
            print("step: i=" + str(i) + " j=" + str(j))
            p1_ind = 0
            p2_ind = 0
            p_ind = 0
            if path_lengths[i][j] == 0:
                # invalid, feasible = 0, and path count = 0
                fp = 0
                valid_path.append(0)
            if path_lengths[i][j] > 0:
                fp = 0
                valid_path.append(1)
                path = [paths[i][j][0], paths[i][j][path_lengths[i][j] - 1]]
                start = paths[i][j][0]
                end = paths[i][j][path_lengths[i][j] - 1]
                start[1] = 0.
                end[1] = 0.
                # plot the entire path
                #plt.plot(paths[i][j][:,0], paths[i][j][:,1])
                """
                planner = SST(
                    state_bounds=system.get_state_bounds(),
                    control_bounds=system.get_control_bounds(),
                    distance=system.distance_computer(),
                    start_state=start,
                    goal_state=end,
                    goal_radius=goal_radius,
                    random_seed=0,
                    sst_delta_near=sst_delta_near,
                    sst_delta_drain=sst_delta_drain
                )
                """
                planner = RRT(state_bounds=system.get_state_bounds(),
                              control_bounds=system.get_control_bounds(),
                              distance=system.distance_computer(),
                              start_state=start,
                              goal_state=end,
                              goal_radius=goal_radius,
                              random_seed=0)

                control = []
                time_step = []
                MAX_NEURAL_REPLAN = 11
                if obs is None:
                    obs_i = None
                    obc_i = None
                else:
                    obs_i = obs[i]
                    obc_i = obc[i]
            print('created RRT')
            # Run planning and print out solution is some statistics every few iterations.
            time0 = time.time()
            start = paths[i][j][0]
            #end = paths[i][j][path_lengths[i][j]-1]
            new_sample = start
            sample = start
            N_sample = 10
            for iteration in range(max_iter // N_sample):
                #if iteration % 50 == 0:
                #    # from time to time use the goal
                #    sample = end
                #    #planner.step_with_sample(system, sample, 20, 200, 0.002)
                #else:
                #planner.step(system, min_time_steps, max_time_steps, integration_step)
                #sample = np.random.uniform(low=low, high=high)
                for num_sample in range(N_sample):
                    ip1 = np.concatenate([new_sample, end])
                    np.expand_dims(ip1, 0)
                    #ip1=torch.cat((obs,start,goal)).unsqueeze(0)
                    time0 = time.time()
                    ip1 = normalize_func(ip1)
                    ip1 = torch.FloatTensor(ip1)
                    time_norm += time.time() - time0
                    ip1 = to_var(ip1)
                    if obs is not None:
                        obs = torch.FloatTensor(obs).unsqueeze(0)
                        obs = to_var(obs)
                    sample = mpNet(ip1, obs).squeeze(0)
                    # unnormalize to world size
                    sample = sample.data.cpu().numpy()
                    time0 = time.time()
                    sample = unnormalize_func(sample)
                    print('sample:')
                    print(sample)
                    print('start:')
                    print(start)
                    print('goal:')
                    print(end)
                    print('accuracy: %f' % (float(suc_n) / (j + 1)))
                    print('sst accuracy: %f' % (float(sst_suc_n) / (j + 1)))
                    sample = planner.step_with_sample(system, sample,
                                                      min_time_steps,
                                                      max_time_steps, 0.002)
                    #planner.step_bvp(system, 10, 200, 0.002)
                    im = planner.visualize_nodes(system)
                    show_image(im, 'nodes', wait=False)
                new_sample = planner.step_with_sample(system, end,
                                                      min_time_steps,
                                                      max_time_steps, 0.002)

                solution = planner.get_solution()
                if solution is not None:
                    print('solved.')
                    suc_n += 1
                    break

            planner = SST(state_bounds=system.get_state_bounds(),
                          control_bounds=system.get_control_bounds(),
                          distance=system.distance_computer(),
                          start_state=start,
                          goal_state=end,
                          goal_radius=goal_radius,
                          random_seed=0,
                          sst_delta_near=sst_delta_near,
                          sst_delta_drain=sst_delta_drain)

            # Run planning and print out solution is some statistics every few iterations.
            time0 = time.time()
            start = paths[i][j][0]
            #end = paths[i][j][path_lengths[i][j]-1]
            new_sample = start
            sample = start
            N_sample = 10
            for iteration in range(max_iter // N_sample):
                for k in range(N_sample):

                    sample = np.random.uniform(low=low, high=high)
                    planner.step_with_sample(system, sample, min_time_steps,
                                             max_time_steps, integration_step)
                    im = planner.visualize_tree(system)
                    show_image(im, 'tree', wait=False)
                    print('accuracy: %f' % (float(suc_n) / (j + 1)))
                    print('sst accuracy: %f' % (float(sst_suc_n) / (j + 1)))

                planner.step_with_sample(system, end, min_time_steps,
                                         max_time_steps, integration_step)
                solution = planner.get_solution()
                if solution is not None:
                    print('solved.')
                    sst_suc_n += 1
                    break

            print('accuracy: %f' % (float(suc_n) / (j + 1)))
            print('sst accuracy: %f' % (float(sst_suc_n) / (j + 1)))
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, cost_i, max_iteration, data,
                      out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)
        elif args.env_type == 'cartpole_obs':
            obs_width = 4.0
            psopt_system = _sst_module.PSOPTCartPole()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, obs_width, 'cartpole')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.002
            num_steps = 101
            goal_radius = 1.5
            random_seed = 0
            delta_near = 2.0
            delta_drain = 1.2
            device = 3
            num_sample = 10
            min_time_steps = 10
            max_time_steps = 200
            mpnet_goal_threshold = 2.0
            mpnet_length_threshold = 40
            pick_goal_init_threshold = 0.1
            pick_goal_end_threshold = 0.8
            pick_goal_start_percent = 0.4
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius = 2.0
            random_seed = 0
            delta_near = 1.0
            delta_drain = 0.5
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, \
                                              200, num_steps, step_sz, propagate_system, 3)
        #cost_threshold = cost_i * 1.1
        cost_threshold = 100000000.
        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        for i in range(len(data)):
            update_line(hl, ax, data[i])
        draw_update_line(ax)        
        # visualization end
        """

        # visualize for cartpole
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_autoscale_on(True)
        hl, = ax.plot([], [], 'b')

        #hl_real, = ax.plot([], [], 'r')
        def update_line(h, ax, new_data):
            h.set_data(np.append(h.get_xdata(), new_data[0]),
                       np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def draw_update_line(ax):
            ax.relim()
            ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()

        # randomly pick up a point in the data, and find similar data in the dataset
        # plot the next point
        #ax.set_autoscale_on(True)
        ax.set_xlim(-30, 30)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]),
                       np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2 * np.pi
            return res

        dx = 1
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2 * 30. / dx)
        jmin = 0
        jmax = int(2 * np.pi / dtheta)

        for i in range(imin, imax):
            for j in range(jmin, jmax):
                x = np.array([dx * i - 30, 0., dtheta * j - np.pi, 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:, 0], feasible_points[:, 2], c='yellow')
        ax.scatter(infeasible_points[:, 0], infeasible_points[:, 2], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        draw_update_line(ax)
        #state_t = start_state
        # visualization end

        # generate a path by using SST to plan for some maximal iterations

        state_t = start_state
        pick_goal_threshold = .0
        ax.scatter(goal_inform_state[0],
                   goal_inform_state[2],
                   marker='*',
                   c='red')  # cartpole

        for i in range(max_iteration):
            time0 = time.time()
            # determine if picking goal based on iteration number
            goal_prob = random.random()
            #flag=1: using MPNet
            #flag=0: not using MPNet
            if goal_prob <= pick_goal_threshold:
                flag = 0
            else:
                flag = 1
            bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_tree_SMP_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \
                                flag, goal_radius, max_iteration, distance_computer, \
                                delta_near, delta_drain, cost_threshold)

            if len(
                    bvp_u
            ) != 0:  # and bvp_t[0] > 0.01:  # turn bvp_t off if want to use step_bvp
                xw_scat = ax.scatter(mpnet_res[0],
                                     mpnet_res[2],
                                     c='lightgreen')
                draw_update_line(ax)

                # propagate data
                p_start = bvp_x[0]
                detail_paths = [p_start]
                detail_controls = []
                detail_costs = []
                state = [p_start]
                control = []
                cost = []
                for k in range(len(bvp_u)):
                    #state_i.append(len(detail_paths)-1)
                    max_steps = int(bvp_t[k] / step_sz)
                    accum_cost = 0.
                    for step in range(1, max_steps + 1):
                        p_start = dynamics(p_start, bvp_u[k], step_sz)
                        p_start = enforce_bounds(p_start)
                        detail_paths.append(p_start)
                        accum_cost += step_sz
                        if (step % 1 == 0) or (step == max_steps):
                            state.append(p_start)
                            cost.append(accum_cost)
                            accum_cost = 0.

                xs_to_plot = np.array(state)
                for j in range(len(xs_to_plot)):
                    xs_to_plot[j] = wrap_angle(xs_to_plot[j], propagate_system)
                xs_to_plot = xs_to_plot[::5]
                #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green')  # acrobot
                ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2],
                           c='green')  # cartpole

                #ax.scatter(bvp_x[:,0], bvp_x[:,1], c='green')
                print('solution: x')
                print(bvp_x)
                print('solution: u')
                print(bvp_u)
                print('solution: t')
                print(bvp_t)
                # draw start and goal
                #ax.scatter(start_state[0], goal_state[0], marker='X')
                draw_update_line(ax)
                #state_t = state[-1]

            print('state_t:')
            print(state_t)
            print('mpnet_res')
            print(mpnet_res)
            # based on flag, determine how to change state_t
            if flag:
                # only change state_t if in MPNet inform mode
                #if len(bvp_u) != 0:
                if True:
                    # try using steered result as next start
                    #if not IsInCollision(state_t, obs_i):
                    state_t = mpnet_res
                    print('after copying to state_t:')
                    print('state_t')
                    print(state_t)
                    # if in collision, then not using it
                else:
                    print('failure')
                    state_t = start_state  # failed BVP, back to origin

        plan_time = time.time() - time0

        print('plan time: %fs' % (plan_time))
        if len(res_u) == 0:
            print('failed.')
            out_queue.put(-1)
        else:
            print('path succeeded.')
            print('cost: %f' % (np.sum(res_t)))
            print('cost_threshold: %f' % (cost_threshold))
            print('data cost: %f' % (cost_i))
            out_queue.put(plan_time)
def main(args):
    # set seed
    print(args.model_path)
    torch_seed = np.random.randint(low=0, high=1000)
    np_seed = np.random.randint(low=0, high=1000)
    py_seed = np.random.randint(low=0, high=1000)
    #torch.manual_seed(torch_seed)
    np.random.seed(np_seed)
    random.seed(py_seed)
    # Build the models
    #if torch.cuda.is_available():
    #    torch.cuda.set_device(args.device)

    # setup evaluation function and load function
    if args.env_type == 'acrobot_obs':
        obs_file = None
        obc_file = None
        #cpp_propagator = _sst_module.SystemPropagator()
        #dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)

        obs_f = True
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        goal_S0 = np.diag([1.,1.,0,0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)

        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 200, num_steps, step_sz*1, step_sz*(num_steps-1), x_init, u_init, t_init)
        goal_S0 = np.diag([1.,1.,0,0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0

        


    if args.env_type == 'pendulum':
        step_sz = 0.002
        num_steps = 20

    elif args.env_type == 'cartpole_obs':
        #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
        step_sz = 0.002
        num_steps = 20
        goal_S0 = np.identity(4)
        goal_rho0 = 1.0
    elif args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']:
        #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
        obs_width = 6.0
        step_sz = 0.02
        num_steps = 21
        goal_radius=10.0
        random_seed=0
        delta_near=1.0
        delta_drain=0.5


    # load previously trained model if start epoch > 0
    #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps)
    mlp_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_MLP_epoch_5000.pt')
    encoder_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_encoder_epoch_5000.pt')
    cost_mlp_path = os.path.join(os.getcwd()+'/c++/','costnet_acrobot_obs_MLP_epoch_800_step_10.pt')
    cost_encoder_path = os.path.join(os.getcwd()+'/c++/','costnet_acrobot_obs_encoder_epoch_800_step_10.pt')
    
    print('mlp_path:')
    print(mlp_path)
    #####################################################
    def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, cost_i, max_iteration, out_queue):
        if args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius=2.0
            random_seed=0
            delta_near=1.0
            delta_drain=0.5
            device = 3
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, \
                                              200, num_steps, step_sz, propagate_system, device)
        #cost_threshold = cost_i * 1.1
        cost_threshold = 100000000.
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        res_x, res_u, res_t = planner.plan_tree_SMP_cost_gradient("sst", propagate_system, psopt_system, obc.flatten(), start_state, goal_inform_state, goal_inform_state, \
                                goal_radius, max_iteration, distance_computer, \
                                delta_near, delta_drain, cost_threshold, 15)
        plan_time = time.time() - time0

        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        draw_update_line(ax)
        #state_t = start_state
                
        if len(res_u):
            # propagate data
            p_start = res_x[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(res_u)):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(res_t[k]/step_sz)
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                p_start = res_x[k]
                state[-1] = res_x[k]
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, res_u[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            state[-1] = res_x[-1]
            
            
            
            xs_to_plot = np.array(state)
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
                if IsInCollision(xs_to_plot[i], obs_i):
                    print('in collision')
            ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green')
            # draw start and goal
            #ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            plt.waitforbuttonpress()
        """
        
        #im = planner.visualize_nodes(propagate_system)
        #sec = input('Let us wait for user input')
        #show_image_opencv(im, "planning_tree", wait=True)
        
        # validate if the path contains collision
        """
        if len(res_u):
            # propagate data
            p_start = res_x[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(res_u)):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(res_t[k]/step_sz)
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                p_start = res_x[k]
                state[-1] = res_x[k]
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, res_u[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
                        # check collision for the new state
                        assert not IsInCollision(p_start, obs_i)
                        
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            state[-1] = res_x[-1]
        # validation end
        """
        
        print('plan time: %fs' % (plan_time))
        if len(res_x) == 0:
            print('failed.')
            out_queue.put(-1)
        else:
            print('path succeeded.')
            print('cost: %f' % (np.sum(res_t)))
            print('cost_threshold: %f' % (cost_threshold))
            print('data cost: %f' % (cost_i))
            out_queue.put(plan_time)
    ####################################################################################



    # load data
    print('loading...')
    if args.seen_N > 0:
        seen_test_data = data_loader.load_test_dataset(args.seen_N, args.seen_NP,
                                  args.data_folder, obs_f, args.seen_s, args.seen_sp)
    if args.unseen_N > 0:
        unseen_test_data = data_loader.load_test_dataset(args.unseen_N, args.unseen_NP,
                                  args.data_folder, obs_f, args.unseen_s, args.unseen_sp)
    # test
    # testing

    queue = Queue(1)
    print('testing...')
    seen_test_suc_rate = 0.
    unseen_test_suc_rate = 0.

    obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data
    obc = obc.astype(np.float32)
    #obc = torch.from_numpy(obc)
    #if torch.cuda.is_available():
    #    obc = obc.cuda()
    
    plan_res = []
    plan_times = []
    plan_res_all = []
    for i in range(len(paths)):
        new_obs_i = []
        obs_i = obs[i]
        plan_res_env = []
        plan_time_env = []
        for k in range(len(obs_i)):
            obs_pt = []
            obs_pt.append(obs_i[k][0]-obs_width/2)
            obs_pt.append(obs_i[k][1]-obs_width/2)
            obs_pt.append(obs_i[k][0]-obs_width/2)
            obs_pt.append(obs_i[k][1]+obs_width/2)
            obs_pt.append(obs_i[k][0]+obs_width/2)
            obs_pt.append(obs_i[k][1]+obs_width/2)
            obs_pt.append(obs_i[k][0]+obs_width/2)
            obs_pt.append(obs_i[k][1]-obs_width/2)
            new_obs_i.append(obs_pt)
        obs_i = new_obs_i
        #print(obs_i)
        for j in range(len(paths[i])):
            start_state = sgs[i][j][0]
            goal_inform_state = paths[i][j][-1]
            goal_state = sgs[i][j][1]
            cost_i = costs[i][j].sum()
            #cost_i = 100000000.
            print('environment: %d/%d, path: %d/%d' % (i+1, len(paths), j+1, len(paths[i])))
            p = Process(target=plan_one_path, args=(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, cost_i, 300000, queue))
            #plan_one_path(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, cost_i, 300000, queue)
            p.start()
            p.join()
            res = queue.get()
            if res == -1:
                plan_res_env.append(0)
                plan_res_all.append(0)
            else:
                plan_res_env.append(1)
                plan_times.append(res)
                plan_res_all.append(1)
            print('average accuracy up to now: %f' % (np.array(plan_res_all).flatten().mean()))
            print('plan average time: %f' % (np.array(plan_times).mean()))
            print('plan time std: %f' % (np.array(plan_times).std()))
        plan_res.append(plan_res_env)
    print('plan accuracy: %f' % (np.array(plan_res).flatten().mean()))
    print('plan average time: %f' % (np.array(plan_times).mean()))
    print('plan time std: %f' % (np.array(plan_times).std()))
def main(args):
    # set seed
    print(args.model_path)
    torch_seed = np.random.randint(low=0, high=1000)
    np_seed = np.random.randint(low=0, high=1000)
    py_seed = np.random.randint(low=0, high=1000)
    #torch.manual_seed(torch_seed)
    np.random.seed(np_seed)
    random.seed(py_seed)
    # Build the models
    #if torch.cuda.is_available():
    #    torch.cuda.set_device(args.device)

    # setup evaluation function and load function
    if args.env_type == 'pendulum':
        obs_file = None
        obc_file = None
        obs_f = False
        #system = standard_cpp_systems.PSOPTPendulum()
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
    elif args.env_type == 'cartpole_obs':
        step_sz = 0.002
        num_steps = 21
        goal_radius = 1.5
        random_seed = 0
        delta_near = 2.0
        delta_drain = 1.2
        cost_threshold = 1.2
        min_time_steps = 10
        max_time_steps = 200
        integration_step = 0.002
        obs_width = 4.0
        obs_f = True
        system = _sst_module.PSOPTCartPole()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)

        #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole')
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
    elif args.env_type == 'acrobot_obs':
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)

        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 200, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0

    if args.env_type == 'pendulum':
        step_sz = 0.002
        num_steps = 20

    elif args.env_type == 'cartpole_obs':
        #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
        step_sz = 0.002
        num_steps = 21
        goal_radius = 1.5
        random_seed = 0
        delta_near = 2.0
        delta_drain = 1.2
        cost_threshold = 1.2
        min_time_steps = 10
        max_time_steps = 200
        integration_step = 0.002
        obs_width = 4.0
        obs_f = True
        IsInCollision = cartpole_IsInCollision
        enforce_bounds = cartpole_enforce_bounds
    elif args.env_type in [
            'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4',
            'acrobot_obs_8'
    ]:
        #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
        obs_width = 6.0
        step_sz = 0.02
        num_steps = 21
        goal_radius = 2.0
        random_seed = 0
        delta_near = 0.1
        delta_drain = 0.05

    # load previously trained model if start epoch > 0
    #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps)
    mlp_path = os.path.join(
        os.getcwd() + '/c++/', '%s_MLP_lr%f_epoch_%d_step_%d.pt' %
        (args.env_type, args.learning_rate, args.start_epoch, args.num_steps))
    encoder_path = os.path.join(
        os.getcwd() + '/c++/', '%s_encoder_lr%f_epoch_%d_step_%d.pt' %
        (args.env_type, args.learning_rate, args.start_epoch, args.num_steps))
    #mlp_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_MLP_epoch_5000.pt')
    #encoder_path = os.path.join(os.getcwd()+'/c++/','acrobot_obs_encoder_epoch_5000.pt')

    #cost_mlp_path = os.path.join(os.getcwd()+'/c++/','costnet_%s_MLP_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps))
    #cost_encoder_path = os.path.join(os.getcwd()+'/c++/','costnet_%s_encoder_lr%f_epoch_%d_step_%d.pt' % (args.env_type, args.learning_rate, args.start_epoch, args.num_steps))
    cost_mlp_path = os.path.join(
        os.getcwd() + '/c++/', 'costnet_acrobot_obs_MLP_epoch_800_step_10.pt')
    cost_encoder_path = os.path.join(
        os.getcwd() + '/c++/',
        'costnet_acrobot_obs_encoder_epoch_800_step_10.pt')

    print('mlp_path:')
    print(mlp_path)

    #####################################################
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, cost_i, max_iteration, data,
                      out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)
        elif args.env_type == 'cartpole_obs':
            obs_width = 4.0
            psopt_system = _sst_module.PSOPTCartPole()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, obs_width, 'cartpole')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.002
            num_steps = 101
            goal_radius = 1.5
            random_seed = 0
            delta_near = 2.0
            delta_drain = 1.2
            device = 3
            num_sample = 10
            min_time_steps = 10
            max_time_steps = 200
            mpnet_goal_threshold = 2.0
            mpnet_length_threshold = 40
            pick_goal_init_threshold = 0.1
            pick_goal_end_threshold = 0.8
            pick_goal_start_percent = 0.4
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius = 2.0
            random_seed = 0
            delta_near = 1.0
            delta_drain = 0.5
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, \
                                              200, num_steps, step_sz, propagate_system, 3)
        #cost_threshold = cost_i * 1.1
        cost_threshold = 100000000.
        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        for i in range(len(data)):
            update_line(hl, ax, data[i])
        draw_update_line(ax)        
        # visualization end
        """

        # visualize for cartpole
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_autoscale_on(True)
        hl, = ax.plot([], [], 'b')

        #hl_real, = ax.plot([], [], 'r')
        def update_line(h, ax, new_data):
            h.set_data(np.append(h.get_xdata(), new_data[0]),
                       np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def draw_update_line(ax):
            ax.relim()
            ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()

        # randomly pick up a point in the data, and find similar data in the dataset
        # plot the next point
        #ax.set_autoscale_on(True)
        ax.set_xlim(-30, 30)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]),
                       np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2 * np.pi
            return res

        dx = 1
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2 * 30. / dx)
        jmin = 0
        jmax = int(2 * np.pi / dtheta)

        for i in range(imin, imax):
            for j in range(jmin, jmax):
                x = np.array([dx * i - 30, 0., dtheta * j - np.pi, 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:, 0], feasible_points[:, 2], c='yellow')
        ax.scatter(infeasible_points[:, 0], infeasible_points[:, 2], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        draw_update_line(ax)
        #state_t = start_state
        # visualization end

        # generate a path by using SST to plan for some maximal iterations

        state_t = start_state
        pick_goal_threshold = .0
        ax.scatter(goal_inform_state[0],
                   goal_inform_state[2],
                   marker='*',
                   c='red')  # cartpole

        for i in range(max_iteration):
            time0 = time.time()
            # determine if picking goal based on iteration number
            goal_prob = random.random()
            #flag=1: using MPNet
            #flag=0: not using MPNet
            if goal_prob <= pick_goal_threshold:
                flag = 0
            else:
                flag = 1
            bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_tree_SMP_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \
                                flag, goal_radius, max_iteration, distance_computer, \
                                delta_near, delta_drain, cost_threshold)

            if len(
                    bvp_u
            ) != 0:  # and bvp_t[0] > 0.01:  # turn bvp_t off if want to use step_bvp
                xw_scat = ax.scatter(mpnet_res[0],
                                     mpnet_res[2],
                                     c='lightgreen')
                draw_update_line(ax)

                # propagate data
                p_start = bvp_x[0]
                detail_paths = [p_start]
                detail_controls = []
                detail_costs = []
                state = [p_start]
                control = []
                cost = []
                for k in range(len(bvp_u)):
                    #state_i.append(len(detail_paths)-1)
                    max_steps = int(bvp_t[k] / step_sz)
                    accum_cost = 0.
                    for step in range(1, max_steps + 1):
                        p_start = dynamics(p_start, bvp_u[k], step_sz)
                        p_start = enforce_bounds(p_start)
                        detail_paths.append(p_start)
                        accum_cost += step_sz
                        if (step % 1 == 0) or (step == max_steps):
                            state.append(p_start)
                            cost.append(accum_cost)
                            accum_cost = 0.

                xs_to_plot = np.array(state)
                for j in range(len(xs_to_plot)):
                    xs_to_plot[j] = wrap_angle(xs_to_plot[j], propagate_system)
                xs_to_plot = xs_to_plot[::5]
                #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green')  # acrobot
                ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2],
                           c='green')  # cartpole

                #ax.scatter(bvp_x[:,0], bvp_x[:,1], c='green')
                print('solution: x')
                print(bvp_x)
                print('solution: u')
                print(bvp_u)
                print('solution: t')
                print(bvp_t)
                # draw start and goal
                #ax.scatter(start_state[0], goal_state[0], marker='X')
                draw_update_line(ax)
                #state_t = state[-1]

            print('state_t:')
            print(state_t)
            print('mpnet_res')
            print(mpnet_res)
            # based on flag, determine how to change state_t
            if flag:
                # only change state_t if in MPNet inform mode
                #if len(bvp_u) != 0:
                if True:
                    # try using steered result as next start
                    #if not IsInCollision(state_t, obs_i):
                    state_t = mpnet_res
                    print('after copying to state_t:')
                    print('state_t')
                    print(state_t)
                    # if in collision, then not using it
                else:
                    print('failure')
                    state_t = start_state  # failed BVP, back to origin

        plan_time = time.time() - time0

        print('plan time: %fs' % (plan_time))
        if len(res_u) == 0:
            print('failed.')
            out_queue.put(-1)
        else:
            print('path succeeded.')
            print('cost: %f' % (np.sum(res_t)))
            print('cost_threshold: %f' % (cost_threshold))
            print('data cost: %f' % (cost_i))
            out_queue.put(plan_time)

    ####################################################################################

    # load data
    print('loading...')
    if args.seen_N > 0:
        seen_test_data = data_loader.load_test_dataset(args.seen_N,
                                                       args.seen_NP,
                                                       args.data_folder, obs_f,
                                                       args.seen_s,
                                                       args.seen_sp)
    if args.unseen_N > 0:
        unseen_test_data = data_loader.load_test_dataset(
            args.unseen_N, args.unseen_NP, args.data_folder, obs_f,
            args.unseen_s, args.unseen_sp)
    # test
    # testing

    queue = Queue(1)
    print('testing...')
    seen_test_suc_rate = 0.
    unseen_test_suc_rate = 0.

    obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data
    obc = obc.astype(np.float32)
    #obc = torch.from_numpy(obc)
    #if torch.cuda.is_available():
    #    obc = obc.cuda()

    plan_res = []
    plan_times = []
    plan_res_all = []
    for i in range(len(paths)):
        new_obs_i = []
        obs_i = obs[i]
        plan_res_env = []
        plan_time_env = []
        for k in range(len(obs_i)):
            obs_pt = []
            obs_pt.append(obs_i[k][0] - obs_width / 2)
            obs_pt.append(obs_i[k][1] - obs_width / 2)
            obs_pt.append(obs_i[k][0] - obs_width / 2)
            obs_pt.append(obs_i[k][1] + obs_width / 2)
            obs_pt.append(obs_i[k][0] + obs_width / 2)
            obs_pt.append(obs_i[k][1] + obs_width / 2)
            obs_pt.append(obs_i[k][0] + obs_width / 2)
            obs_pt.append(obs_i[k][1] - obs_width / 2)
            new_obs_i.append(obs_pt)
        obs_i = new_obs_i
        #print(obs_i)
        for j in range(len(paths[i])):
            start_state = sgs[i][j][0]
            goal_inform_state = paths[i][j][-1]
            goal_state = sgs[i][j][1]
            cost_i = costs[i][j].sum()
            #cost_i = 100000000.

            # propagate data
            p_start = paths[i][j][0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(controls[i][j])):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(costs[i][j][k] / step_sz)
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                p_start = paths[i][j][k]
                state[-1] = paths[i][j][k]
                for step in range(1, max_steps + 1):
                    p_start = dynamics(p_start, controls[i][j][k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    detail_controls.append(controls[i][j])
                    detail_costs.append(step_sz)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        control.append(controls[i][j][k])
                        cost.append(accum_cost)
                        accum_cost = 0.
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            state[-1] = paths[i][j][-1]
            data = state
            # end of propagation

            print('environment: %d/%d, path: %d/%d' %
                  (i + 1, len(paths), j + 1, len(paths[i])))
            #p = Process(target=plan_one_path, args=(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, cost_i, 300000, data, queue))
            plan_one_path(obs_i, obs[i], obc[i], start_state, goal_state,
                          goal_inform_state, cost_i, 300000, data, queue)
            #p.start()
            #p.join()
            res = queue.get()
            if res == -1:
                plan_res_env.append(0)
                plan_res_all.append(0)
            else:
                plan_res_env.append(1)
                plan_times.append(res)
                plan_res_all.append(1)
            print('average accuracy up to now: %f' %
                  (np.array(plan_res_all).flatten().mean()))
            print('plan average time: %f' % (np.array(plan_times).mean()))
            print('plan time std: %f' % (np.array(plan_times).std()))
        plan_res.append(plan_res_env)
    print('plan accuracy: %f' % (np.array(plan_res).flatten().mean()))
    print('plan average time: %f' % (np.array(plan_times).mean()))
    print('plan time std: %f' % (np.array(plan_times).std()))
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, max_iteration, data, out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            system = _sst_module.CartPole()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(
                x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init,
                u_init, t_init)
            goal_S0 = np.identity(4)
            goal_rho0 = 1.0
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))

            step_sz = 0.02
            num_steps = 21
            goal_radius = 2
            random_seed = 0
            #delta_near=1.0
            #delta_drain=0.5
            delta_near = 0.1
            delta_drain = 0.05
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path,
                                              cost_mlp_path, cost_encoder_path,
                                              20, num_steps, step_sz,
                                              propagate_system, args.device)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obc:')
        #print(obc.shape)
        #print(delta_near)
        #print(delta_drain)
        #print('start_state:')
        #print(start_state)
        #print('goal_state:')
        #print(goal_state)

        state_t = start_state

        pick_goal_threshold = 0.10
        goal_linear_inc_start_iter = int(0.6 * max_iteration)
        goal_linear_inc_end_iter = max_iteration
        goal_linear_inc_end_threshold = 0.95
        goal_linear_inc = (goal_linear_inc_end_threshold -
                           pick_goal_threshold) / (goal_linear_inc_end_iter -
                                                   goal_linear_inc_start_iter)

        start_time = time.time()
        plan_time = -1
        for i in tqdm(range(max_iteration)):
            print('iteration: %d' % (i))
            print('state_t:')
            print(state_t)
            # calculate if using goal or not
            use_goal_prob = random.random()
            if i > goal_linear_inc_start_iter:
                pick_goal_threshold += goal_linear_inc
            if use_goal_prob <= pick_goal_threshold:
                flag = 0
            else:
                flag = 1

            bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \
                                    flag, goal_radius, max_iteration, distance_computer, \
                                    delta_near, delta_drain)

            solution = planner.get_solution()
            if solution is not None:
                plan_time = time.time() - start_time

        if plan_time == -1:
            print('failed.')
            out_queue.put(-1)
        else:
            print('path succeeded.')
            out_queue.put(plan_time)  #if len(res_x) == 0:
def main(args):
    # set seed
    print(args.model_path)
    torch_seed = np.random.randint(low=0, high=1000)
    np_seed = np.random.randint(low=0, high=1000)
    py_seed = np.random.randint(low=0, high=1000)
    #torch.manual_seed(torch_seed)
    np.random.seed(np_seed)
    random.seed(py_seed)
    # Build the models
    #if torch.cuda.is_available():
    #    torch.cuda.set_device(args.device)

    # setup evaluation function and load function
    if args.env_type == 'pendulum':
        obs_file = None
        obc_file = None
        obs_f = False
        #system = standard_cpp_systems.PSOPTPendulum()
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
    elif args.env_type == 'cartpole_obs':
        normalize = cartpole.normalize
        unnormalize = cartpole.unnormalize
        obs_file = None
        obc_file = None
        #dynamics = cartpole.dynamics
        #jax_dynamics = cartpole.jax_dynamics
        #enforce_bounds = cartpole.enforce_bounds
        cae = CAE_acrobot_voxel_2d
        mlp = mlp_acrobot.MLP
        obs_f = True
        #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole')
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
    elif args.env_type == 'acrobot_obs':
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)

        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 200, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        obs_width = 6.0
        step_sz = 0.02
        num_steps = 21
        goal_radius = 2.0
        random_seed = 0
        delta_near = 0.1
        delta_drain = 0.05

    elif args.env_type in [
            'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4',
            'acrobot_obs_8'
    ]:
        #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
        obs_width = 6.0
        step_sz = 0.02
        num_steps = 21
        goal_radius = 2.0
        random_seed = 0
        delta_near = 0.1
        delta_drain = 0.05

    # load previously trained model if start epoch > 0
    #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps)
    mlp_path = os.path.join(
        os.getcwd() + '/c++/',
        'acrobot_obs_MLP_lr0.010000_epoch_2850_step_20.pt')
    encoder_path = os.path.join(
        os.getcwd() + '/c++/',
        'acrobot_obs_encoder_lr0.010000_epoch_2850_step_20.pt')
    cost_mlp_path = os.path.join(
        os.getcwd() + '/c++/',
        'costnet_acrobot_obs_8_MLP_epoch_300_step_20.pt')
    cost_encoder_path = os.path.join(
        os.getcwd() + '/c++/',
        'costnet_acrobot_obs_8_encoder_epoch_300_step_20.pt')

    print('mlp_path:')
    print(mlp_path)

    #####################################################
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, max_iteration, data, out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            system = _sst_module.CartPole()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(
                x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init,
                u_init, t_init)
            goal_S0 = np.identity(4)
            goal_rho0 = 1.0
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))

            step_sz = 0.02
            num_steps = 21
            goal_radius = 2
            random_seed = 0
            #delta_near=1.0
            #delta_drain=0.5
            delta_near = 0.1
            delta_drain = 0.05
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path,
                                              cost_mlp_path, cost_encoder_path,
                                              20, num_steps, step_sz,
                                              propagate_system, args.device)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obc:')
        #print(obc.shape)
        #print(delta_near)
        #print(delta_drain)
        #print('start_state:')
        #print(start_state)
        #print('goal_state:')
        #print(goal_state)

        state_t = start_state

        pick_goal_threshold = 0.10
        goal_linear_inc_start_iter = int(0.6 * max_iteration)
        goal_linear_inc_end_iter = max_iteration
        goal_linear_inc_end_threshold = 0.95
        goal_linear_inc = (goal_linear_inc_end_threshold -
                           pick_goal_threshold) / (goal_linear_inc_end_iter -
                                                   goal_linear_inc_start_iter)

        start_time = time.time()
        plan_time = -1
        for i in tqdm(range(max_iteration)):
            print('iteration: %d' % (i))
            print('state_t:')
            print(state_t)
            # calculate if using goal or not
            use_goal_prob = random.random()
            if i > goal_linear_inc_start_iter:
                pick_goal_threshold += goal_linear_inc
            if use_goal_prob <= pick_goal_threshold:
                flag = 0
            else:
                flag = 1

            bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \
                                    flag, goal_radius, max_iteration, distance_computer, \
                                    delta_near, delta_drain)

            solution = planner.get_solution()
            if solution is not None:
                plan_time = time.time() - start_time

        if plan_time == -1:
            print('failed.')
            out_queue.put(-1)
        else:
            print('path succeeded.')
            out_queue.put(plan_time)  #if len(res_x) == 0:

    ####################################################################################

    # load data
    print('loading...')
    if args.seen_N > 0:
        seen_test_data = data_loader.load_test_dataset(args.seen_N,
                                                       args.seen_NP,
                                                       args.data_folder, obs_f,
                                                       args.seen_s,
                                                       args.seen_sp)
    if args.unseen_N > 0:
        unseen_test_data = data_loader.load_test_dataset(
            args.unseen_N, args.unseen_NP, args.data_folder, obs_f,
            args.unseen_s, args.unseen_sp)
    # test
    # testing

    queue = Queue(1)
    print('testing...')
    seen_test_suc_rate = 0.
    unseen_test_suc_rate = 0.

    obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data
    obc = obc.astype(np.float32)
    #obc = torch.from_numpy(obc)
    #if torch.cuda.is_available():
    #    obc = obc.cuda()
    for i in range(len(paths)):
        new_obs_i = []
        obs_i = obs[i]
        for k in range(len(obs_i)):
            obs_pt = []
            obs_pt.append(obs_i[k][0] - obs_width / 2)
            obs_pt.append(obs_i[k][1] - obs_width / 2)
            obs_pt.append(obs_i[k][0] - obs_width / 2)
            obs_pt.append(obs_i[k][1] + obs_width / 2)
            obs_pt.append(obs_i[k][0] + obs_width / 2)
            obs_pt.append(obs_i[k][1] + obs_width / 2)
            obs_pt.append(obs_i[k][0] + obs_width / 2)
            obs_pt.append(obs_i[k][1] - obs_width / 2)
            new_obs_i.append(obs_pt)
        obs_i = new_obs_i
        #print(obs_i)
        for j in range(len(paths[i])):
            start_state = sgs[i][j][0]
            goal_inform_state = paths[i][j][-1]
            goal_state = sgs[i][j][1]
            #p = Process(target=plan_one_path, args=(obs[i], obc[i], start_state, goal_state, 500, queue))

            # propagate data
            p_start = paths[i][j][0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(controls[i][j])):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(costs[i][j][k] / step_sz)
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                p_start = paths[i][j][k]
                state[-1] = paths[i][j][k]
                for step in range(1, max_steps + 1):
                    p_start = dynamics(p_start, controls[i][j][k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    detail_controls.append(controls[i][j])
                    detail_costs.append(step_sz)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        control.append(controls[i][j][k])
                        cost.append(accum_cost)
                        accum_cost = 0.
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            state[-1] = paths[i][j][-1]
            data = state
            p = Process(target=plan_one_path,
                        args=(obs_i, obs[i], obc[i], start_state, goal_state,
                              goal_inform_state, 1000, data, queue))
            p.start()
            p.join()
            res = queue.get()
Пример #14
0
def main(args):
    # set seed
    print(args.model_path)
    torch_seed = np.random.randint(low=0, high=1000)
    np_seed = np.random.randint(low=0, high=1000)
    py_seed = np.random.randint(low=0, high=1000)
    #torch.manual_seed(torch_seed)
    np.random.seed(np_seed)
    random.seed(py_seed)
    # Build the models
    #if torch.cuda.is_available():
    #    torch.cuda.set_device(args.device)

    # setup evaluation function and load function
    if args.env_type == 'pendulum':
        obs_file = None
        obc_file = None
        obs_f = False
        #system = standard_cpp_systems.PSOPTPendulum()
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
    elif args.env_type == 'cartpole_obs':
        normalize = cartpole.normalize
        unnormalize = cartpole.unnormalize
        obs_file = None
        obc_file = None
        dynamics = cartpole.dynamics
        jax_dynamics = cartpole.jax_dynamics
        enforce_bounds = cartpole.enforce_bounds
        cae = CAE_acrobot_voxel_2d
        mlp = mlp_acrobot.MLP
        obs_f = True
        #system = standard_cpp_systems.RectangleObs(obs_list, args.obs_width, 'cartpole')
        #bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
    elif args.env_type == 'acrobot_obs':
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)

        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 200, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0

    elif args.env_type == 'acrobot_obs_2':
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0

    elif args.env_type == 'acrobot_obs_3':
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0

    elif args.env_type == 'acrobot_obs_5':
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0
    elif args.env_type == 'acrobot_obs_6':
        obs_file = None
        obc_file = None
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0
        obs_width = 6.0
    elif args.env_type == 'acrobot_obs_6':
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 21
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0
        obs_width = 6.0

    elif args.env_type == 'acrobot_obs_8':
        obs_file = None
        obc_file = None
        system = _sst_module.PSOPTAcrobot()
        cpp_propagator = _sst_module.SystemPropagator()
        dynamics = lambda x, u, t: cpp_propagator.propagate(system, x, u, t)
        obs_f = True
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        #num_steps = 21
        num_steps = 21  #args.num_steps*2
        traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init: bvp_solver.solve(
            x0, x1, 400, num_steps, step_sz * 1, step_sz *
            (num_steps - 1), x_init, u_init, t_init)
        #traj_opt = lambda x0, x1, step_sz, num_steps, x_init, u_init, t_init:
        #def cem_trajopt(x0, x1, step_sz, num_steps, x_init, u_init, t_init):
        #    u, t = acrobot_obs.trajopt(x0, x1, 500, num_steps, step_sz*1, step_sz*(num_steps-1), x_init, u_init, t_init)
        #    xs, us, dts, valid = propagate(x0, u, t, dynamics=dynamics, enforce_bounds=enforce_bounds, IsInCollision=lambda x: False, system=system, step_sz=step_sz)
        #    return xs, us, dts
        #traj_opt = cem_trajopt
        obs_width = 6.0
        goal_S0 = np.diag([1., 1., 0, 0])
        goal_rho0 = 1.0

    if args.env_type == 'pendulum':
        step_sz = 0.002
        num_steps = 20

    elif args.env_type == 'cartpole_obs':
        #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
        step_sz = 0.002
        num_steps = 20
        goal_S0 = np.identity(4)
        goal_rho0 = 1.0
    elif args.env_type in [
            'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4',
            'acrobot_obs_8'
    ]:
        #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
        obs_width = 6.0
        step_sz = 0.02
        num_steps = 21
        goal_radius = 2.0
        random_seed = 0
        delta_near = 0.1
        delta_drain = 0.05

    # load previously trained model if start epoch > 0
    #model_path='kmpnet_epoch_%d_direction_0_step_%d.pkl' %(args.start_epoch, args.num_steps)
    mlp_path = os.path.join(
        os.getcwd() + '/c++/',
        'acrobot_obs_MLP_lr0.010000_epoch_2850_step_20.pt')
    encoder_path = os.path.join(
        os.getcwd() + '/c++/',
        'acrobot_obs_encoder_lr0.010000_epoch_2850_step_20.pt')
    cost_mlp_path = os.path.join(
        os.getcwd() + '/c++/',
        'costnet_acrobot_obs_8_MLP_epoch_300_step_20.pt')
    cost_encoder_path = os.path.join(
        os.getcwd() + '/c++/',
        'costnet_acrobot_obs_8_encoder_epoch_300_step_20.pt')

    print('mlp_path:')
    print(mlp_path)

    #####################################################
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, max_iteration, out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            system = _sst_module.CartPole()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(
                x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init,
                u_init, t_init)
            goal_S0 = np.identity(4)
            goal_rho0 = 1.0
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius = 2.0
            random_seed = 0
            delta_near = .1
            delta_drain = 0.05
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path,
                                              cost_mlp_path, cost_encoder_path,
                                              args.bvp_iter, num_steps,
                                              step_sz, propagate_system, 3)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obc:')
        #print(obc.shape)
        #print(delta_near)
        #print(delta_drain)
        #print('start_state:')
        #print(start_state)
        #print('goal_state:')
        #print(goal_state)
        res_x, res_u, res_t = planner.plan("sst", args.plan_type, propagate_system, psopt_system, obc.flatten(), start_state, goal_inform_state, goal_inform_state, \
                                goal_radius, max_iteration, distance_computer, \
                                args.delta_near, args.delta_drain)

        #res_x, res_u, res_t = planner.plan("sst", propagate_system, psopt_system, obc.flatten(), start_state, goal_state, goal_inform_state, \
        #                        goal_radius, max_iteration, propagate_system.distance_computer(), \
        #                        delta_near, delta_drain)
        plan_time = time.time() - time0
        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')

        if len(res_x) != 0:
            xs_to_plot = np.array(res_x)
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
            ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='orange')
            print('solution: x')
            print(res_x)
            print('solution: u')
            print(res_u)
            print('solution: t')
            print(res_t)
            # draw start and goal
            ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            plt.waitforbuttonpress()

        
        
        #im = planner.visualize_nodes(propagate_system)
        #sec = input('Let us wait for user input')
        #show_image_opencv(im, "planning_tree", wait=True)
        """
        print('plan time: %fs' % (plan_time))
        if len(res_x) == 0:
            print('failed.')
            out_queue.put(-1)
        else:
            print('path succeeded.')
            out_queue.put(plan_time)

    ####################################################################################

    # load data
    print('loading...')
    if args.seen_N > 0:
        seen_test_data = data_loader.load_test_dataset(args.seen_N,
                                                       args.seen_NP,
                                                       args.data_folder, obs_f,
                                                       args.seen_s,
                                                       args.seen_sp)
    if args.unseen_N > 0:
        unseen_test_data = data_loader.load_test_dataset(
            args.unseen_N, args.unseen_NP, args.data_folder, obs_f,
            args.unseen_s, args.unseen_sp)
    # test
    # testing

    queue = Queue(1)
    print('testing...')
    seen_test_suc_rate = 0.
    unseen_test_suc_rate = 0.

    obc, obs, paths, sgs, path_lengths, controls, costs = seen_test_data
    obc = obc.astype(np.float32)
    #obc = torch.from_numpy(obc)
    #if torch.cuda.is_available():
    #    obc = obc.cuda()

    plan_res = []
    plan_times = []
    plan_res_all = []
    for i in range(len(paths)):
        new_obs_i = []
        obs_i = obs[i]
        plan_res_env = []
        plan_time_env = []
        for k in range(len(obs_i)):
            obs_pt = []
            obs_pt.append(obs_i[k][0] - obs_width / 2)
            obs_pt.append(obs_i[k][1] - obs_width / 2)
            obs_pt.append(obs_i[k][0] - obs_width / 2)
            obs_pt.append(obs_i[k][1] + obs_width / 2)
            obs_pt.append(obs_i[k][0] + obs_width / 2)
            obs_pt.append(obs_i[k][1] + obs_width / 2)
            obs_pt.append(obs_i[k][0] + obs_width / 2)
            obs_pt.append(obs_i[k][1] - obs_width / 2)
            new_obs_i.append(obs_pt)
        obs_i = new_obs_i
        #print(obs_i)
        for j in range(len(paths[i])):
            start_state = sgs[i][j][0]
            goal_inform_state = paths[i][j][-1]
            goal_state = sgs[i][j][1]
            print('environment: %d/%d, path: %d/%d' %
                  (i + 1, len(paths), j + 1, len(paths[i])))

            p = Process(target=plan_one_path,
                        args=(obs_i, obs[i], obc[i], start_state, goal_state,
                              goal_inform_state, 1000, queue))
            #plan_one_path(obs_i, obs[i], obc[i], start_state, goal_state, goal_inform_state, 500, queue)
            p.start()
            p.join()
            res = queue.get()
            if res == -1:
                plan_res_env.append(0)
                plan_res_all.append(0)
            else:
                plan_res_env.append(1)
                plan_times.append(res)
                plan_res_all.append(1)
            print('average accuracy up to now: %f' %
                  (np.array(plan_res_all).flatten().mean()))
            print('plan average time: %f' % (np.array(plan_times).mean()))
            print('plan time std: %f' % (np.array(plan_times).std()))
        plan_res.append(plan_res_env)
    print('plan accuracy: %f' % (np.array(plan_res).flatten().mean()))
    print('plan average time: %f' % (np.array(plan_times).mean()))
    print('plan time std: %f' % (np.array(plan_times).std()))
Пример #15
0
def main(args):
    # set seed
    print(args.model_path)
    torch_seed = np.random.randint(low=0, high=1000)
    np_seed = np.random.randint(low=0, high=1000)
    py_seed = np.random.randint(low=0, high=1000)
    torch.manual_seed(torch_seed)
    np.random.seed(np_seed)
    random.seed(py_seed)
    # Build the models
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)

    # setup evaluation function and load function
    if args.env_type == 'pendulum':
        IsInCollision = pendulum.IsInCollision
        normalize = pendulum.normalize
        unnormalize = pendulum.unnormalize
        obs_file = None
        obc_file = None
        cae = cae_identity
        mlp = MLP
        system = standard_cpp_systems.PSOPTPendulum()
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
        max_iter = 100
        min_time_steps = 10
        max_time_steps = 200
        integration_step = 0.002
        goal_radius = 0.1
        random_seed = 0
        sst_delta_near = 0.05
        sst_delta_drain = 0.02
        vel_idx = [1]

    mpNet = KMPNet(args.total_input_size, args.AE_input_size,
                   args.mlp_input_size, args.output_size, cae, mlp)
    # load previously trained model if start epoch > 0
    model_path = 'kmpnet_epoch_%d.pkl' % (args.start_epoch)
    if args.start_epoch > 0:
        load_net_state(mpNet, os.path.join(args.model_path, model_path))
        torch_seed, np_seed, py_seed = load_seed(
            os.path.join(args.model_path, model_path))
        # set seed after loading
        torch.manual_seed(torch_seed)
        np.random.seed(np_seed)
        random.seed(py_seed)
    if torch.cuda.is_available():
        mpNet.cuda()
        mpNet.mlp.cuda()
        mpNet.encoder.cuda()
        if args.opt == 'Adagrad':
            mpNet.set_opt(torch.optim.Adagrad, lr=args.learning_rate)
        elif args.opt == 'Adam':
            mpNet.set_opt(torch.optim.Adam, lr=args.learning_rate)
        elif args.opt == 'SGD':
            mpNet.set_opt(torch.optim.SGD, lr=args.learning_rate, momentum=0.9)
    if args.start_epoch > 0:
        load_opt_state(mpNet, os.path.join(args.model_path, model_path))

    # load train and test data
    print('loading...')
    if args.seen_N > 0:
        seen_test_data = data_loader.load_test_dataset(
            N=args.seen_N,
            NP=args.seen_NP,
            s=args.seen_s,
            sp=args.seen_sp,
            p_folder=args.path_folder,
            obs_f=obs_file,
            obc_f=obc_file)
    if args.unseen_N > 0:
        unseen_test_data = data_loader.load_test_dataset(
            N=args.unseen_N,
            NP=args.unseen_NP,
            s=args.unseen_s,
            sp=args.unseen_sp,
            p_folder=args.path_folder,
            obs_f=obs_file,
            obc_f=obc_file)
    # test
    # testing
    print('testing...')
    seen_test_suc_rate = 0.
    unseen_test_suc_rate = 0.
    T = 1

    obc, obs, paths, path_lengths = seen_test_data
    if obs is not None:
        obs = obs.astype(np.float32)
        obs = torch.from_numpy(obs)
    fes_env = []  # list of list
    valid_env = []
    time_env = []
    time_total = []
    normalize_func = lambda x: normalize(x, args.world_size)
    unnormalize_func = lambda x: unnormalize(x, args.world_size)

    for i in range(len(paths)):
        time_path = []
        fes_path = []  # 1 for feasible, 0 for not feasible
        valid_path = []  # if the feasibility is valid or not
        # save paths to different files, indicated by i
        # feasible paths for each env
        suc_n = 0
        for j in range(len(paths[0])):
            plt.ion()
            fig = plt.figure()
            ax = fig.add_subplot(111)
            ax.set_autoscale_on(True)
            hl, = ax.plot([], [], 'black')
            hl_real, = ax.plot([], [], 'yellow')

            time0 = time.time()
            time_norm = 0.
            fp = 0  # indicator for feasibility
            print("step: i=" + str(i) + " j=" + str(j))
            p1_ind = 0
            p2_ind = 0
            p_ind = 0
            if path_lengths[i][j] == 0:
                # invalid, feasible = 0, and path count = 0
                fp = 0
                valid_path.append(0)
            if path_lengths[i][j] > 0:
                fp = 0
                valid_path.append(1)
                path = [paths[i][j][0], paths[i][j][path_lengths[i][j] - 1]]
                start = paths[i][j][0]
                end = paths[i][j][path_lengths[i][j] - 1]
                #start[1] = 0.
                #end[1] = 0.
                # plot the entire path
                #plt.plot(paths[i][j][:,0], paths[i][j][:,1])
                control = []
                time_step = []
                MAX_NEURAL_REPLAN = 11
                if obs is None:
                    obs_i = None
                    obc_i = None
                else:
                    obs_i = obs[i]
                    obc_i = obc[i]
            for k in range(path_lengths[i][j]):
                update_line(hl, ax, fig, paths[i][j][k])
            print('created RRT')
            # Run planning and print out solution is some statistics every few iterations.
            time0 = time.time()
            start = paths[i][j][0]
            #end = paths[i][j][path_lengths[i][j]-1]
            new_sample = start
            print(new_sample)
            ax.scatter(new_sample[0], new_sample[1], c='r')
            ax.scatter(end[0], end[1], c='g')
            for iteration in range(max_iter):
                clear_line(hl_real, ax, fig)
                #hl_real, = ax.plot([], [], 'yellow')
                ip1 = np.concatenate([new_sample, end])
                np.expand_dims(ip1, 0)
                #ip1=torch.cat((obs,start,goal)).unsqueeze(0)
                time0 = time.time()
                ip1 = normalize_func(ip1)
                ip1 = torch.FloatTensor(ip1)
                time_norm += time.time() - time0
                ip1 = to_var(ip1)
                if obs is not None:
                    obs = torch.FloatTensor(obs).unsqueeze(0)
                    obs = to_var(obs)
                sample = mpNet(ip1, obs).squeeze(0)
                # unnormalize to world size
                sample = sample.data.cpu().numpy()
                time0 = time.time()
                sample = unnormalize_func(sample)
                ax.scatter(sample[0], sample[1], c='b')
                plt.pause(0.01)

                steer, steer_state, steer_control, steer_time_step = plan_general.steerTo(
                    bvp_solver, start, sample, None, None, step_sz=0.02)
                for k in range(len(steer_state)):
                    update_line(hl_real, ax, fig, steer_state[k])
            plt.waitforbuttonpress()
Пример #16
0
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, max_iteration, out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            system = _sst_module.CartPole()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(
                x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init,
                u_init, t_init)
            goal_S0 = np.identity(4)
            goal_rho0 = 1.0
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius = 2.0
            random_seed = 0
            delta_near = .1
            delta_drain = 0.05
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path,
                                              cost_mlp_path, cost_encoder_path,
                                              args.bvp_iter, num_steps,
                                              step_sz, propagate_system, 3)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obc:')
        #print(obc.shape)
        #print(delta_near)
        #print(delta_drain)
        #print('start_state:')
        #print(start_state)
        #print('goal_state:')
        #print(goal_state)
        res_x, res_u, res_t = planner.plan("sst", args.plan_type, propagate_system, psopt_system, obc.flatten(), start_state, goal_inform_state, goal_inform_state, \
                                goal_radius, max_iteration, distance_computer, \
                                args.delta_near, args.delta_drain)

        #res_x, res_u, res_t = planner.plan("sst", propagate_system, psopt_system, obc.flatten(), start_state, goal_state, goal_inform_state, \
        #                        goal_radius, max_iteration, propagate_system.distance_computer(), \
        #                        delta_near, delta_drain)
        plan_time = time.time() - time0
        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')

        if len(res_x) != 0:
            xs_to_plot = np.array(res_x)
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
            ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='orange')
            print('solution: x')
            print(res_x)
            print('solution: u')
            print(res_u)
            print('solution: t')
            print(res_t)
            # draw start and goal
            ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            plt.waitforbuttonpress()

        
        
        #im = planner.visualize_nodes(propagate_system)
        #sec = input('Let us wait for user input')
        #show_image_opencv(im, "planning_tree", wait=True)
        """
        print('plan time: %fs' % (plan_time))
        if len(res_x) == 0:
            print('failed.')
            out_queue.put(-1)
        else:
            print('path succeeded.')
            out_queue.put(plan_time)
Пример #17
0
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, max_iteration, data, out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            system = _sst_module.CartPole()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(
                x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init,
                u_init, t_init)
            goal_S0 = np.identity(4)
            goal_rho0 = 1.0
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))

            psopt_step_sz = 0.02
            psopt_num_steps = 21

            step_sz = 0.02
            num_steps = 21
            goal_radius = 2
            random_seed = 0
            #delta_near=1.0
            #delta_drain=0.5
            delta_near = 0.1
            delta_drain = 0.05
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path,
                                              cost_mlp_path, cost_encoder_path,
                                              20, psopt_num_steps,
                                              psopt_step_sz, step_sz,
                                              propagate_system, args.device)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obc:')
        #print(obc.shape)
        #print(delta_near)
        #print(delta_drain)
        #print('start_state:')
        #print(start_state)
        #print('goal_state:')
        #print(goal_state)

        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]),
                       np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2 * np.pi
            return res

        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2 * np.pi / dtheta)

        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta * i - np.pi, dtheta * j - np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:, 0], feasible_points[:, 1], c='yellow')
        ax.scatter(infeasible_points[:, 0], infeasible_points[:, 1], c='pink')
        for i in range(len(data)):
            update_line(hl, ax, data[i])
        draw_update_line(ax)
        state_t = start_state

        pick_goal_threshold = 0.10
        goal_linear_inc_start_iter = int(0.6 * max_iteration)
        goal_linear_inc_end_iter = max_iteration
        goal_linear_inc_end_threshold = 0.95
        goal_linear_inc = (goal_linear_inc_end_threshold -
                           pick_goal_threshold) / (goal_linear_inc_end_iter -
                                                   goal_linear_inc_start_iter)

        for i in range(max_iteration):
            print('iteration: %d' % (i))
            print('state_t:')
            print(state_t)
            # calculate if using goal or not
            use_goal_prob = random.random()
            if i > goal_linear_inc_start_iter:
                pick_goal_threshold += goal_linear_inc
            if use_goal_prob <= pick_goal_threshold:
                flag = 0
            else:
                flag = 1

            bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \
                                    flag, goal_radius, max_iteration, distance_computer, \
                                    delta_near, delta_drain)
            plan_time = time.time() - time0

            if len(
                    bvp_u
            ) != 0:  # and bvp_t[0] > 0.01:  # turn bvp_t off if want to use step_bvp

                # propagate data
                p_start = bvp_x[0]
                detail_paths = [p_start]
                detail_controls = []
                detail_costs = []
                state = [p_start]
                control = []
                cost = []
                for k in range(len(bvp_u)):
                    #state_i.append(len(detail_paths)-1)
                    max_steps = int(bvp_t[k] / step_sz)
                    accum_cost = 0.
                    for step in range(1, max_steps + 1):
                        p_start = dynamics(p_start, bvp_u[k], step_sz)
                        p_start = enforce_bounds(p_start)
                        detail_paths.append(p_start)
                        accum_cost += step_sz
                        if (step % 1 == 0) or (step == max_steps):
                            state.append(p_start)
                            cost.append(accum_cost)
                            accum_cost = 0.

                xs_to_plot = np.array(state)
                for i in range(len(xs_to_plot)):
                    xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
                #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green')
                ax.scatter(bvp_x[:, 0], bvp_x[:, 1], c='green', s=10.0)
                print('solution: x')
                print(bvp_x)
                print('solution: u')
                print(bvp_u)
                print('solution: t')
                print(bvp_t)
                # draw start and goal
                #ax.scatter(start_state[0], goal_state[0], marker='X')
                draw_update_line(ax)
                xw_scat = ax.scatter(mpnet_res[0],
                                     mpnet_res[1],
                                     c='brown',
                                     s=10.)
                draw_update_line(ax)
                #state_t = state[-1]
                # try using mpnet_res as new start
                state_t = mpnet_res
            else:
                # in incollision
                state_t = start_state
        #if len(res_x) == 0:
        #    print('failed.')
        out_queue.put(0)