Exemplo n.º 1
0
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, max_iteration, out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            system = _sst_module.CartPole()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(
                x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init,
                u_init, t_init)
            goal_S0 = np.identity(4)
            goal_rho0 = 1.0
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius = 2.0
            random_seed = 0
            delta_near = .1
            delta_drain = 0.05
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path,
                                              cost_mlp_path, cost_encoder_path,
                                              args.bvp_iter, num_steps,
                                              step_sz, propagate_system, 3)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obc:')
        #print(obc.shape)
        #print(delta_near)
        #print(delta_drain)
        #print('start_state:')
        #print(start_state)
        #print('goal_state:')
        #print(goal_state)
        res_x, res_u, res_t = planner.plan("sst", args.plan_type, propagate_system, psopt_system, obc.flatten(), start_state, goal_inform_state, goal_inform_state, \
                                goal_radius, max_iteration, distance_computer, \
                                args.delta_near, args.delta_drain)

        #res_x, res_u, res_t = planner.plan("sst", propagate_system, psopt_system, obc.flatten(), start_state, goal_state, goal_inform_state, \
        #                        goal_radius, max_iteration, propagate_system.distance_computer(), \
        #                        delta_near, delta_drain)
        plan_time = time.time() - time0
        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')

        if len(res_x) != 0:
            xs_to_plot = np.array(res_x)
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
            ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='orange')
            print('solution: x')
            print(res_x)
            print('solution: u')
            print(res_u)
            print('solution: t')
            print(res_t)
            # draw start and goal
            ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            plt.waitforbuttonpress()

        
        
        #im = planner.visualize_nodes(propagate_system)
        #sec = input('Let us wait for user input')
        #show_image_opencv(im, "planning_tree", wait=True)
        """
        print('plan time: %fs' % (plan_time))
        if len(res_x) == 0:
            print('failed.')
            out_queue.put(-1)
        else:
            print('path succeeded.')
            out_queue.put(plan_time)
Exemplo n.º 2
0
    def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, max_iteration, data, out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps, 1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            system = _sst_module.CartPole()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(x0, x1, 200, num_steps, step_sz*1, step_sz*50, x_init, u_init, t_init)
            goal_S0 = np.identity(4)
            goal_rho0 = 1.0
        elif args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            bvp_wrapper = _sst_module.PSOPTBVPWrapper(psopt_system, 4, 1, 0)
            step_sz = 0.02
            num_steps = 20
            psopt_num_steps = 20
            psopt_step_sz = 0.02
            goal_radius=2
            random_seed=0
            #delta_near=1.0
            #delta_drain=0.5
            delta_near=0.1
            delta_drain=0.05
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, 
                                              cost_mlp_path, cost_encoder_path, 
                                              20, psopt_num_steps+1, psopt_step_sz, step_sz, propagate_system, args.device)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obc:')
        #print(obc.shape)
        #print(delta_near)
        #print(delta_drain)
        #print('start_state:')
        #print(start_state)
        #print('goal_state:')
        #print(goal_state)

        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')
        
        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)
        circular = psopt_system.is_circular_topology()


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        
        data = np.array(data)
        ax.scatter(data[:,0], data[:,1], c='lightblue', s=10)
        ax.scatter(data[-1,0], data[-1,1], c='red', s=10, marker='*')

        draw_update_line(ax)
        state_t = start_state

        state_t = data[0]
        for data_i in range(0,len(data),num_steps):
            print('iteration: %d' % (data_i))
            print('state_t:')
            print(state_t)    

            
            min_dis_to_goal = 100000.
            min_xs_to_plot = []
            for trials in range(10):
                x_init, u_init, t_init = init_informer(propagate_system, state_t, data[data_i], psopt_num_steps+1, psopt_step_sz)
                print('x_init:')
                print(x_init)

                bvp_x, bvp_u, bvp_t = bvp_wrapper.solve(state_t, x_init[-1], 20, psopt_num_steps+1, 0.8*psopt_step_sz*psopt_num_steps, 2*psopt_step_sz*psopt_num_steps, \
                                                        x_init, u_init, t_init)
                print('bvp_x:')
                print(bvp_x)
                print('bvp_u:')
                print(bvp_u)
                print('bvp_t:')
                print(bvp_t)
                if len(bvp_u) != 0:# and bvp_t[0] > 0.01:  # turn bvp_t off if want to use step_bvp
                    # propagate data
                    #p_start = bvp_x[0]
                    p_start = state_t
                    detail_paths = [p_start]
                    detail_controls = []
                    detail_costs = []
                    state = [p_start]
                    control = []
                    cost = []
                    for k in range(len(bvp_t)):
                        #state_i.append(len(detail_paths)-1)
                        max_steps = int(np.round(bvp_t[k]/step_sz))
                        accum_cost = 0.
                        for step in range(1,max_steps+1):
                            p_start = dynamics(p_start, bvp_u[k], step_sz)
                            p_start = enforce_bounds(p_start)
                            detail_paths.append(p_start)
                            accum_cost += step_sz
                            if (step % 1 == 0) or (step == max_steps):
                                state.append(p_start)
                                cost.append(accum_cost)
                                accum_cost = 0.

                    xs_to_plot = np.array(state)
                    
                    for i in range(len(xs_to_plot)):
                        xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
                    delta_x = xs_to_plot[-1] - data[data_i]
                    for i in range(len(delta_x)):
                        if circular[i]:
                            delta_x[i] = delta_x[i] - np.floor(delta_x[i] / (2*np.pi))*(2*np.pi)
                            if delta_x[i] > np.pi:
                                delta_x[i] = delta_x[i] - 2*np.pi
                    dis = np.linalg.norm(delta_x)
                    if dis <= min_dis_to_goal:
                        min_dis_to_goal = dis
                        min_xs_to_plot = xs_to_plot

            #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green')
            ax.scatter(min_xs_to_plot[:,0], min_xs_to_plot[:,1], c='green', s=10.0)

            # draw start and goal
            #ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            #state_t = min_xs_to_plot[-1]
            # try using mpnet_res as new start

            state_t = data[data_i]




            #state_t = min_xs_to_plot[-1]
            print('data_i:')

            print(data[data_i])
            #else:
            #    # in incollision
            #    state_t = data[data_i]
        #if len(res_x) == 0:
        #    print('failed.')
        out_queue.put(0)
Exemplo n.º 3
0
    def plan_one_path(obs_i, obs, obc, detailed_data_path, data_path,
                      start_state, goal_state, goal_inform_state, cost_i,
                      max_iteration, out_queue_t, out_queue_cost, random_seed):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            obs_width = 4.0
            psopt_system = _sst_module.PSOPTCartPole()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 4., 'cartpole')
            #distance_computer = propagate_system.distance_computer()
            distance_computer = _sst_module.euclidean_distance(
                np.array(propagate_system.is_circular_topology()))
            step_sz = 0.002
            num_steps = 21
            goal_radius = 1.5
            random_seed = 0
            delta_near = 2.0
            delta_drain = 1.2
            #delta_near=.2
            #delta_drain=.1
            cost_threshold = 1.05
            min_time_steps = 10
            max_time_steps = 200
            #min_time_steps = 5
            #max_time_steps = 400
            integration_step = 0.002
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius = 2.0
            random_seed = 0
            delta_near = 1.0
            delta_drain = 0.5
            cost_threshold = 1.05
            min_time_steps = 5
            max_time_steps = 100
            integration_step = 0.02
        planner = _sst_module.SSTWrapper(
            state_bounds=propagate_system.get_state_bounds(),
            control_bounds=propagate_system.get_control_bounds(),
            distance=distance_computer,
            start_state=start_state,
            goal_state=goal_state,
            goal_radius=goal_radius,
            random_seed=random_seed,
            sst_delta_near=delta_near,
            sst_delta_drain=delta_drain)
        #print('creating planner...')
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()

        for i in range(max_iteration):
            planner.step(propagate_system, min_time_steps, max_time_steps,
                         integration_step)

            # early break for initial path
            solution = planner.get_solution()
            if solution is not None:
                #print('solution found already.')
                # based on cost break
                xs, us, ts = solution
                t_sst = np.sum(ts)
                #print(t_sst)
                #print(cost_i)
                if t_sst <= cost_i * cost_threshold:
                    print('solved in %d iterations' % (i))
                    break
        plan_time = time.time() - time0
        solution = planner.get_solution()
        xs, us, ts = solution
        print(np.linalg.norm(np.array(xs[-1]) - goal_state))
        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        #ax.set_xlim(-30, 30)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()
            
        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dx = 1
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        #imax = int(2*30./dx)
        imax = int(2*np.pi/dtheta)
        jmin = 0
        jmax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(jmin, jmax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        draw_update_line(ax)
        #state_t = start_state

        xs_to_plot = np.array(detailed_data_path)
        for i in range(len(xs_to_plot)):
            xs_to_plot[i] = wrap_angle(detailed_data_path[i], propagate_system)
        ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='lightgreen', s=0.5)
        # draw start and goal
        #ax.scatter(start_state[0], goal_state[0], marker='X')
        draw_update_line(ax)
        #plt.waitforbuttonpress()

        
        if solution is not None:
            xs, us, ts = solution
            
            # propagate data
            p_start = xs[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(us)):
                #state_i.append(len(detail_paths)-1)
                print(ts[k])
                max_steps = int(np.round(ts[k]/step_sz))
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, us[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            #state[-1] = xs[-1]
            
            
            
            xs_to_plot = np.array(state)
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
            ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green', s=0.5)
            start_state_np = np.array(start_state)
            goal_state_np = np.array(goal_state)
            ax.scatter([start_state_np[0]], [start_state_np[1]], c='blue', marker='*')
            ax.scatter([goal_state_np[0]], [goal_state_np[1]], c='red', marker='*')

            # draw start and goal
            #ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            plt.waitforbuttonpress()
        """

        # validate if the path contains collision
        if solution is not None:
            res_x, res_u, res_t = solution

            print('solution_x:')
            print(res_x)
            print('path_x:')
            print(np.array(data_path))

            # propagate data
            p_start = res_x[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(res_u)):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(np.round(res_t[k] / step_sz))
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                #p_start = res_x[k]
                #state[-1] = res_x[k]
                for step in range(1, max_steps + 1):
                    p_start = dynamics(p_start, res_u[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
                        # check collision for the new state
                        if IsInCollision(p_start, obs_i):
                            print(
                                'collision happens at u_index: %d, step: %d' %
                                (k, step))
                        assert not IsInCollision(p_start, obs_i)

            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            #state[-1] = res_x[-1]
        # validation end

        print('plan time: %fs' % (plan_time))
        if solution is None:
            print('failed.')
            out_queue_t.put(-1)
            out_queue_cost.put(-1)
        else:
            print('path succeeded.')
            out_queue_t.put(plan_time)
            out_queue_cost.put(t_sst)
Exemplo n.º 4
0
        print(len(self.states))
        self.total = len(self.states)
        ani = animation.FuncAnimation(self.fig,
                                      self._animate,
                                      range(0, len(self.states), 20),
                                      interval=self.dt,
                                      blit=True,
                                      init_func=self._init,
                                      repeat=True)
        return ani


# generate start and goal
print('here')
#obs_list = np.array(obs_list)
system = standard_cpp_systems.RectangleObs(obs_list, width, 'car')
#system = CartPoleObs(obs_list)
# Create SST planner
min_time_steps = 20
max_time_steps = 200
integration_step = 0.002
max_iter = 1500000  # increase this to 1500000 to ensure more optimal solution can be found
goal_radius = 2.
random_seed = 0
sst_delta_near = .6
sst_delta_drain = .2

low = []
high = []
state_bounds = system.get_state_bounds()
for i in range(len(state_bounds)):
    def plan_one_path(obs_i, obs, obc, start_state, goal_state, goal_inform_state, cost_i, max_iteration, out_queue):
        if args.env_type in ['acrobot_obs','acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4', 'acrobot_obs_8']:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius=2.0
            random_seed=0
            delta_near=1.0
            delta_drain=0.5
            device = 3
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, \
                                              200, num_steps, step_sz, propagate_system, device)
        #cost_threshold = cost_i * 1.1
        cost_threshold = 100000000.
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        res_x, res_u, res_t = planner.plan_tree_SMP_cost_gradient("sst", propagate_system, psopt_system, obc.flatten(), start_state, goal_inform_state, goal_inform_state, \
                                goal_radius, max_iteration, distance_computer, \
                                delta_near, delta_drain, cost_threshold, 15)
        plan_time = time.time() - time0

        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        draw_update_line(ax)
        #state_t = start_state
                
        if len(res_u):
            # propagate data
            p_start = res_x[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(res_u)):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(res_t[k]/step_sz)
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                p_start = res_x[k]
                state[-1] = res_x[k]
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, res_u[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            state[-1] = res_x[-1]
            
            
            
            xs_to_plot = np.array(state)
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
                if IsInCollision(xs_to_plot[i], obs_i):
                    print('in collision')
            ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green')
            # draw start and goal
            #ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            plt.waitforbuttonpress()
        """
        
        #im = planner.visualize_nodes(propagate_system)
        #sec = input('Let us wait for user input')
        #show_image_opencv(im, "planning_tree", wait=True)
        
        # validate if the path contains collision
        """
        if len(res_u):
            # propagate data
            p_start = res_x[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(res_u)):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(res_t[k]/step_sz)
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                p_start = res_x[k]
                state[-1] = res_x[k]
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, res_u[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
                        # check collision for the new state
                        assert not IsInCollision(p_start, obs_i)
                        
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            state[-1] = res_x[-1]
        # validation end
        """
        
        print('plan time: %fs' % (plan_time))
        if len(res_x) == 0:
            print('failed.')
            out_queue.put(-1)
        else:
            print('path succeeded.')
            print('cost: %f' % (np.sum(res_t)))
            print('cost_threshold: %f' % (cost_threshold))
            print('data cost: %f' % (cost_i))
            out_queue.put(plan_time)
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, cost_i, max_iteration, data,
                      out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)
        elif args.env_type == 'cartpole_obs':
            obs_width = 4.0
            psopt_system = _sst_module.PSOPTCartPole()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, obs_width, 'cartpole')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.002
            num_steps = 101
            goal_radius = 1.5
            random_seed = 0
            delta_near = 2.0
            delta_drain = 1.2
            device = 3
            num_sample = 10
            min_time_steps = 10
            max_time_steps = 200
            mpnet_goal_threshold = 2.0
            mpnet_length_threshold = 40
            pick_goal_init_threshold = 0.1
            pick_goal_end_threshold = 0.8
            pick_goal_start_percent = 0.4
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius = 2.0
            random_seed = 0
            delta_near = 1.0
            delta_drain = 0.5
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, \
                                              200, num_steps, step_sz, propagate_system, 3)
        #cost_threshold = cost_i * 1.1
        cost_threshold = 100000000.
        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        for i in range(len(data)):
            update_line(hl, ax, data[i])
        draw_update_line(ax)        
        # visualization end
        """

        # visualize for cartpole
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_autoscale_on(True)
        hl, = ax.plot([], [], 'b')

        #hl_real, = ax.plot([], [], 'r')
        def update_line(h, ax, new_data):
            h.set_data(np.append(h.get_xdata(), new_data[0]),
                       np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def draw_update_line(ax):
            ax.relim()
            ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()

        # randomly pick up a point in the data, and find similar data in the dataset
        # plot the next point
        #ax.set_autoscale_on(True)
        ax.set_xlim(-30, 30)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]),
                       np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2 * np.pi
            return res

        dx = 1
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2 * 30. / dx)
        jmin = 0
        jmax = int(2 * np.pi / dtheta)

        for i in range(imin, imax):
            for j in range(jmin, jmax):
                x = np.array([dx * i - 30, 0., dtheta * j - np.pi, 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:, 0], feasible_points[:, 2], c='yellow')
        ax.scatter(infeasible_points[:, 0], infeasible_points[:, 2], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        draw_update_line(ax)
        #state_t = start_state
        # visualization end

        # generate a path by using SST to plan for some maximal iterations

        state_t = start_state
        pick_goal_threshold = .0
        ax.scatter(goal_inform_state[0],
                   goal_inform_state[2],
                   marker='*',
                   c='red')  # cartpole

        for i in range(max_iteration):
            time0 = time.time()
            # determine if picking goal based on iteration number
            goal_prob = random.random()
            #flag=1: using MPNet
            #flag=0: not using MPNet
            if goal_prob <= pick_goal_threshold:
                flag = 0
            else:
                flag = 1
            bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_tree_SMP_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \
                                flag, goal_radius, max_iteration, distance_computer, \
                                delta_near, delta_drain, cost_threshold)

            if len(
                    bvp_u
            ) != 0:  # and bvp_t[0] > 0.01:  # turn bvp_t off if want to use step_bvp
                xw_scat = ax.scatter(mpnet_res[0],
                                     mpnet_res[2],
                                     c='lightgreen')
                draw_update_line(ax)

                # propagate data
                p_start = bvp_x[0]
                detail_paths = [p_start]
                detail_controls = []
                detail_costs = []
                state = [p_start]
                control = []
                cost = []
                for k in range(len(bvp_u)):
                    #state_i.append(len(detail_paths)-1)
                    max_steps = int(bvp_t[k] / step_sz)
                    accum_cost = 0.
                    for step in range(1, max_steps + 1):
                        p_start = dynamics(p_start, bvp_u[k], step_sz)
                        p_start = enforce_bounds(p_start)
                        detail_paths.append(p_start)
                        accum_cost += step_sz
                        if (step % 1 == 0) or (step == max_steps):
                            state.append(p_start)
                            cost.append(accum_cost)
                            accum_cost = 0.

                xs_to_plot = np.array(state)
                for j in range(len(xs_to_plot)):
                    xs_to_plot[j] = wrap_angle(xs_to_plot[j], propagate_system)
                xs_to_plot = xs_to_plot[::5]
                #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green')  # acrobot
                ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 2],
                           c='green')  # cartpole

                #ax.scatter(bvp_x[:,0], bvp_x[:,1], c='green')
                print('solution: x')
                print(bvp_x)
                print('solution: u')
                print(bvp_u)
                print('solution: t')
                print(bvp_t)
                # draw start and goal
                #ax.scatter(start_state[0], goal_state[0], marker='X')
                draw_update_line(ax)
                #state_t = state[-1]

            print('state_t:')
            print(state_t)
            print('mpnet_res')
            print(mpnet_res)
            # based on flag, determine how to change state_t
            if flag:
                # only change state_t if in MPNet inform mode
                #if len(bvp_u) != 0:
                if True:
                    # try using steered result as next start
                    #if not IsInCollision(state_t, obs_i):
                    state_t = mpnet_res
                    print('after copying to state_t:')
                    print('state_t')
                    print(state_t)
                    # if in collision, then not using it
                else:
                    print('failure')
                    state_t = start_state  # failed BVP, back to origin

        plan_time = time.time() - time0

        print('plan time: %fs' % (plan_time))
        if len(res_u) == 0:
            print('failed.')
            out_queue.put(-1)
        else:
            print('path succeeded.')
            print('cost: %f' % (np.sum(res_t)))
            print('cost_threshold: %f' % (cost_threshold))
            print('data cost: %f' % (cost_i))
            out_queue.put(plan_time)
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, max_iteration, data, out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            system = _sst_module.CartPole()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(
                x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init,
                u_init, t_init)
            goal_S0 = np.identity(4)
            goal_rho0 = 1.0
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))

            step_sz = 0.02
            num_steps = 21
            goal_radius = 2
            random_seed = 0
            #delta_near=1.0
            #delta_drain=0.5
            delta_near = 0.1
            delta_drain = 0.05
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path,
                                              cost_mlp_path, cost_encoder_path,
                                              20, num_steps, step_sz,
                                              propagate_system, args.device)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obc:')
        #print(obc.shape)
        #print(delta_near)
        #print(delta_drain)
        #print('start_state:')
        #print(start_state)
        #print('goal_state:')
        #print(goal_state)

        state_t = start_state

        pick_goal_threshold = 0.10
        goal_linear_inc_start_iter = int(0.6 * max_iteration)
        goal_linear_inc_end_iter = max_iteration
        goal_linear_inc_end_threshold = 0.95
        goal_linear_inc = (goal_linear_inc_end_threshold -
                           pick_goal_threshold) / (goal_linear_inc_end_iter -
                                                   goal_linear_inc_start_iter)

        start_time = time.time()
        plan_time = -1
        for i in tqdm(range(max_iteration)):
            print('iteration: %d' % (i))
            print('state_t:')
            print(state_t)
            # calculate if using goal or not
            use_goal_prob = random.random()
            if i > goal_linear_inc_start_iter:
                pick_goal_threshold += goal_linear_inc
            if use_goal_prob <= pick_goal_threshold:
                flag = 0
            else:
                flag = 1

            bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \
                                    flag, goal_radius, max_iteration, distance_computer, \
                                    delta_near, delta_drain)

            solution = planner.get_solution()
            if solution is not None:
                plan_time = time.time() - start_time

        if plan_time == -1:
            print('failed.')
            out_queue.put(-1)
        else:
            print('path succeeded.')
            out_queue.put(plan_time)  #if len(res_x) == 0:
Exemplo n.º 8
0
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, max_iteration, data, out_queue):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            system = _sst_module.CartPole()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1, x_init, u_init, t_init: bvp_solver.solve(
                x0, x1, 200, num_steps, step_sz * 1, step_sz * 50, x_init,
                u_init, t_init)
            goal_S0 = np.identity(4)
            goal_rho0 = 1.0
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))

            psopt_step_sz = 0.02
            psopt_num_steps = 21

            step_sz = 0.02
            num_steps = 21
            goal_radius = 2
            random_seed = 0
            #delta_near=1.0
            #delta_drain=0.5
            delta_near = 0.1
            delta_drain = 0.05
        #print('creating planner...')
        planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path,
                                              cost_mlp_path, cost_encoder_path,
                                              20, psopt_num_steps,
                                              psopt_step_sz, step_sz,
                                              propagate_system, args.device)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obc:')
        #print(obc.shape)
        #print(delta_near)
        #print(delta_drain)
        #print('start_state:')
        #print(start_state)
        #print('goal_state:')
        #print(goal_state)

        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]),
                       np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2 * np.pi)) * (2 * np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2 * np.pi
            return res

        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2 * np.pi / dtheta)

        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta * i - np.pi, dtheta * j - np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:, 0], feasible_points[:, 1], c='yellow')
        ax.scatter(infeasible_points[:, 0], infeasible_points[:, 1], c='pink')
        for i in range(len(data)):
            update_line(hl, ax, data[i])
        draw_update_line(ax)
        state_t = start_state

        pick_goal_threshold = 0.10
        goal_linear_inc_start_iter = int(0.6 * max_iteration)
        goal_linear_inc_end_iter = max_iteration
        goal_linear_inc_end_threshold = 0.95
        goal_linear_inc = (goal_linear_inc_end_threshold -
                           pick_goal_threshold) / (goal_linear_inc_end_iter -
                                                   goal_linear_inc_start_iter)

        for i in range(max_iteration):
            print('iteration: %d' % (i))
            print('state_t:')
            print(state_t)
            # calculate if using goal or not
            use_goal_prob = random.random()
            if i > goal_linear_inc_start_iter:
                pick_goal_threshold += goal_linear_inc
            if use_goal_prob <= pick_goal_threshold:
                flag = 0
            else:
                flag = 1

            bvp_x, bvp_u, bvp_t, mpnet_res = planner.plan_step("sst", propagate_system, psopt_system, obc.flatten(), state_t, goal_inform_state, goal_inform_state, \
                                    flag, goal_radius, max_iteration, distance_computer, \
                                    delta_near, delta_drain)
            plan_time = time.time() - time0

            if len(
                    bvp_u
            ) != 0:  # and bvp_t[0] > 0.01:  # turn bvp_t off if want to use step_bvp

                # propagate data
                p_start = bvp_x[0]
                detail_paths = [p_start]
                detail_controls = []
                detail_costs = []
                state = [p_start]
                control = []
                cost = []
                for k in range(len(bvp_u)):
                    #state_i.append(len(detail_paths)-1)
                    max_steps = int(bvp_t[k] / step_sz)
                    accum_cost = 0.
                    for step in range(1, max_steps + 1):
                        p_start = dynamics(p_start, bvp_u[k], step_sz)
                        p_start = enforce_bounds(p_start)
                        detail_paths.append(p_start)
                        accum_cost += step_sz
                        if (step % 1 == 0) or (step == max_steps):
                            state.append(p_start)
                            cost.append(accum_cost)
                            accum_cost = 0.

                xs_to_plot = np.array(state)
                for i in range(len(xs_to_plot)):
                    xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
                #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green')
                ax.scatter(bvp_x[:, 0], bvp_x[:, 1], c='green', s=10.0)
                print('solution: x')
                print(bvp_x)
                print('solution: u')
                print(bvp_u)
                print('solution: t')
                print(bvp_t)
                # draw start and goal
                #ax.scatter(start_state[0], goal_state[0], marker='X')
                draw_update_line(ax)
                xw_scat = ax.scatter(mpnet_res[0],
                                     mpnet_res[1],
                                     c='brown',
                                     s=10.)
                draw_update_line(ax)
                #state_t = state[-1]
                # try using mpnet_res as new start
                state_t = mpnet_res
            else:
                # in incollision
                state_t = start_state
        #if len(res_x) == 0:
        #    print('failed.')
        out_queue.put(0)