Example No. 1
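Constructor of an RG-RRT planner subclass: the local steering function plan_collision_free_path_towards rolls the system forward from the nearest tree node toward a sampled state with lqr_forward_step, using step_size / nonlinear_dynamics_step_size nonlinear substeps, and returns the distance to the sample, the reached state, and the list of intermediate states.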
    def __init__(self,
                 sys,
                 sampler,
                 step_size,
                 reached_goal_function,
                 linearize=True):
        self.sys = sys
        self.step_size = step_size
        self.reached_goal_function = reached_goal_function
        pend = Pendulum()
        self.nonlinear_dynamics_step_size = pend.time_step
        self.linearize = linearize

        # self.posi_list = []
        # self.orie_list = []
        # self.new_postion = np.zeros([3,1])
        def plan_collision_free_path_towards(nearest_state, new_state):
            # possible_inputs = np.linspace(*sys.input_limits[:, 0], num=3)
            best_input = None
            best_new_state = None
            best_distance = np.inf
            best_states_list = None
            # for input in possible_inputs: #todo lqr
            states_list = [np.atleast_1d(nearest_state)]
            state = np.atleast_1d(nearest_state)
            for step in range(
                    int(self.step_size / self.nonlinear_dynamics_step_size)):
                # todo change alcontrol_step
                state = self.sys.lqr_forward_step(
                    starting_state=state,
                    target_state=new_state,
                    u=None,
                    modify_system=False,
                    return_as_env=False,
                    step_size=self.nonlinear_dynamics_step_size)
                states_list.append(state)
                #fixme cost=||distance||?
            # state, al_states_list = control_step(starting_state=state, target_state=new_state,
            #                                                           states_list=states_list, sz=self.step_size, ndsz=self.nonlinear_dynamics_step_size)
            new_distance = np.linalg.norm(new_state - state)
            # print(new_distance)
            if new_distance < best_distance:
                best_new_state = state
                best_distance = new_distance
                best_states_list = states_list
            return best_distance, best_new_state, best_states_list

        RGRRT.__init__(self, self.sys.get_current_state(), sampler,
                       reached_goal_function, plan_collision_free_path_towards)
Example No. 2
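R3T planning test for the torque-limited pendulum swing-up: a uniform sampler drives SymbolicSystem_R3T, contains_goal_function accepts a node once its reachable polytope comes within reachable_set_epsilon of either upright equilibrium (±pi, 0) and a forward-simulated input actually reaches it, and after each planning round the explored reachable sets are plotted and pickled.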
def test_pendulum_planning():
    initial_state = np.zeros(2)

    pendulum_system = Pendulum(initial_state=initial_state,
                               input_limits=np.asarray([[-input_limit],
                                                        [input_limit]]),
                               m=1,
                               l=0.5,
                               g=9.8,
                               b=0.1)
    goal_state = np.asarray([np.pi, 0.0])
    goal_state_2 = np.asarray([-np.pi, 0.0])
    step_size = 0.2  # 0.075
    nonlinear_dynamic_step_size = 1e-2

    def uniform_sampler():
        rnd = np.random.rand(2)
        rnd[0] = (rnd[0] - 0.5) * 2 * 1.5 * np.pi
        rnd[1] = (rnd[1] - 0.5) * 2 * 12
        goal_bias_rnd = np.random.rand(1)
        # if goal_bias_rnd <0.2:
        #     return goal_state + [2*np.pi*np.random.randint(-1,1),0] + [np.random.normal(0,0.8),np.random.normal(0,1.5)]
        return rnd

    def gaussian_mixture_sampler():
        gaussian_ratio = 0.0
        rnd = np.random.rand(2)
        rnd[0] = np.random.normal(goal_state[0], 1)
        rnd[1] = np.random.normal(goal_state[1], 1)
        if np.random.rand(1) > gaussian_ratio:
            return uniform_sampler()
        return rnd

    def ring_sampler():
        theta = np.random.rand(1) * 2 * np.pi
        rnd = np.zeros(2)
        r = np.random.rand(1) + 2.5
        rnd[0] = r * np.cos(theta)
        rnd[1] = r * np.sin(theta)
        return rnd

    def contains_goal_function(reachable_set, goal_state):
        distance = np.inf
        if np.linalg.norm(reachable_set.parent_state - goal_state) < 2:
            distance, projection = distance_point_polytope(
                reachable_set.polytope_list, goal_state)
        elif np.linalg.norm(reachable_set.parent_state - goal_state_2) < 2:
            distance, projection = distance_point_polytope(
                reachable_set.polytope_list, goal_state_2)
        else:
            return False, None
        if distance > reachable_set_epsilon:
            return False, None
        #enumerate inputs
        potential_inputs = np.linspace(pendulum_system.input_limits[0, 0],
                                       pendulum_system.input_limits[1, 0],
                                       input_samples)
        for u_i in potential_inputs:
            state_list = []
            state = reachable_set.parent_state
            for step in range(int(step_size / nonlinear_dynamic_step_size)):
                state = pendulum_system.forward_step(
                    u=np.atleast_1d(u_i),
                    linearlize=False,
                    modify_system=False,
                    step_size=nonlinear_dynamic_step_size,
                    return_as_env=False,
                    starting_state=state)
                state_list.append(state)
                if np.linalg.norm(goal_state - state) < goal_tolerance:
                    print('Goal error is %f' %
                          np.linalg.norm(goal_state - state))
                    return True, np.asarray(state_list)
                if np.linalg.norm(goal_state_2 - state) < goal_tolerance:
                    print('Goal error is %f' %
                          np.linalg.norm(goal_state_2 - state))
                    return True, np.asarray(state_list)
        return False, None

    rrt = SymbolicSystem_R3T(pendulum_system,
                             uniform_sampler,
                             step_size,
                             contains_goal_function=contains_goal_function,
                             use_true_reachable_set=True,
                             use_convex_hull=True)
    found_goal = False
    experiment_name = datetime.fromtimestamp(
        time.time()).strftime('%Y%m%d_%H-%M-%S')

    duration = 0
    os.makedirs('R3T_Pendulum_' + experiment_name)
    allocated_time = 1.25
    for i in range(max_iterations):
        start_time = time.time()
        if rrt.build_tree_to_goal_state(
                goal_state,
                stop_on_first_reach=False,
                allocated_time=allocated_time,
                rewire=False,
                explore_deterministic_next_state=False,
                save_true_dynamics_path=True) is not None:
            found_goal = True
        end_time = time.time()
        #get rrt polytopes
        polytope_reachable_sets = rrt.reachable_set_tree.id_to_reachable_sets.values()
        reachable_polytopes = []
        explored_states = []
        for prs in polytope_reachable_sets:
            reachable_polytopes.append(prs.polytope_list)
            explored_states.append(prs.parent_state)
        # print(explored_states)
        # print(len(explored_states))
        # print('number of nodes',rrt.node_tally)
        goal_override = None
        if found_goal:
            p = rrt.goal_node.parent.state
            if np.linalg.norm(p - np.asarray([np.pi, 0.0])) < np.linalg.norm(
                    p - np.asarray([-np.pi, 0.0])):
                goal_override = np.asarray([np.pi, 0.0])
            else:
                goal_override = np.asarray([-np.pi, 0.0])

        # # Plot state tree
        # fig = plt.figure()
        # ax = fig.add_subplot(111)
        # fig, ax = visualize_node_tree_2D(rrt, fig, ax, s=0.5, linewidths=0.15, show_path_to_goal=found_goal, goal_override=goal_override)
        # # fig, ax = visZ(reachable_polytopes, title="", alpha=0.07, fig=fig,  ax=ax, color='gray')
        # # for explored_state in explored_states:
        # #     plt.scatter(explored_state[0], explored_state[1], facecolor='red', s=6)
        # ax.scatter(initial_state[0], initial_state[1], facecolor='red', s=5)
        # ax.scatter(goal_state[0], goal_state[1], facecolor='green', s=5)
        # ax.scatter(goal_state[0]-2*np.pi, goal_state[1], facecolor='green', s=5)
        # ax.grid(True, which='both')
        # y_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False)
        # ax.yaxis.set_major_formatter(y_formatter)
        # ax.set_yticks(np.arange(-10, 10, 2))
        # ax.set_xlim([-4, 4])
        # ax.set_ylim([-10, 10])
        # ax.set_xlabel('$\\theta (rad)$')
        # ax.set_ylabel('$\dot{\\theta} (rad/s)$')
        #
        duration += (end_time - start_time)
        # plt.title('R3T after %.2f seconds (explored %d nodes)' %(duration, len(polytope_reachable_sets)))
        # plt.savefig('R3T_Pendulum_'+experiment_name+'/%.2f_seconds_tree.png' % duration, dpi=500)
        # # plt.show()
        # plt.xlim([-4, 4])
        # plt.ylim([-10,10])
        # plt.clf()
        # plt.close()

        #
        # # # Plot explored reachable sets
        # # FIXME: Handle degenerated reachable set
        fig = plt.figure()
        ax = fig.add_subplot(111)
        fig, ax = visualize_2D_AH_polytope(reachable_polytopes,
                                           fig=fig,
                                           ax=ax,
                                           N=200,
                                           epsilon=0.01)

        ax.scatter(initial_state[0], initial_state[1], facecolor='red', s=5)
        ax.scatter(goal_state[0], goal_state[1], facecolor='green', s=5)
        ax.scatter(goal_state[0] - 2 * np.pi,
                   goal_state[1],
                   facecolor='green',
                   s=5)

        # ax.set_aspect('equal')
        plt.xlabel('$x$')
        plt.ylabel(r'$\dot{x}$')
        plt.xlim([-4, 4])
        plt.ylim([-10, 10])
        plt.tight_layout()
        plt.title(r'$|u| \leq %.2f$ Reachable Set after %.2fs (%d nodes)' %
                  (input_limit, duration, len(polytope_reachable_sets)))
        plt.savefig('R3T_Pendulum_' + experiment_name +
                    '/%.2f_seconds_reachable_sets.png' % duration,
                    dpi=500)
        # plt.show()
        plt.clf()
        # plt.close()
        #
        # if found_goal:
        #     break
        allocated_time *= 2

        # store polytopes
        reachable_polytopes_clean = [[p.T, p.t, p.P.H, p.P.h]
                                     for p in reachable_polytopes]
        with open(
                'R3T_Pendulum_' + experiment_name +
                '/%.2f_seconds_reachable_sets.p' % duration, "wb") as f:
            pickle.dump(reachable_polytopes_clean, f)
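
A minimal sketch (path placeholders assumed) of reloading the pickled reachable sets written above; each stored entry holds the affine map T, t and the H-representation H, h of the underlying polytope:

import pickle

# hypothetical file name: substitute the experiment name and duration used above
with open('R3T_Pendulum_<experiment_name>/<duration>_seconds_reachable_sets.p', 'rb') as f:
    stored_polytopes = pickle.load(f)
for T, t, H, h in stored_polytopes:
    # AH-polytope {T @ x + t : H @ x <= h}
    print(T.shape, H.shape)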
Example No. 3
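RG-RRT planning test on the same pendulum: the tree is grown with build_tree_to_goal_state (100-second budget per round) until a state lands within 5e-2 of either upright equilibrium, and each round the explored tree, plus the path to the goal once found, is plotted and saved.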
def test_pendulum_planning():
    global best_distance
    best_distance = np.inf
    initial_state = np.zeros(2)
    pendulum_system = Pendulum(initial_state=initial_state, input_limits=np.asarray([[-1], [1]]), m=1, l=0.5, g=9.8, b=0.1)
    goal_state = np.asarray([np.pi,0.0])
    goal_state_2 = np.asarray([-np.pi,0.0])
    step_size = 0.08
    def uniform_sampler():
        rnd = np.random.rand(2)
        rnd[0] = (rnd[0]-0.5)*2*1.5*np.pi
        rnd[1] = (rnd[1]-0.5)*2*12
        goal_bias_rnd = np.random.rand(1)
        # if goal_bias_rnd <0.2:
        #     return goal_state + [2*np.pi*np.random.randint(-1,1),0] + [np.random.normal(0,0.8),np.random.normal(0,1.5)]
        return rnd

    def gaussian_mixture_sampler():
        gaussian_ratio = 0.0
        rnd = np.random.rand(2)
        rnd[0] = np.random.normal(goal_state[0],1)
        rnd[1] = np.random.normal(goal_state[1],1)
        if np.random.rand(1) > gaussian_ratio:
            return uniform_sampler()
        return rnd

    def ring_sampler():
        theta = np.random.rand(1)*2*np.pi
        rnd = np.zeros(2)
        r = np.random.rand(1)+2.5
        rnd[0] = r*np.cos(theta)
        rnd[1] = r*np.sin(theta)
        return rnd

    def big_gaussian_sampler():
        rnd = np.random.rand(2)
        rnd[0] = np.random.normal(0,1.5)
        rnd[1] = np.random.normal(0,3)
        goal_bias_rnd = np.random.rand(1)
        if goal_bias_rnd <0.05:
            return goal_state
        elif goal_bias_rnd < 0.1:
            return np.asarray([-np.pi,0.0])
        return rnd

    def reached_goal_function(state, goal_state):
        global best_distance
        d1 = np.linalg.norm(state-goal_state)
        d2 = np.linalg.norm(state-goal_state_2)
        best_distance = min(best_distance, min(d1, d2))
        if d1<5e-2 or d2<5e-2:
            print('Goal error %f' %min(np.linalg.norm(state-goal_state), np.linalg.norm(state-goal_state_2)))
            return True
        return False

    rrt = SymbolicSystem_RGRRT(pendulum_system, uniform_sampler, step_size, reached_goal_function=reached_goal_function)
    found_goal = False
    experiment_name = datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H-%M-%S')

    duration = 0
    os.makedirs('RG_RRT_Pendulum_'+experiment_name)
    while True:
        start_time = time.time()
        if rrt.build_tree_to_goal_state(goal_state,stop_on_first_reach=True, allocated_time= 100, rewire=False, explore_deterministic_next_state=False) is not None:
            found_goal = True
        end_time = time.time()

        if found_goal:
            p = rrt.goal_node.parent.state
            if np.linalg.norm(p-np.asarray([np.pi,0.0])) < np.linalg.norm(p-np.asarray([-np.pi,0.0])):
                goal_override = np.asarray([np.pi,0.0])
            else:
                goal_override = np.asarray([-np.pi, 0.0])
            rrt.goal_node.children = []
            rrt.goal_node.true_dynamics_path = [p, goal_override]
        # Plot state tree
        print('Best distance %f' %best_distance)
        fig = plt.figure()
        ax = fig.add_subplot(111)
        if found_goal:
            fig, ax = visualize_node_tree_2D(rrt, fig, ax, s=0.5, linewidths=0.15, show_path_to_goal=True)#, goal_override=goal_override)
        else:
            fig, ax = visualize_node_tree_2D(rrt, fig, ax, s=0.5, linewidths=0.15, show_path_to_goal=found_goal)
        # fig, ax = visZ(reachable_polytopes, title="", alpha=0.07, fig=fig,  ax=ax, color='gray')
        # for explored_state in explored_states:
        #     plt.scatter(explored_state[0], explored_state[1], facecolor='red', s=6)
        ax.scatter(initial_state[0], initial_state[1], facecolor='red', s=5)
        ax.scatter(goal_state[0], goal_state[1], facecolor='green', s=5)
        ax.scatter(goal_state[0]-2*np.pi, goal_state[1], facecolor='green', s=5)
        ax.grid(True, which='both')
        y_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False)
        ax.yaxis.set_major_formatter(y_formatter)
        ax.set_yticks(np.arange(-10, 10, 2))
        ax.set_xlim([-4, 4])
        ax.set_ylim([-10, 10])
        ax.set_xlabel('$\\theta (rad)$')
        ax.set_ylabel(r'$\dot{\theta} (rad/s)$')
        duration += (end_time-start_time)
        plt.title('RG-RRT after %.2f seconds (explored %d nodes)' %(duration, rrt.node_tally))
        plt.savefig('RG_RRT_Pendulum_'+experiment_name+'/%.2f_seconds_tree.png' % duration, dpi=500)
        # plt.show()
        plt.xlim([-4, 4])
        plt.ylim([-10,10])
        plt.clf()
        plt.close()

        if found_goal:
            break
Example No. 4
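One steering step built on a data-driven Koopman linearization: each substep lifts the measured state through the Koopman observables, solves a finite-horizon LQR problem on the lifted linear model, corrects the control with an adjoint-based term, saturates and applies it to the pendulum, and refits the Koopman operator from the observed transition.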
def alcontrol_step(starting_state=None,
                   target_state=None,
                   states_list=None,
                   sz=None,
                   ndsz=None):
    pend = Pendulum()
    # ntc = Pendulum.set_ntc()
    ### the pendulum's time step is used
    ### to construct the Koopman operator and the adjoint dif-eq
    koopman_operator = KoopmanOperator(pend.time_step)
    adjoint = Adjoint(pend.time_step)
    simulation_time = 1000
    horizon = 20  ### time horizon
    # control_reg = np.diag([1.] * 4) ### control regularization
    control_reg = np.diag([1.]) * 25
    inv_control_reg = np.linalg.inv(control_reg)  ### precompute this
    # default_action = lambda x : np.random.uniform(-0.1, 0.1, size=(4,)) ### in case lqr control returns NAN
    default_action = lambda x: np.random.uniform(-0.1, 0.1, size=(1, ))
    ### initialize the state
    state = starting_state  #np.array([-0.5 * math.pi, 0])
    # target_position = np.array([-1.5 * math.pi, 0.])
    task = Task()  ### this creates the task
    err = np.zeros([simulation_time, 2])
    # posi = np.zeros([simulation_time, 3])
    al_states_list = states_list
    for t in range(int(sz / ndsz)):
        #### measure state and transform through koopman observables
        m_state = get_koop_measurement(state)  # z: lifted measurement of the current state
        t_state = koopman_operator.transform_all_state(m_state)
        err[t, :] = state - target_state  # tracking error w.r.t. the target state
        Kx, Ku = koopman_operator.get_linearization()  ### grab the linear matrices of the lifted model
        lqr_policy = FiniteHorizonLQR(
            Kx, Ku, task.Q, task.R, task.Qf,
            horizon=horizon)  # instantiate a lqr controller
        lqr_policy.set_target_state(
            task.cal_target_expanded_state(target_state)
        )  ## set target state to koopman observable state, task.target_expanded_state
        lqr_policy.sat_val = pend.sat_val  ### set the saturation value
        ### forward sim the lifted (Koopman) dynamical system under the LQR policy;
        ### action_schedule holds the optimal feedback actions -K @ err at each step
        trajectory, fdx, fdu, action_schedule = koopman_operator.simulate(
            t_state, horizon,
            policy=lqr_policy)  # (here fdx, fdu is just Kx, Ku in a list)
        # derivatives of the task objective along the trajectory
        ldx, ldu = task.get_linearization_from_trajectory(
            trajectory, action_schedule)
        # feedback-gain linearization of the LQR policy along the trajectory
        mudx = lqr_policy.get_linearization_from_trajectory(trajectory)

        rhof = task.mdx(trajectory[-1])  ### get terminal condition for adjoint
        rho = adjoint.simulate_adjoint(rhof, ldx, ldu, fdx, fdu, mudx, horizon)

        ustar = -np.dot(inv_control_reg, fdu[0].T.dot(
            rho[0])) + lqr_policy(t_state)
        ustar = np.clip(ustar, -pend.sat_val,
                        pend.sat_val)  ### saturate control

        if np.isnan(ustar).any():
            ustar = default_action(None)

        ### advance the pendulum subject to ustar
        next_state = pend.step(state, ustar)
        # next_position=get_position(next_state)

        ### update the koopman operator from data
        koopman_operator.compute_operator_from_data(
            get_koop_measurement(state), ustar,
            get_koop_measurement(next_state))

        state = next_state  ### backend : update the simulator state
        al_states_list.append(state)
        # position=next_position
        ### we can also use a decaying weight on inf gain
        task.inf_weight = 100.0 * (0.99**(t))
        # if t % 100 == 0:
        #     print('time : {}, pose : {}, {}'.format(t*quad.time_step,
        #                                             get_all_measurement(state), ustar))
    t = [i * pend.time_step for i in range(simulation_time)]
    return state, al_states_list
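
A minimal usage sketch of alcontrol_step, mirroring the commented-out call in Example No. 1 (the concrete states and step sizes below are assumptions):

nearest_state = np.array([0.0, 0.0])  # state of the nearest tree node
sampled_state = np.array([0.5, 1.0])  # state the planner extends towards
step_size = 0.2                       # planner extension duration (s)
nonlinear_dynamics_step_size = 1e-2   # integration substep (s)

final_state, visited_states = alcontrol_step(starting_state=nearest_state,
                                             target_state=sampled_state,
                                             states_list=[nearest_state],
                                             sz=step_size,
                                             ndsz=nonlinear_dynamics_step_size)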