Example #1
def test_lds(steps=1000, show_plot=False, verbose=False):
    T = steps
    n, m, d = 10, 10, 2  # state, input, and observation dimension
    environment = tigercontrol.environment("LDS")

    system_params = {}
    gaussian = lambda dims: random.normal(generate_key(), shape=dims)
    for matrix, shape in {
            'A': (n, n),
            'B': (n, m),
            'C': (d, n),
            'D': (d, m)
    }.items():
        system_params[matrix] = gaussian(shape)
    system_params_only_AC = {k: system_params[k] for k in ('A', 'C')}
    x_init = gaussian((n, ))

    # Test custom noise
    custom_nd_vector = lambda x, u: (0.1 * x + np.cos(u), 0)
    custom_nd_scalar = lambda x, u: 0.1 * x + np.cos(u)
    environment = tigercontrol.environment("LDS")
    environment.reset(n,
                      m,
                      d,
                      partially_observable=True,
                      noise_distribution=custom_nd_vector)
    environment = tigercontrol.environment("LDS")
    environment.reset(
        n,
        m,
        d=None,
        partially_observable=False,
        noise_distribution=custom_nd_scalar)  #, system_params={})

    test_output = []
    for t in range(T):
        u = random.normal(generate_key(), shape=(m, ))
        test_output.append(environment.step(u))

    info = environment.hidden()
    if verbose:
        print(info)

    assert environment.T == T
    if show_plot:
        plt.plot(test_output)
        plt.title("lds")
        plt.show(block=False)
        plt.pause(3)
        plt.close()
    print("test_lds passed")
    return
Example #2
def get_trajectory(environment, controller, T=100):

    (environment_id, environment_params) = environment
    (controller_id, controller_params) = controller

    environment = tigercontrol.environment(environment_id)
    x = environment.reset(**environment_params)

    controller_params['A'], controller_params[
        'B'] = environment.A, environment.B

    controller = tigercontrol.controller(controller_id)
    controller.initialize(**controller_params)

    trajectory = []
    norms = []
    avg_regret = []
    cur_avg = 0

    u = controller.plan(x, T)

    for i in range(T):
        x = environment.step(u[i])
        trajectory.append(x)
        norms.append(np.linalg.norm(x))
        # incremental running average of ||x_t|| + ||u_t|| over the first i + 1 steps
        cur_avg = (i / (i + 1)) * cur_avg + (np.linalg.norm(x) +
                                             np.linalg.norm(u[i])) / (i + 1)
        avg_regret.append(cur_avg)

    return trajectory, norms, avg_regret
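
# A minimal usage sketch for get_trajectory (assumptions: "LDS" and "GPC" are
# registered environment/controller ids, as in the grid-search example below, and
# the GPC controller accepts the A, B matrices that get_trajectory injects into
# controller_params; the dimension values are illustrative).
def test_get_trajectory_sketch():
    example_env = ("LDS", {'n': 3, 'm': 2})
    example_controller = ("GPC", {})  # A and B are filled in by get_trajectory itself
    trajectory, norms, avg_regret = get_trajectory(example_env, example_controller, T=100)
    assert len(trajectory) == 100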
Example #3
def test_sgd_lstm(show=False):
    environment = tigercontrol.environment('LDS')
    x = environment.reset(p=2,q=0)

    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, l=3, h=10, optimizer=SGD) # initialize with class
    controller.predict(1.0) # call controllers to verify it works
    controller.update(1.0)

    optimizer = SGD(learning_rate=0.001)
    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n=1, m=1, l=3, h=10, optimizer=optimizer) # reinitialize with instance

    loss = []
    for t in range(1000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.title("Test SGD on LQR(3) with LSTM controller")
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(3)
        plt.close()
Example #4
def test_minitaur(steps=1000, verbose=False):
    environment = tigercontrol.environment("PyBullet-Minitaur")
    environment.reset(render=verbose)

    sum_reward = 0
    amplitude1 = 0.5
    amplitude2 = 0.5
    speed = 40

    for step_counter in range(steps):
        time_step = 0.01
        t = step_counter * time_step
        a1 = np.sin(t * speed) * amplitude1
        a2 = np.sin(t * speed + np.pi) * amplitude1
        a3 = np.sin(t * speed) * amplitude2
        a4 = np.sin(t * speed + np.pi) * amplitude2
        action = [a1, a2, a2, a1, a3, a4, a4, a3]
        _, reward, done, _ = environment.step(action)
        time.sleep(1. / 100.)

        sum_reward += reward
        if done:
            environment.reset()

    environment.close()
    print("test_minitaur passed")
Example #5
def test_custom_controller(steps=1000, show_plot=False):
    # initial preparation
    T = steps
    loss = lambda y_true, y_pred: (y_true - y_pred)**2
    environment = tigercontrol.environment("LDS")  # return class
    n, m = 2, 2  # state and action dimension
    env = environment()
    cur_x = env.initialize(n, m)

    # simple zero custom controller implementation
    class Custom(tigercontrol.CustomController):
        def get_action(self, x):
            return np.zeros((m, 1))

        def update(self, y):
            pass

    # try registering and calling the custom controller
    tigercontrol.register_custom_controller(Custom, "TestCustomController")
    custom_controller = tigercontrol.controller("TestCustomController")
    controller = custom_controller()

    results = []
    for i in range(T):
        a = controller.get_action(cur_x)
        cur_x = env.step(a)
        results.append(cur_x)  # record the state so the optional plot below has data

    if show_plot:
        plt.plot(results)
        plt.title("Custom (last value) controller on LQR environment")
        plt.show(block=False)
        plt.pause(1)
        plt.close()
    print("test_custom_controller passed")
    return
Example #6
def test_simulator(verbose=False):
    environment = tigercontrol.environment("PyBullet-CartPole")
    obs = environment.reset(render=verbose)

    controller = tigercontrol.controllers("CartPoleNN")
    controller.initialize(environment.get_observation_space(),
                          environment.get_action_space())

    t_start = time.time()
    save_to_mem_ID = -1

    frame = 0
    score = 0
    restart_delay = 0
    while time.time() - t_start < 3:
        a = controller.predict(obs)
        obs, r, done, _ = environment.step(a)
        score += r
        frame += 1
        if verbose:
            time.sleep(1. / 60.)

    if verbose:
        print("about to save state")
    save_to_mem_ID = environment.getState()
    if verbose:
        print("save_state_ID: " + str(save_to_mem_ID))

    # fork a simulator from the current environment state
    environment.loadState(environment.getState())
    sim = environment.fork()

    if verbose:
        print("environment.loadState worked")
    sim_score = score
    sim_frame = frame
    while time.time() - t_start < 3:
        if verbose:
            time.sleep(1. / 60.)
        a = controller.predict(obs)
        obs, r, done, _ = environment.step(a)
        sim_score += r
        sim_frame += 1

    # resume stepping through the environment from the previously saved state
    environment.loadState(save_to_mem_ID)
    if verbose:
        print("environment.loadState worked")
    while time.time() - t_start < 3:
        a = controller.predict(obs)
        obs, r, done, _ = environment.step(a)
        score += r
        frame += 1
        if verbose:
            time.sleep(1. / 60.)

    environment.close()
    print("test_simulator passed")
Example #7
def test_stress(env_id):
    """ Description: run environment in a few high pressure situations """
    environment_class = tigercontrol.environment(env_id)
    env = environment_class()
    n, m = env.get_state_dim(), env.get_action_dim()  # get dimensions of system
    x_0 = env.reset()
    for t in range(10000):
        env.step(t * np.ones(m))
Example #8
def test_api(env_id):
    """ Description: verify that all default methods work for this environment """
    environment_class = tigercontrol.environment(env_id)
    env = environment_class()
    n, m = env.get_state_dim(), env.get_action_dim()  # get dimensions of system
    x_0 = env.reset()  # first state
    x_1 = env.step(np.zeros(m))  # try step with 0 action
    assert np.allclose(x_0, env.reset())  # reset should return the environment to its original state
Example #9
def test_cartpole_double(verbose=False):
    # try to break this test
    environment = tigercontrol.environment("PyBullet-CartPoleSwingup")
    obs = environment.reset(render=False)
    #environment.close()

    environment = tigercontrol.environment("PyBullet-CartPoleDouble")
    obs = environment.reset(render=verbose)

    controller = SmallReactivePolicy()
    controller.initialize(environment.get_observation_space(), environment.get_action_space())

    t_start = time.time()
    save_to_mem_ID = -1

    frame = 0
    score = 0
    restart_delay = 0
    saved = False
    while time.time() - t_start < 3:
        time.sleep(1. / 60.)
        a = controller.predict(obs)
        obs, r, done, _ = environment.step(a)

        score += r
        frame += 1
        if time.time() - t_start > 0 and not saved:
            if verbose:
                print("about to save to memory")
            #save_to_mem_ID = environment.getState()
            saved = True
        if not done: continue
        if restart_delay == 0:
            if verbose:
                print("score=%0.2f in %i frames" % (score, frame))
            restart_delay = 60 * 2  # 2 sec at 60 fps
        else:
            restart_delay -= 1
            if restart_delay > 0: continue
            break

    environment.close()
    print("test_cartpole_double passed")
Example #10
def test_ons(show=False):

    #tigercontrol.set_key(0) # consistent randomness

    environment = tigercontrol.environment('LDS')
    x, y_true = environment.reset()

    controllers = []
    labels = ['OGD', 'ONS']  # the Adagrad and Adam runs are commented out below

    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n = 1, m = 1, optimizer=OGD) # initialize with class
    controllers.append(controller)

    #controller = tigercontrol.controllers('AutoRegressor')
    #controller.initialize(optimizer=Adagrad) # initialize with class
    #controllers.append(controller)

    controller = tigercontrol.controllers('LSTM')
    controller.initialize(n = 1, m = 1, optimizer=ONS) # initialize with class
    controllers.append(controller)

    #controller = tigercontrol.controllers('AutoRegressor')
    #controller.initialize(optimizer=Adam) # initialize with class
    #controllers.append(controller)

    losses = [[] for i in range(len(controllers))]
    update_time = [0.0 for i in range(len(controllers))]
    for t in tqdm(range(2000)):
        for i in range(len(controllers)):
            l, controller = losses[i], controllers[i]
            y_pred = controller.predict(x)
            l.append(mse(y_pred, y_true))

            t0 = time.time()
            controller.update(y_true)
            update_time[i] += time.time() - t0
        x, y_true = environment.step()

    print("time taken:")
    for t, label in zip(update_time, labels):
        print(label + ": " + str(t))

    if show:
        plt.yscale('log')
        for l, label in zip(losses, labels):
            plt.plot(l, label = label)
            #plt.plot(avg_regret(l), label = label)
        plt.legend()
        plt.title("Autoregressors on ENSO-T6")
        plt.show(block=False)
        plt.pause(300)
        plt.close()
        
    print("test_ons passed")
Example #11
def test_pendulum(verbose=False):
    environment = tigercontrol.environment("Pendulum")
    #L = lambda x, u: 1. - x[1] # 1 - cos(theta), where theta=0 is the goal (pendulum pointing up)
    # L = lambda x, u: x[0]**2

    # C_x, C_u = np.diag(np.array([0.1, 0.0, 0.0, 0.0])), np.diag(np.array([0.1]))
    # L = lambda x, u: x.T @ C_x @ x + u.T @ C_u @ u

    dim_x, dim_u = 2, 1
    obs = environment.reset()
    T = 50  # horizon
    '''
    threshold = 0.01
    lamb = 0.1
    max_iterations = 50

    controller = tigercontrol.controller("ILQR_3state")
    controller.initialize(environment, dim_x, dim_u, max_iterations, lamb, threshold)

    if verbose:
        print("Running iLQR...")
    u = controller.plan(obs, T)'''

    total_cost = 0
    #print("u: " + str([float(u_t) for u_t in u]))
    index = 0
    for t in range(10 * T):
        # print("t:" + str(t))
        if verbose:
            environment.render()
            time.sleep(1. / 30.)
        # obs, cost, done = environment.step(u[index])
        obs, cost, done = environment.step(np.zeros((1, )))
        # total_cost += cost
        index += 1
        '''
        if done:
            if verbose:
                print("lasted {} time steps".format(t+1))
            obs = environment.reset()
        '''
        if done or index == T:
            if verbose:
                print("recomputing u...")

                # print(total_cost)
            obs = environment.reset()
            # controller.initialize(environment, dim_x, dim_u, max_iterations, lamb, threshold)
            # u = controller.plan(obs, T)
            total_cost = 0
            index = 0

    environment.close()
    print("test_pendulum passed")
Example #12
def test_obstacles(verbose=False):
    environment = tigercontrol.environment("PyBullet-Obstacles")
    # obs = environment.reset(render=verbose)

    # controller = tigercontrol.controllers("CartPoleNN")
    # controller.initialize(environment.get_observation_space(), environment.get_action_space())

    # Initial setup
    # Flag that sets if things are visualized
    # GUI = True; # Only for debugging purposes

    GUI = True
    random_seed = 5
    m = 1
    state, params, husky, sphere, obsUid = environment.reset(render=verbose)
    numRays = params['numRays']
    thetas_nominal = params['thetas_nominal']

    # Controller and optimization setup

    # Choose L controllers
    num_x_intercepts = 1
    num_y_intercepts = 1
    L = num_x_intercepts * num_y_intercepts

    x_intercepts = np.linspace(0.1, 5.0, num_x_intercepts)
    y_intercepts = np.linspace(0.0, 10.0, num_y_intercepts)

    print("x_intercepts: " + str(x_intercepts))
    print("y_intercepts: " + str(y_intercepts))
    print("thetas_nominal: " + str(thetas_nominal))
    print("numRays: " + str(numRays))

    K = L * [None]
    for i in range(num_x_intercepts):
        for j in range(num_y_intercepts):
            K[i * num_y_intercepts + j] = np.zeros((numRays, 1))
            for r in range(numRays):
                if (thetas_nominal[r] > 0):
                    K[i * num_y_intercepts + j][r] = y_intercepts[j] * (
                        x_intercepts[i] - thetas_nominal[r]) / x_intercepts[i]
                else:
                    K[i * num_y_intercepts + j][r] = y_intercepts[j] * (
                        -x_intercepts[i] - thetas_nominal[r]) / x_intercepts[i]

    print("First K = " + str(K))

    costs_precomputed = precompute_environment_costs(m, K, L, params, husky,
                                                     sphere, GUI, random_seed,
                                                     environment, obsUid)

    print("costs_precomputed: " + str(costs_precomputed))
    print("test_obstacles passed")
Example #13
def test_cartpole(verbose=False):
    environment = tigercontrol.environment("CartPole")
    # environment = CartPole()
    # task = CartPole_v0()

    C_x, C_u = np.diag(np.array([0.1, 0.0, 1.0, 0.0])), np.diag(np.array([0.1]))
    L = lambda x, u: x.T @ C_x @ x + u.T @ C_u @ u
    dim_x, dim_u = 4, 1
    update_period = 75
    obs = environment.reset()

    T = 75  # horizon
    threshold = 0.01
    lamb = 0.1
    max_iterations = 25

    controller = ILQR(environment, max_iterations, lamb, threshold)
    # controller.initialize

    if verbose:
        print("Running iLQR...")
    u = controller.plan(obs, T)

    index = 0
    for t in range(10 * T):
        if verbose:
            environment.render()
            time.sleep(1. / 50.)
        obs = environment.step(u[index])
        index += 1
        '''
        if done:
            if verbose:
                print("lasted {} time steps".format(t+1))
            obs = environment.reset()'''

        if index == T:
            if verbose:
                print("recomputing u...")
            obs = environment.reset()
            u = controller.plan(obs, T)
            # print("u : " + str(u))
            index = 0

    environment.close()
    print("test_cartpole passed")
Example #14
def test_kuka(verbose=False):

    environment = tigercontrol.environment("PyBullet-Kuka")
    obs = environment.reset(render=verbose)
    policy = ContinuousDownwardBiasPolicy()

    t_start = time.time()
    while time.time() - t_start < 5:
        done = False
        episode_rew = 0
        while not done:
            if verbose:
                environment.render(mode='human')
            act = policy.sample_action(obs, .1)
            obs, rew, done, _ = environment.step(act)
            episode_rew += rew

    environment.close()
    print("test_kuka passed")
Example #15
def test_double_pendulum(verbose=False):
    environment = tigercontrol.environment("DoublePendulum")
    # observe [cos(theta1) sin(theta1) cos(theta2) sin(theta2) thetaDot1 thetaDot2]
    L = lambda x, u: (x[0] - x[2])**2
    dim_x, dim_u = 4, 1
    obs = environment.reset()

    update_period = 75
    T = 75  # horizon
    threshold = 0.01
    lamb = 0.1
    max_iterations = 25

    controller = tigercontrol.controllers("ILQR")
    controller.initialize(environment, L, dim_x, dim_u, update_period,
                          max_iterations, lamb, threshold)

    if verbose:
        print("Running iLQR...")
    # u = controller.plan(obs, T, max_iterations, lamb, threshold)

    index = 0
    for t in range(10 * T):
        if verbose:
            environment.render()
            time.sleep(1. / 15.)
        u = controller.plan(obs)
        obs, r, done, _ = environment.step(u)
        index += 1

        if done:
            if verbose:
                print("solved double pendulum in {} time steps!".format(t + 1))
            obs = environment.reset()
        '''
        if done or index == T:
            if verbose:
                print("recomputing u...")
            u = controller.plan(obs, T, max_iterations, lamb, threshold)
            index = 0'''

    environment.close()
    print("test_double_pendulum passed")
Example #16
def test_grid_search_arma(show=False):
    environment_id = "LDS"
    controller_id = "GPC"
    environment_params = {'n':3, 'm':2}
    controller_params = {}
    loss = lambda a, b: np.sum((a-b)**2)
    search_space = {'optimizer':[]} # optimizer instances to search over for the GPC controller
    opts = [Adam, Adagrad, ONS, OGD]
    lr_start, lr_stop = 0, -4 # search learning rates from 10^start to 10^stop 
    learning_rates = np.logspace(lr_start, lr_stop, 1+2*np.abs(lr_start - lr_stop))
    for opt, lr in itertools.product(opts, learning_rates):
        search_space['optimizer'].append(opt(learning_rate=lr)) # create instance and append

    trials = 15
    hpo = GridSearch() # hyperparameter optimizer
    optimal_params, optimal_loss = hpo.search(controller_id, controller_params, environment_id, environment_params, loss, 
        search_space, trials=trials, smoothing=10, start_steps=100, verbose=show)

    if show:
        print("optimal loss: ", optimal_loss)
        print("optimal params: ", optimal_params)

    # test resulting controller params
    controller = tigercontrol.controllers(controller_id)
    controller.initialize(**optimal_params)
    environment = tigercontrol.environment(environment_id)
    x = environment.reset(**environment_params)
    loss = []
    if show:
        print("run final test with optimal parameters")
    for t in range(5000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(10)
        plt.close()
Example #17
def test_ant(steps=1000, verbose=False):
    environment = tigercontrol.environment("PyBullet-Ant")
    environment.reset(render=verbose)

    sum_reward = 0
    amplitude1 = 0.5
    amplitude2 = 0.5
    speed = 40

    action = np.random.normal(size=environment.action_space)

    for step_counter in range(steps):
        action = 0.95 * action + np.random.normal(
            size=environment.action_space)
        _, reward, done, _ = environment.step(action)
        time.sleep(1. / 100.)

        sum_reward += reward
        if done:
            environment.reset()
    print("test_ant passed")
Example #18
    def _run_test(self, controller_params, smoothing, min_steps, verbose=0):
        """ Run a single test with given controller params, using median stopping rule """
        # initialize environment and controller
        if verbose:
            print("Currently testing parameters: " + str(controller_params))
        controller = tigercontrol.controller(self.controller_id)
        controller.initialize(**controller_params)
        environment = tigercontrol.environment(self.environment_id)
        if environment.has_regressors:
            x, y_true = environment.reset(**self.environment_params)
        else:
            x = environment.reset(**self.environment_params)

        t = 0
        losses = []  # sorted losses, used to get median
        smooth_losses = np.zeros(
            smoothing)  # store previous losses to get smooth loss
        while True:  # run controller until worse than median loss, ignoring first 100 steps
            t += 1
            y_pred = controller.predict(x)
            if environment.has_regressors:
                controller.update(y_true)
                loss = self.loss(y_pred, y_true)
                x, y_true = environment.step()  # advance to the next (x, y_true) pair
            else:
                x = environment.step()
                controller.update(x)
                loss = self.loss(y_pred, x)
            if t == 1:  # fill all of smooth_losses with the first loss
                for i in range(smoothing):
                    smooth_losses = self._update_smoothing(smooth_losses, loss)
            else:  # else replace only the oldest loss
                smooth_losses = self._update_smoothing(smooth_losses, loss)
            smooth_loss = np.mean(smooth_losses)
            if t % smoothing == 0:
                self._add_to_list(losses, smooth_loss)
                if self._halting_rule(losses, smooth_loss) and t >= min_steps:
                    break
        if verbose:
            print("Final loss: ", smooth_loss)
        return smooth_loss
Example #19
def test_sgd_autoregressor(show=False):
    environment = tigercontrol.environment('LDS')
    x = environment.reset(p=2,q=0)

    optimizer = SGD(learning_rate=0.0003)
    controller = tigercontrol.controllers('AutoRegressor')
    controller.initialize(p=3, optimizer=optimizer) # initialize with an optimizer instance

    loss = []
    for t in range(1000):
        y_pred = controller.predict(x)
        y_true = environment.step()
        loss.append(mse(y_pred, y_true))
        controller.update(y_true)
        x = y_true

    if show:
        plt.title("Test SGD on LQR(3) with AutoRegressor controller")
        plt.plot(loss)
        plt.show(block=False)
        plt.pause(3)
        plt.close()
Example #20
def test_quadcopter():
    # Controller parameters
    CONTROLLER_PARAMETERS = {
        'Linear_PID':{'P':np.array([300,300,7000]),'I':np.array([0.04,0.04,4.5]),'D':np.array([450,450,5000])},
        'Angular_PID':{'P':np.array([22000,22000,1500]),'I':np.array([0,0,1.2]),'D':np.array([12000,12000,0])},
    }
    GOAL = np.array([0, 0, 2]) # x, y, z
    YAW = 0

    # initialize quadcopter and GUI
    quad_environment = tigercontrol.environment("Quadcopter")
    #quad_environment = quad_environment_class()
    state = quad_environment.reset()

    # initialize model and targets
    quad_controller = QuadcopterModel()
    quad_controller.initialize(CONTROLLER_PARAMETERS, GOAL, YAW)

    # start control loop
    motor_action = None
    for i in range(1000):
        motor_action = quad_controller.get_action(state)
        state = quad_environment.step(motor_action)
    print("test_quadcopter passed")
Example #21
def test_dynaboost_lstm(steps=500, show=True):
    # controller initialize
    T = steps
    controller_id = "LSTM"
    ogd = OGD(learning_rate=0.01)
    controller_params = {'n': 1, 'm': 1, 'l': 5, 'h': 10, 'optimizer': ogd}
    controllers = []
    Ns = [1, 3, 6]
    for n in Ns:  # number of weak learners
        controller = tigercontrol.controllers("DynaBoost")
        controller.initialize(controller_id, controller_params, n,
                              reg=1.0)  # regularization
        controllers.append(controller)

    # regular AutoRegressor for comparison
    autoreg = tigercontrol.controllers("AutoRegressor")
    autoreg.initialize(p=4)  # autoregressive order

    # environment initialize
    p, q = 4, 0
    environment = tigercontrol.environment("LDS")
    y_true = environment.reset(p, q, noise_magnitude=0.1)

    # run all boosting controller
    result_list = [[] for n in Ns]
    last_value = []
    autoreg_loss = []
    for i in range(T):
        y_next = environment.step()

        # predictions for every boosting controller
        for result_i, controller_i in zip(result_list, controllers):
            y_pred = controller_i.predict(y_true)
            result_i.append(mse(y_next, y_pred))
            controller_i.update(y_next)

        # last value and autoregressor predictions
        last_value.append(mse(y_true, y_next))
        autoreg_loss.append(mse(autoreg.predict(y_true), y_next))
        autoreg.update(y_next)
        y_true = y_next

    # plot performance
    if show:
        start = 100
        x = np.arange(start, steps)
        plt.figure(figsize=(12, 8))

        # plot every boosting controller loss
        for n, results in zip(Ns, result_list):
            print("Mean loss for n={}: {}".format(
                n, np.mean(np.array(results[start:]))))
            plt.plot(x,
                     avg_regret(results[start:]),
                     label="DynaBoost, n={}".format(n))

        # plot loss for last value and autoregressor controllers
        print("Mean loss for LastValue: {}".format(
            np.mean(np.array(last_value[start:]))))
        plt.plot(x,
                 avg_regret(last_value[start:]),
                 label="Last value controller")
        print("Mean loss for AutoRegressor: {}".format(
            np.mean(np.array(autoreg_loss[start:]))))
        plt.plot(x,
                 avg_regret(autoreg_loss[start:]),
                 label="AutoRegressor controller")

        plt.title("DynaBoost controller on LQR environment")
        plt.legend()
        plt.show(block=False)
        plt.pause(10)
        plt.close()
Example #22
def test_dynaboost_arma(steps=500, show=True):
    # controller initialize
    T = steps
    controller_id = "AutoRegressor"
    controller_params = {'p': 18, 'optimizer': OGD}
    Ns = [64]
    timelines = [6, 9, 12]

    # regular AutoRegressor for comparison
    autoreg = tigercontrol.controllers("AutoRegressor")
    autoreg.initialize(p=18, optimizer=OGD)

    fig, ax = plt.subplots(nrows=1, ncols=3)
    cur = 0

    # run all boosting controller
    for timeline in timelines:

        # environment initialize
        environment = tigercontrol.environment("ENSO")
        x, y_true = environment.reset(input_signals=['oni'], timeline=timeline)
        controllers = []

        for n in Ns:  # number of weak learners
            controller = tigercontrol.controllers("DynaBoost")
            controller.initialize(controller_id, controller_params, n,
                                  reg=0.0)  # regularization
            controllers.append(controller)

        result_list = [[] for n in Ns]
        autoreg_loss = []

        for i in tqdm(range(T)):

            # predictions for every boosting controller
            for result_i, controller_i in zip(result_list, controllers):
                y_pred = controller_i.predict(x)
                result_i.append(mse(y_true, y_pred))
                controller_i.update(y_true)

            # last value and autoregressor predictions
            autoreg_loss.append(mse(autoreg.predict(x), y_true))
            autoreg.update(y_true)
            x, y_true = environment.step()

        # plot performance
        if show:

            start = T // 2

            # plot every boosting controller loss
            for n, results in zip(Ns, result_list):
                print("Mean loss for n={}: {}".format(
                    n, np.mean(np.array(results))))
                ax[cur].plot(avg_regret(results[-start:]),
                             label="DynaBoost, n={}".format(n))

            # plot loss for last value and autoregressor controllers
            print("Mean loss for AutoRegressor: {}".format(
                np.mean(np.array(autoreg_loss))))
            ax[cur].plot(avg_regret(autoreg_loss[-start:]),
                         label="AutoRegressor controller")
            ax[cur].legend(loc="upper right", fontsize=8)

        cur += 1

    fig.tight_layout()
    plt.show()
Example #23
def test_tigercontrol_environment():
    environment = tigercontrol.environment('LDS')
    return
Example #24
import jax.numpy as np
import tigercontrol as tc
import numpy.random as random
import matplotlib.pyplot as plt
from scipy.linalg import solve_discrete_are as dare
from tigercontrol.controllers import Controller

T, H, M, lr = 200, 10, 10, 0.001
n, m, A, B = 2, 1, np.array([[1., 1.], [0., 1.]]), np.array([[0.], [1.]])
Q, R = np.eye(N=n), np.eye(N=m)
x0 = np.zeros((n, 1))

# process noise: the second definition overrides the first, so the sinusoidal disturbance is used
# Wproc = lambda n, x, u, w, t: random.normal(size=(n, 1))
Wproc = lambda n, x, u, w, t: np.sin(t / (2 * np.pi)) * np.ones((2, 1))

env = tc.environment('LDS')


class LQR(Controller):
    def __init__(self, A, B, Q, R):
        P = dare(A, B, Q, R)
        self.K = np.linalg.inv(R + B.T @ P @ B) @ (B.T @ P @ A)

    def plan(self, x):
        return -self.K @ x
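
# A quick sanity-check sketch for the gain above (assumption: jax.numpy.linalg.eigvals
# is available on this backend). LQR.__init__ builds the standard infinite-horizon
# discrete-time gain K = (R + B'PB)^{-1} B'PA, where P solves the discrete algebraic
# Riccati equation, so the closed loop A - BK should have spectral radius below 1.
_P = dare(A, B, Q, R)
_K = np.linalg.inv(R + B.T @ _P @ B) @ (B.T @ _P @ A)
print("closed-loop spectral radius:", np.max(np.abs(np.linalg.eigvals(A - B @ _K))))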


x = env.initialize(n,
                   m,
                   noise_distribution=Wproc,
                   system_params={