Пример #1
0
    def __init__(self, **kwargs): 

        super(Control, self).__init__(**kwargs)

        self.old_target = [None, None]

        # load up our network
        import glob

        # this code goes into the weights folder, finds the most 
        # recent trial, and loads up the weights
        files = sorted(glob.glob('controllers/weights/rnn*'))
        print 'loading weights from %s'%files[-1]
        W = np.load(files[-1])['arr_0']
        num_states = 4
        self.rnn = RNNet(shape=[num_states * 2, 32, 32, num_states, num_states], 
                     layers=[Linear(), Tanh(), Tanh(), Linear(), Linear()],
                     rec_layers=[1,2],
                     conns={0:[1, 2], 1:[2], 2:[3], 3:[4]},
                     load_weights=W,
                     use_GPU=False)

        offset, W_end, b_end = self.rnn.offsets[(3,4)]
        self.rnn.mask = np.zeros(self.rnn.W.shape, dtype=bool)
        self.rnn.mask[offset:b_end] = True
        self.rnn.W[offset:W_end] = np.eye(4).flatten()

        self.joint_targets = None
        self.act = None

        # set up recorders
        if self.write_to_file is True:
            from recorder import Recorder
            self.u_recorder = Recorder('control signal', self.task, 'hf')
            self.xy_recorder = Recorder('end-effector position', self.task, 'hf')
            self.dist_recorder = Recorder('distance from target', self.task, 'hf')
            self.recorders = [self.u_recorder, 
                            self.xy_recorder, 
                            self.dist_recorder]
Пример #2
0
def test_plant():
    """Example of a network using a dynamic plant as the output layer."""

    eps = 1e-6  # value to use for finite differences computations
    dt = 1e-2  # size of time step
    sig_len = 40  # how many time steps to train over
    batch_size = 32  # how many updates to perform with static input
    num_batches = 20000  # how many batches to run total

    import sys
    # NOTE: Change to wherever you keep your arm models
    sys.path.append("../../../studywolf_control/studywolf_control/")
    from arms.two_link.arm_python import Arm as Arm
    print 'Plant is: ', Arm
    arm = Arm(dt=dt, init_q=[0.736134824578, 1.85227640003])

    num_states = arm.DOF * 2  # are states are [positions, velocities]
    targets = gen_targets(arm=arm, sig_len=sig_len)  # target joint angles
    init_state = np.zeros((len(targets), num_states))  # initial velocity = 0
    init_state[:, :arm.DOF] = arm.init_q  # set up the initial joint angles
    plant = PlantArm(arm=arm, targets=targets, init_state=init_state, eps=eps)

    # open up weights folder and checked for saved weights
    import glob
    files = sorted(glob.glob('weights/rnn*'))
    if len(files) > 0:
        # if weights found, load them up and keep going from last trial
        W = np.load(files[-1])['arr_0']
        print 'loading from ', files[-1]
        last_trial = int(
            files[-1].split('weights/rnn_weights-trial')[1].split('-err')[0])
        print 'last_trial: ', last_trial
    else:
        # if no weights found, start fresh with new random seed
        W = None
        last_trial = -1
        seed = np.random.randint(100000000)
        print 'seed : ', seed
        np.random.seed(seed)

    # specify the network structure and loss functions
    from hessianfree.loss_funcs import SquaredError, SparseL2
    rnn = RNNet(
        # specify the number of nodes in each layer
        shape=[num_states * 2, 32, 32, num_states, num_states],
        # specify the function of the nodes in each layer
        layers=[Linear(), Tanh(), Tanh(),
                Linear(), plant],
        # specify the layers that have recurrent connections
        rec_layers=[1, 2],
        # specify the connections between layers
        conns={
            0: [1, 2],
            1: [2],
            2: [3],
            3: [4]
        },
        # specify the loss function
        loss_type=[
            # squared error between plant output and targets
            SquaredError()
        ],
        load_weights=W,
        use_GPU=False)

    # set up masking so that weights between network output
    # and the plant aren't modified in learning, always = 1
    offset, W_end, b_end = rnn.offsets[(3, 4)]
    rnn.mask = np.zeros(rnn.W.shape, dtype=bool)
    rnn.mask[offset:b_end] = True
    rnn.W[offset:W_end] = np.eye(4).flatten()

    for ii in range(last_trial + 1, num_batches):
        print '============================================='
        print 'training batch ', ii
        err = rnn.run_epochs(plant,
                             None,
                             max_epochs=batch_size,
                             optimizer=HessianFree(CG_iter=96,
                                                   init_damping=100))
        # save the weights to file, track trial and error
        err = rnn.best_error
        name = 'weights/rnn_weights-trial%04i-err%.5f' % (ii, err)
        np.savez_compressed(name, rnn.W)
        print '============================================='
        print 'network: ', name
        print 'final error: ', err
        print '============================================='

    return rnn.best_error
Пример #3
0
def gen_data_plot(folder="weights",
                  index=-1,
                  show_plot=True,
                  save_plot=None,
                  save_paths=False,
                  verbose=True):

    files = sorted(glob.glob('%s/rnn*' % folder))

    # plot the values over time
    vals = []

    for ii, name in enumerate(files[:index]):
        if verbose: print name
        name = name.split('err')[1]
        name = name.split('.npz')[0]
        vals.append(float(name))

    vals = np.array(vals)

    plt.figure(figsize=(10, 3))
    ax = plt.subplot2grid((1, 3), (0, 0), colspan=2)
    ax.loglog(vals)
    ax.loglog(range(len(vals)), np.ones(len(vals)) * min(vals), 'r--')
    ax.loglog(range(len(vals)), np.ones(len(vals)) * min(vals), 'r--')
    plt.xlim([0, len(files)])
    plt.ylim([10**-5, 10])
    plt.title('AHF training error')
    plt.xlabel('Training iterations')
    plt.ylabel('Error')
    plt.yscale('log')

    # load in the weights and see how well they control the arm
    dt = 1e-2
    sig_len = 40

    # HACK: append system path to have access to the arm code
    # NOTE: Change this path to wherever your plant model is kept!
    sys.path.append("../../../studywolf_control/studywolf_control/")
    from arms.two_link.arm_python import Arm as Arm
    if verbose: print 'Plant is: ', Arm
    arm = Arm(dt=dt, init_q=[0.736134824578, 1.85227640003])

    from hessianfree import RNNet
    from hessianfree.nonlinearities import (Tanh, Linear)
    from train_hf import PlantArm, gen_targets

    init_type = "sparse"
    rec_coeff = [1, 1]
    rec_type = "sparse"
    eps = 1e-6

    num_states = 4
    targets = gen_targets(arm, sig_len=sig_len)
    init_state = np.zeros((len(targets), num_states))
    init_state[:, 0] = 0.736134824578
    init_state[:, 1] = 1.85227640003
    plant = PlantArm(arm, targets=targets, init_state=init_state, eps=eps)

    W = np.load(files[index])['arr_0']
    last_trial = -1

    # make sure this network is the same as the one you trained!
    from hessianfree.loss_funcs import SquaredError, SparseL2
    suts_gain = 10e-2 * 1e-2  # for centre-out reaching task
    net_size = 96
    if '32' in folder:
        net_size = 32
    rnn = RNNet(
        shape=[num_states * 2, net_size, net_size, num_states, num_states],
        layers=[Linear(), Tanh(), Tanh(),
                Linear(), plant],
        debug=False,
        rec_layers=[1, 2],
        conns={
            0: [1, 2],
            1: [2],
            2: [3],
            3: [4]
        },
        W_rec_params={
            "coeff": rec_coeff,
            "init_type": rec_type
        },
        load_weights=W,
        use_GPU=False)

    full_output = rnn.forward(plant, rnn.W)
    outputs = full_output[-1]
    states = np.asarray(plant.get_vecs()[0][:, :, num_states:])
    targets = np.asarray(plant.get_vecs()[1])

    def kin(q):
        L = [.31, .27]
        q0 = q[:, 0]
        q1 = q[:, 1]
        return [
            L[0] * np.cos(q0) + L[1] * np.cos(q0 + q1),
            L[0] * np.sin(q0) + L[1] * np.sin(q0 + q1)
        ]

    ax = plt.subplot2grid((1, 3), (0, 2))
    # plot start point
    initx, inity = kin(init_state)
    ax.plot(initx, inity, 'x', mew=10)
    for jj in range(0, len(targets)):
        # plot target
        targetx, targety = kin(targets[jj])
        ax.plot(targetx, targety, 'rx', mew=1)
        # plat path
        pathx, pathy = kin(states[jj, :, :])
        path = np.hstack([pathx[:, None], pathy[:, None]])
        if save_paths is True:
            np.savez_compressed('end-effector position%.3i.npz' % int(jj / 8),
                                array1=path)
        ax.plot(path[:, 0], path[:, 1])

    plt.tight_layout()
    plt.xlim([-.1, .1])
    plt.ylim([.25, .45])
    plt.title('Hand trajectory')
    plt.xlabel('x')
    plt.ylabel('y')

    if save_plot is not None:
        plt.savefig(save_plot)
    if show_plot is True:
        plt.show()
    plt.close()
Пример #4
0
def gen_data_plot(folder="weights", index=None, show_plot=True,
                  save_plot=None, save_paths=False, verbose=True):

    files = sorted(glob.glob('%s/rnn*' % folder))
    files = files[:index] if index is not None else files

    # plot the values over time
    vals = []

    for ii, name in enumerate(files):
        if verbose:
            print(name)
        name = name.split('err')[1]
        name = name.split('.npz')[0]
        vals.append(float(name))

    vals = np.array(vals)

    plt.figure(figsize=(10, 3))
    ax = plt.subplot2grid((1, 3), (0, 0), colspan=2)
    ax.loglog(vals)
    ax.loglog(range(len(vals)), np.ones(len(vals)) * min(vals), 'r--')
    ax.loglog(range(len(vals)), np.ones(len(vals)) * min(vals), 'r--')
    plt.xlim([0, len(files)])
    plt.ylim([10**-5, 10])
    plt.title('AHF training error')
    plt.xlabel('Training iterations')
    plt.ylabel('Error')
    plt.yscale('log')

    # load in the weights and see how well they control the arm
    dt = 1e-2
    sig_len = 40

    # HACK: append system path to have access to the arm code
    # NOTE: Change this path to wherever your plant model is kept!
    sys.path.append("../../../studywolf_control/studywolf_control/")
    # from arms.two_link.arm_python import Arm as Arm
    from arms.three_link.arm import Arm as Arm
    if verbose:
        print('Plant is: %s' % str(Arm))
    arm = Arm(dt=dt)

    from hessianfree import RNNet
    from hessianfree.nonlinearities import (Tanh, Linear)
    from train_hf_3link import PlantArm, gen_targets

    rec_coeff = [1, 1]
    rec_type = "sparse"
    eps = 1e-6

    num_states = arm.DOF * 2
    targets = gen_targets(arm, sig_len=sig_len)
    init_state = np.zeros((len(targets), num_states), dtype=np.float32)
    init_state[:, :arm.DOF] = arm.init_q # set up the initial joint angles
    plant = PlantArm(arm, targets=targets,
                     init_state=init_state, eps=eps)

    index = -1 if index is None else index
    W = np.load(files[index])['arr_0']

    # make sure this network is the same as the one you trained!
    net_size = 96
    if '32' in folder:
        net_size = 32
    rnn = RNNet(shape=[num_states * 2,
                       net_size,
                       net_size,
                       num_states,
                       num_states],
                layers=[Linear(), Tanh(), Tanh(), Linear(), plant],
                debug=False,
                rec_layers=[1, 2],
                conns={0: [1, 2], 1: [2], 2: [3], 3: [4]},
                W_rec_params={"coeff": rec_coeff, "init_type": rec_type},
                load_weights=W,
                use_GPU=False)

    rnn.forward(plant, rnn.W)
    states = np.asarray(plant.get_vecs()[0][:, :, num_states:])
    targets = np.asarray(plant.get_vecs()[1])

    def kin(q):
        x = np.sum([arm.L[ii] * np.cos(np.sum(q[:, :ii+1], axis=1))
                    for ii in range(arm.DOF)], axis=0)
        y = np.sum([arm.L[ii] * np.sin(np.sum(q[:, :ii+1], axis=1))
                    for ii in range(arm.DOF)], axis=0)
        return x,y

    ax = plt.subplot2grid((1, 3), (0, 2))
    # plot start point
    initx, inity = kin(init_state)
    ax.plot(initx, inity, 'x', mew=10)
    for jj in range(0, len(targets)):
        # plot target
        targetx, targety = kin(targets[jj])
        ax.plot(targetx, targety, 'rx', mew=1)
        # plat path
        pathx, pathy = kin(states[jj, :, :])
        path = np.hstack([pathx[:, None], pathy[:, None]])
        if save_paths is True:
            np.savez_compressed('end-effector position%.3i.npz' % int(jj/8),
                                array1=path)
        ax.plot(path[:, 0], path[:, 1])

    plt.tight_layout()
    # plt.xlim([-.1, .1])
    # plt.ylim([.25, .45])
    plt.title('Hand trajectory')
    plt.xlabel('x')
    plt.ylabel('y')

    if save_plot is not None:
        plt.savefig(save_plot)
    if show_plot is True:
        plt.show()
    plt.close()
Пример #5
0
def test_plant(use_GPU):
    n_inputs = 32
    sig_len = 15

    class Plant(hf.nl.Plant):
        # this plant implements a simple dynamic system, with two-dimensional
        # state representing [position, velocity]
        def __init__(self, A, B, targets, init_state):
            super(Plant, self).__init__(stateful=True)

            self.A = np.asarray(A)
            self.B = B

            self.targets = targets
            self.init_state = init_state

            self.shape = [n_inputs, sig_len, len(A)]

            # derivative of output with respect to state (constant, so just
            # compute it once here)
            self.d_output = np.resize(np.eye(
                self.shape[-1]), (n_inputs, self.shape[-1], self.shape[-1], 1))

            self.reset()

        def activation(self, x):
            self.act_count += 1

            # this implements a basic s_{t+1} = A*s_t + B*x dynamic system.
            # but to make things a little more complicated we allow the B
            # matrix to be dynamic, so it's actually
            # s_{t+1} = A*s_t + B(s_t)*x

            self.B_matrix, self.d_B_matrix = self.B(self.state)

            self.state = (np.dot(self.state, self.A) +
                          np.einsum("ij,ijk->ik", x, self.B_matrix))

            return self.state[:x.shape[0]]

        def d_activation(self, x, _):
            self.d_act_count += 1
            assert self.act_count == self.d_act_count

            # derivative of state with respect to input
            d_input = self.B_matrix.transpose((0, 2, 1))[..., None]

            # derivative of state with respect to previous state
            d_state = np.resize(self.A.T,
                                np.concatenate(([x.shape[0]], self.A.shape)))
            d_state[:, 1, 0] += x[:, 1] * self.d_B_matrix[:, 1, 1]
            d_state = d_state[..., None]

            return np.concatenate((d_input, d_state, self.d_output), axis=-1)

        def __call__(self, _):
            self.inputs = np.concatenate((self.inputs, self.state[:, None, :]),
                                         axis=1)
            return self.state

        def get_inputs(self):
            return self.inputs

        def get_targets(self):
            return self.targets

        def reset(self, init=None):
            self.act_count = 0
            self.d_act_count = 0
            self.state = (self.init_state.copy()
                          if init is None else init.copy())
            self.inputs = np.zeros((self.shape[0], 0, self.shape[-1]),
                                   dtype=np.float32)
            self.B_matrix = self.d_B_matrix = None

    # static A matrix (converts velocity into a change in position)
    A = [[1, 0], [0.2, 1]]

    # dynamic B(s) matrix (converts input into velocity, modulated by current
    # state)
    # note that this dynamic B matrix doesn't really make much sense, it's
    # just here to demonstrate what happens with a plant whose dynamics
    # change over time
    def B(state):
        B = np.zeros((state.shape[0], state.shape[1], state.shape[1]))
        B[:, 1, 1] = np.tanh(state[:, 0])

        d_B = np.zeros((state.shape[0], state.shape[1], state.shape[1]))
        d_B[:, 1, 1] = 1 - np.tanh(state[:, 0])**2

        return B, d_B

    # initial position
    init_state = np.zeros((n_inputs, 2))
    init_state[:, 0] = np.linspace(-1, 1, n_inputs)

    # the target will be to end at position 1 with velocity 0
    targets = np.ones((n_inputs, sig_len, 2), dtype=np.float32)
    targets[:, :, 1] = 0
    targets[:, :-1, :] = np.nan

    plant = Plant(A, B, targets, init_state)

    rnn = hf.RNNet(shape=[2, 16, 2],
                   layers=[Linear(), Tanh(), plant],
                   W_init_params={"coeff": 0.1},
                   W_rec_params={"coeff": 0.1},
                   use_GPU=use_GPU,
                   rng=np.random.RandomState(0),
                   debug=False)

    rnn.run_batches(plant,
                    None,
                    HessianFree(CG_iter=20, init_damping=10),
                    max_epochs=150,
                    plotting=True,
                    print_period=None)

    outputs = rnn.forward(plant, rnn.W)

    try:
        assert rnn.loss.batch_loss(outputs, targets) < 1e-2
    except AssertionError:
        plt.figure()
        plt.plot(outputs[-1][:, :, 0].squeeze().T)
        plt.plot(outputs[-1][:, :, 1].squeeze().T)
        plt.title("outputs")
        plt.savefig("test_plant_outputs.png")

        raise