def rnn_cell_fa(rnn_input, state, W, U, B):
    ones0 = tf.ones([batch_size, 1], tf.float32)
    state_p = tf.concat([state, ones0], 1)
    #return activation(tf.matmul(rnn_input[:,0:-1:2], U) + tf_matmul_r(state_p, W, B))
    return activation(
        tf_matmul_r(rnn_input[:, 0:-1:2], U, B[0:2, :]) +
        tf_matmul_r(state_p, W, B))
y_p_man = tf.matmul(h_aug_man, W)

loss_man = tf.reduce_sum(tf.pow(y_p_man - y, 2)) / 2
grad_W_man = tf.gradients(xs=W, ys=loss_man)[0]
e_man = (y_p_man - y)
h_prime_man = h_man * (1 - h_man)
grad_A_manual = tf.matmul(
    tf.transpose(x_aug),
    tf.multiply(h_prime_man, tf.matmul(e_man, tf.transpose(B[0:m, :]))))

#FA, computed automatically
x_aug = tf.concat([x, e0], 1)
h = tf.sigmoid(tf.matmul(x_aug, A))
h_aug = tf.concat([h, e1], 1)
#The key line! Replace W with B in any backprop step
y_p = tf_matmul_r(h_aug, W, B)
#y_p = tf.matmul(h_aug, W)

loss = tf.reduce_sum(tf.pow(y_p - y, 2)) / 2
grad_W_auto = tf.gradients(xs=W, ys=loss)[0]
grad_A_auto = tf.gradients(xs=A, ys=loss)[0]
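#tf_matmul_r is defined elsewhere in this repo and is not shown in this snippet.
#A minimal sketch of its assumed behaviour: the forward pass is an ordinary
#matmul with W, while the backward pass routes the error through the fixed
#feedback matrix B (feedback alignment). The name tf_matmul_r_sketch and the
#zero gradient for B are assumptions for illustration, not the repo's definition.
@tf.custom_gradient
def tf_matmul_r_sketch(x, W, B):
    y = tf.matmul(x, W)
    def grad(dy):
        dx = tf.matmul(dy, tf.transpose(B))  #B replaces W^T in the backward pass
        dW = tf.matmul(tf.transpose(x), dy)  #forward-weight gradient is unchanged
        dB = tf.zeros_like(B)                #feedback weights receive no gradient
        return dx, dW, dB
    return y, grad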

norms_W = np.zeros(n_tests)
norms_A = np.zeros(n_tests)

#Compare to overridden matmul functions
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(n_tests):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        feed_dict = {x: batch_x, y: batch_y}
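        #Assumed completion (the original snippet is truncated here): evaluate the
        #hand-derived FA gradients and the ones produced by the overridden matmul
        #on the same batch, and record how far apart they are.
        gW_man, gW_auto, gA_man, gA_auto = sess.run(
            [grad_W_man, grad_W_auto, grad_A_manual, grad_A_auto],
            feed_dict=feed_dict)
        norms_W[i] = np.linalg.norm(gW_auto - gW_man)
        norms_A[i] = np.linalg.norm(gA_auto - gA_man)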
def main():
    args = get_args()
    method = args.method
    save = bool(args.save)

    # Global config variables
    anneal = True
    num_steps = 10  # number of truncated backprop steps
    batch_size = 20
    in_dim = 4
    state_size = 50
    learning_rate = 1e-3
    alpha2 = 1
    activation = tf.tanh
    act_prime = lambda x: 1.0 - tf.multiply(x, x)
    acclimatize = True
    grad_max = 10
    N_epochs = 10000
    N_episodes = 10
    n_runs = 1
    delay = 2

    #Node pert params
    lmbda = 5e-3
    var_xi = 0.01
    p_fire = 1.0  #prob of firing

    beta = 0.1

    report_rate = 100
    fn_out = './experiments/cartpole_rnn_partialobs_sgdnp/%s_learning_rate_%f_lmbda_%f_varxi_%f_multipleruns.npz' % (
        method, learning_rate, lmbda, var_xi)

    #Things to save with output
    params = {
        'num_steps': num_steps,
        'batch_size': batch_size,
        'in_dim': in_dim,
        'state_size': state_size,
        'learning_rate': learning_rate,
        'alpha2': alpha2,
        'lmbda': lmbda,
        'var_xi': var_xi,
        'p_fire': p_fire,
        'grad_max': grad_max,
        'N_epochs': N_epochs,
        'N_episodes': N_episodes,
        'acclimatize': acclimatize
    }

    print("Using %s" % method)
    print("For %d epochs" % N_epochs)
    print("Learning rate: %f" % learning_rate)
    print("Lambda learning rate: %f" % lmbda)
    print("Variance xi: %f" % var_xi)
    print("Saving results: %d" % save)

    def rnn_cell_bp(rnn_input, state, W, U, B):
        ones0 = tf.ones([batch_size, 1], tf.float32)
        state_p = tf.concat([state, ones0], 1)
        return activation(
            tf.matmul(rnn_input[:, 0:-1:2], U) + tf.matmul(state_p, W))

    def rnn_cell_fa(rnn_input, state, W, U, B):
        ones0 = tf.ones([batch_size, 1], tf.float32)
        state_p = tf.concat([state, ones0], 1)
        #return activation(tf.matmul(rnn_input[:,0:-1:2], U) + tf_matmul_r(state_p, W, B))
        return activation(
            tf_matmul_r(rnn_input[:, 0:-1:2], U, B[0:2, :]) +
            tf_matmul_r(state_p, W, B))

    if method == 'backprop':
        rnn_cell = rnn_cell_bp
    else:
        rnn_cell = rnn_cell_fa

    def train_network(num_episodes,
                      num_steps,
                      state_size=state_size,
                      verbose=True,
                      n_runs=5):
        xs = np.zeros(
            (n_runs, N_epochs, num_episodes, num_steps, batch_size, in_dim))
        for run_idx in range(n_runs):
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                training_losses = []
                alignments = []

                for idx in range(N_epochs):
                    print("Epoch: %d" % idx)
                    if idx < 4 and acclimatize:
                        ts = train_step_B
                    else:
                        ts = train_step
                    training_loss = 0
                    training_x = np.zeros((batch_size, in_dim))
                    training_state = np.zeros((batch_size, state_size))
                    for step in range(num_episodes):
                        tr_init_gradW = np.zeros((state_size + 1, state_size))
                        tr_init_gradB = np.zeros((state_size + 1, state_size))
                        tr_init_gradC = np.zeros((state_size + 1, 1))
                        tr_init_gradU = np.zeros((int(in_dim / 2), state_size))
                        tr_loss, tr_losses, training_loss_, training_state, training_x, _, align, x_o = \
                            sess.run([loss, losses, total_loss, final_state, final_x, ts, aments, rnn_inputs], \
                                          feed_dict={init_state:training_state, init_x: training_x, \
                                          init_gradU: tr_init_gradU, init_gradW: tr_init_gradW, \
                                          init_gradB: tr_init_gradB, init_gradC: tr_init_gradC})
                        xs[run_idx, idx,
                           step, :, :, :] = np.array(x_o)[:, :, :]
                        training_loss += training_loss_
                    if idx % report_rate == 0 and idx > 0:
                        if verbose:
                            print("Average loss at epoch %d for last %d steps: %f"%(idx, report_rate, \
                                                                                   training_loss/report_rate/num_episodes))
                        training_losses.append(training_loss / report_rate /
                                               num_episodes)
                        alignments.append(align)
                        training_loss = 0

        return training_losses, step, alignments, xs

    ##############
    ## BACKPROP ##
    ##############

    def backprop():
        grad_B = init_gradB
        grad_C = init_gradC
        alnments = []
        grad_V = tf.gradients(xs=V, ys=total_loss)[0]
        grad_W = tf.gradients(xs=W, ys=total_loss)[0]
        grad_U = tf.gradients(xs=U, ys=total_loss)[0]
        return grad_U, grad_W, grad_B, grad_C, grad_V, alnments

    ###############
    ## NODE PERT ##
    ###############
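
    #Node perturbation estimates the error signal at the hidden state from the
    #injected noise: for noise xi drawn with scale var_xi, the estimate is
    #xi * (L_perturbed - L) / var_xi^2. The auxiliary loss below trains the
    #feedback matrix B so that the delta it propagates (computed through
    #tf_matmul_r) matches this estimate.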

    def nodepert():
        grad_B = init_gradB
        grad_C = init_gradC
        alnments = []
        for i in range(num_steps):
            for j in range(i + 1 - delay)[::-1]:
                print(i, j)
                np_est = tf.matmul(
                    tf.diag(loss_pert[i] - loss[i]) / var_xi / var_xi,
                    noise_outputs[j])
                delta = tf.gradients(xs=rnn_outputs[j], ys=loss[i])[0]
                #print(i,j)
                #print(delta)
                aux_loss = tf.reduce_sum(tf.pow(np_est - delta, 2))
                grad_B += tf.squeeze(tf.gradients(xs=B, ys=aux_loss))

        grad_V = tf.gradients(xs=V, ys=total_loss)[0]
        grad_W = tf.gradients(xs=W, ys=total_loss)[0]
        grad_U = tf.gradients(xs=U, ys=total_loss)[0]
        return grad_U, grad_W, grad_B, grad_C, grad_V, alnments

    if method == 'backprop':
        trainer = backprop
    elif method == 'feedbackalignment':
        trainer = backprop
    elif method == 'nodepert':
        trainer = nodepert
    else:
        raise NotImplementedError

    init_x = tf.zeros([batch_size, in_dim], dtype=np.float32)
    init_state = tf.zeros([batch_size, state_size], dtype=np.float32)
    init_gradW = tf.zeros([state_size + 1, state_size], dtype=np.float32)
    init_gradB = tf.zeros([state_size + 1, state_size], dtype=np.float32)
    init_gradC = tf.zeros([state_size + 1, 1], dtype=np.float32)
    init_gradU = tf.zeros([int(in_dim / 2), state_size], dtype=np.float32)
    alignment = tf.zeros([int(in_dim / 2), state_size], dtype=np.float32)

    ones0 = tf.ones([batch_size, 1], tf.float32)
    U = tf.Variable(rng.randn(int(in_dim / 2), state_size) * alpha2,
                    name="input_weights",
                    dtype=tf.float32)
    W = tf.Variable(rng.randn(state_size + 1, state_size) * alpha2,
                    name="feedforward_weights",
                    dtype=tf.float32)
    V = tf.Variable(rng.randn(state_size + 1, 1) * alpha2,
                    name="output_weights",
                    dtype=tf.float32)

    B = tf.Variable(rng.randn(state_size + 1, state_size) * alpha2,
                    name="feedback_weights",
                    dtype=tf.float32)
    C = tf.Variable(rng.randn(state_size + 1, 1) * alpha2,
                    name="output_feedback_weights",
                    dtype=tf.float32)

    ##############################################
    ## Define the cartpole dynamics and network ##
    ##############################################

    x = init_x
    state = init_state
    state_p = init_state
    rnn_inputs = []
    rnn_outputs = []
    rnn_pert_outputs = []
    noise_outputs = []
    heights = []
    hs = []
    actions = []

    m = 1.1
    mp = 0.1
    g = 9.8
    l = 0.5
    tau = 0.04
    Fmax = 10
    max_h = 3
    gamma = 10

    #Equations of motion:
    #theta_dd = (m*g*sin(theta) - cos(theta)*(F + mp*l*theta_d*theta_d*sin(theta)))/((4/3)*m*l - mp*l*cos(theta)*cos(theta))
    #theta_d += tau*theta_dd
    #theta += tau*theta_d
    #h_dd = (F + mp*l*(theta_d*theta_d*sin(theta)-theta_dd*cos(theta)))/m
    #h_d += tau*h_dd
    #h += tau*h_d

    for idx in range(num_steps):
        mask = tf.random_uniform([batch_size, state_size]) < p_fire
        xi = tf.multiply(
            tf.random_normal([batch_size, state_size]) * var_xi,
            tf.to_float(mask))
        phi = tf.random_normal((batch_size, 1)) * Fmax / 500
        #Compute new state
        state = rnn_cell(x, state, W, U, B)
        state_p = rnn_cell(x, state_p, W, U, B) + xi[:, 0:state_size]
        #Compute action
        if method == 'backprop':
            action = tf.matmul(tf.concat([state, ones0], 1), V)
        else:
            action = tf_matmul_r(tf.concat([state, ones0], 1), V, C)
        actions.append(action)

        d_idx = max(0, idx - delay)
        #d_idx = idx
        F = tf.squeeze(Fmax * activation(actions[d_idx]) + phi)
        #Compute new x
        theta_dd = (m*g*tf.sin(x[:,1]) - tf.cos(x[:,1])*(F + mp*l*x[:,0]*x[:,0]*tf.sin(x[:,1])))/((4/3)*m*l -\
                    mp*l*tf.cos(x[:,1])*tf.cos(x[:,1]))
        h_dd = (F + mp * l * (x[:, 0] * x[:, 0] * tf.sin(x[:, 1]) -
                              theta_dd * tf.cos(x[:, 1]))) / m

        #h_dd = (F - mp*l*x[:,0]*x[:,0]*tf.sin(x[:,1]) + mp*g*tf.sin(x[:,1])*tf.cos(x[:,1]))/(m - mp*tf.cos(x[:,1])*tf.cos(x[:,1]))
        #theta_dd = (h_dd*tf.cos(x[:,1]) + g*tf.sin(x[:,1]))/l

        x_list = []
        x_list.append(x[:, 0] + tau * theta_dd)  #x0 = theta_dot
        x_list.append(x[:, 1] + tau * x[:, 0])  #x1 = theta
        x_list.append(x[:, 2] + tau * h_dd)  #x2 = h_dot
        x_list.append(x[:, 3] + tau * x[:, 2])  #x3 = h
        #x_list.append(tf.clip_by_value(x[:,3] + tau*x[:,2], -4*max_h, 4*max_h))     #x3 = h
        x = tf.stack(x_list, axis=1)
        #height = tf.cos(x[:,1])
        height = x[:, 1]
        heights.append(height)
        hs.append(x[:, 2])
        rnn_inputs.append(x)
        rnn_outputs.append(state)
        rnn_pert_outputs.append(state_p)
        noise_outputs.append(xi)

    final_x = rnn_inputs[-1]
    final_state = rnn_outputs[-1]

    #Define loss function....
    loss = [
        gamma * tf.pow(height, 2) / 2 +
        tf.pow(tf.maximum(0.0,
                          tf.abs(h) - max_h), 2) / 2
        for h, height in zip(hs, heights)
    ]
    losses = [
        gamma * tf.reduce_sum(tf.pow(height, 2)) / 2 +
        tf.pow(tf.maximum(0.0,
                          tf.abs(h) - max_h), 2) / 2
        for h, height in zip(hs, heights)
    ]
    total_loss = tf.reduce_mean(losses)

    #Perturbed outputs and loss
    loss_pert = [
        gamma * tf.pow(height, 2) / 2 +
        tf.pow(tf.maximum(0.0,
                          tf.abs(h) - max_h), 2) / 2
        for h, height in zip(hs, heights)
    ]
    losses_pert = [
        gamma * tf.reduce_sum(tf.pow(height, 2)) / 2 +
        tf.pow(tf.maximum(0.0,
                          tf.abs(h) - max_h), 2) / 2
        for h, height in zip(hs, heights)
    ]
    total_loss_pert = tf.reduce_mean(losses_pert)

    ##################################################

    grad_U, grad_W, grad_B, grad_C, grad_V, aments = trainer()
    new_U = U.assign(U - learning_rate * grad_U)
    new_W = W.assign(W - learning_rate * grad_W)
    new_V = V.assign(V - learning_rate * grad_V)

    new_C = C.assign(C - lmbda *
                     tf.clip_by_value(grad_C, -grad_max, grad_max, name=None))
    new_B = B.assign(B - lmbda *
                     tf.clip_by_value(grad_B, -grad_max, grad_max, name=None))
    train_step_B = [new_B, new_C]
    train_step = [new_U, new_W, new_V, new_B, new_C]

    #Save training losses, params, number of runs in epoch, alignment to BP
    all_losses = []
    all_alignments = []
    training_losses, n_in_epoch, alignments, xs = train_network(N_episodes,
                                                                num_steps,
                                                                n_runs=n_runs)
    all_losses.append(training_losses)
    all_alignments.append(alignments)

    if save:
        with open(fn_out, 'wb') as f:
            to_save = {
                'all_losses': all_losses,
                'all_alignments': all_alignments,
                'n_in_epoch': n_in_epoch,
                'params': params,
                'xs': xs
            }
            pickle.dump(to_save, f)
Example #4
    def build_model(self):
        self.is_training = tf.placeholder(tf.bool)
        self.x = tf.placeholder(tf.float32, shape=[None] + self.config.state_size)
        self.y = tf.placeholder(tf.float32, shape=[None, 10])

        # set initial feedforward and feedback weights
        m = 50
        j = 20
        n = 10
        p = self.config.state_size[0]

        #Scale weight initialization
        alpha0 = np.sqrt(2.0/p)
        alpha1 = np.sqrt(2.0/m)
        alpha2 = np.sqrt(2.0/j)
        alpha3 = 1

        #Plus one for bias terms
        A = tf.Variable(rng.randn(p+1,m)*alpha0, name="hidden_weights", dtype=tf.float32)
        W1 = tf.Variable(rng.randn(m+1,j)*alpha1, name="hidden_weights2", dtype=tf.float32)
        W2 = tf.Variable(rng.randn(j+1,n)*alpha2, name="output_weights", dtype=tf.float32)
        B1 = tf.Variable(rng.randn(m+1,j)*alpha1, name="feedback_weights1", dtype=tf.float32)
        B2 = tf.Variable(rng.randn(j+1,n)*alpha2, name="feedback_weights2", dtype=tf.float32)

        # network architecture with ones added for bias terms
        e0 = tf.ones([self.config.batch_size, 1], tf.float32)
        e1 = tf.ones([self.config.batch_size, 1], tf.float32)
        x_aug = tf.concat([self.x, e0], 1)
        h1 = tf.sigmoid(tf.matmul(x_aug, A))
        h1_aug = tf.concat([h1, e1], 1)
        h2 = tf.sigmoid(tf_matmul_r(h1_aug, W1, B1))
        h2_aug = tf.concat([h2, e1], 1)
        y_p = tf_matmul_r(h2_aug, W2, B2)

        with tf.name_scope("loss"):
            #mean squared error
            #cost = tf.reduce_sum(tf.pow(y_p-self.y, 2))/2/self.config.batch_size
            self.loss = tf.reduce_sum(tf.pow(y_p-self.y, 2))/2
            grad_W2 = tf.gradients(xs=W2, ys=self.loss)[0]
            grad_W1 = tf.gradients(xs=W1, ys=self.loss)[0]
            grad_A = tf.gradients(xs=A, ys=self.loss)[0]
            e = (y_p - self.y)
            h1_prime = tf.multiply(h1_aug, 1-h1_aug)[:,0:m]
            h2_prime = tf.multiply(h2_aug, 1-h2_aug)[:,0:j]

            #FA
            lmda2 = tf.matmul(e, tf.transpose(B2[0:j,:]))
            d2 = tf.multiply(h2_prime, lmda2)

            new_W2 = W2.assign(W2 - self.config.learning_rate*grad_W2)
            new_W1 = W1.assign(W1 - self.config.learning_rate*grad_W1)
            new_A = A.assign(A - self.config.learning_rate*grad_A)            
            self.train_step = [new_W2, new_W1, new_A]
            correct_prediction = tf.equal(tf.argmax(y_p, 1), tf.argmax(self.y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

            #Save training metrics
            Bs = [B1, B2]
            Ws = [W1, W2]
            es = [d2, e]
            dls = []
            ls = [h1_aug, h2_aug]
            self._set_training_metrics(es, Bs, Ws)
Example #5
def fa_layer(prev, input_size, output_size):
    W = weight_variable([input_size, output_size])
    B = weight_variable([input_size, output_size])
    b = bias_variable([output_size])
    return tf_matmul_r(prev, W, B) + b
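#Hypothetical usage sketch (weight_variable and bias_variable are assumed to be
#the repo's usual initializer helpers): fa_layer stacks just like a dense layer,
#e.g.
#  h1 = tf.nn.relu(fa_layer(x, 784, 256))
#  logits = fa_layer(h1, 256, 10)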
Example #6
    def build_model(self):
        self.is_training = tf.placeholder(tf.bool)
        self.x = tf.placeholder(tf.float32, shape=[None] + self.config.state_size)
        self.y = tf.placeholder(tf.float32, shape=[None, 10])

        # set initial feedforward and feedback weights
        m = 512
        j = 200
        #m = 50
        #j = 20
        n = 10
        p = self.config.state_size[0]

        #Scale weight initialization
        alpha0 = np.sqrt(2.0/p)
        alpha1 = np.sqrt(2.0/m)
        alpha2 = np.sqrt(2.0/j)
        alpha3 = 1

        #Plus one for bias terms
        A = tf.Variable(rng.randn(p+1,m)*alpha0, name="hidden_weights", dtype=tf.float32)
        W1 = tf.Variable(rng.randn(m+1,j)*alpha1, name="hidden_weights2", dtype=tf.float32)
        W2 = tf.Variable(rng.randn(j+1,n)*alpha2, name="output_weights", dtype=tf.float32)
        B1 = tf.Variable(rng.randn(m+1,j)*alpha1, name="feedback_weights1", dtype=tf.float32)
        B2 = tf.Variable(rng.randn(j+1,n)*alpha2, name="feedback_weights2", dtype=tf.float32)

        # network architecture with ones added for bias terms
        e0 = tf.ones([self.config.batch_size, 1], tf.float32)
        e1 = tf.ones([self.config.batch_size, 1], tf.float32)
        x_aug = tf.concat([self.x, e0], 1)
        h1 = tf.sigmoid(tf.matmul(x_aug, A))
        h1_aug = tf.concat([h1, e1], 1)
        h2 = tf.sigmoid(tf_matmul_r(h1_aug, W1, B1))
        h2_aug = tf.concat([h2, e1], 1)
        y_p = tf_matmul_r(h2_aug, W2, B2)

        with tf.name_scope("loss"):
            #mean squared error
            #cost = tf.reduce_sum(tf.pow(y_p-self.y, 2))/2/self.config.batch_size
            self.loss = tf.reduce_sum(tf.pow(y_p-self.y, 2))/2
            grad_W2 = tf.gradients(xs=W2, ys=self.loss)[0]
            grad_W1 = tf.gradients(xs=W1, ys=self.loss)[0]
            grad_A = tf.gradients(xs=A, ys=self.loss)[0]

            e = (y_p - self.y)
            h1_prime = tf.multiply(h1_aug, 1-h1_aug)[:,0:m]
            h2_prime = tf.multiply(h2_aug, 1-h2_aug)[:,0:j]

            #FA
            lmda2 = tf.matmul(e, tf.transpose(B2[0:j,:]))
            d2 = tf.multiply(h2_prime, lmda2)

            #Feedback data for saving
            #Only take first item in epoch
            delta_bp2 = tf.matmul(e, tf.transpose(W2[0:m,:]))[0,:]
            delta_fa2 = tf.matmul(e, tf.transpose(B2[0:m,:]))[0,:]
            delta_bp1 = tf.matmul(d2, tf.transpose(W1[0:m,:]))[0,:]
            delta_fa1 = tf.matmul(d2, tf.transpose(B1[0:m,:]))[0,:]
            norm_W1 = tf.norm(W1)
            norm_B1 = tf.norm(B1)
            norm_W2 = tf.norm(W2)
            norm_B2 = tf.norm(B2)
            error_FA1 = tf.norm(delta_bp1 - delta_fa1)
            error_FA2 = tf.norm(delta_bp2 - delta_fa2)
            alignment1 = tf_align(delta_fa1, delta_bp1)
            alignment2 = tf_align(delta_fa2, delta_bp2)

            #evals = tf_eigvals(tf.matmul(tf.transpose(B), W))
            #evecs = tf_eigvecs(tf.matmul(tf.transpose(B), W))

            #self.training_metrics = [alignment1, norm_W1, norm_B1, error_FA1, alignment2, norm_W2, norm_B2, error_FA2]
            #for idx in range(n):
                #Compute alignment with evecs of 

            new_W2 = W2.assign(W2 - self.config.learning_rate*grad_W2)
            new_W1 = W1.assign(W1 - self.config.learning_rate*grad_W1)
            new_A = A.assign(A - self.config.learning_rate*grad_A)            
            self.train_step = [new_W2, new_W1, new_A]
            correct_prediction = tf.equal(tf.argmax(y_p, 1), tf.argmax(self.y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

            #True gradients...
            #dl1 = tf.gradients(xs=l1, ys=self.loss)[0]
            #dl2 = tf.gradients(xs=l2, ys=self.loss)[0]

            #Save training metrics
            Bs = [B1, B2]
            Ws = [W1, W2]
            es = [d2, e]
            #dls = [dl1, dl2, dl3, dl4, dl5]
            #ls = [l1, l2, l3, l4, l5]
            dls = []
            ls = [h1_aug, h2_aug]
            self._set_training_metrics(es, Bs, Ws, dls, ls, self.config.learning_rate)