def grid():

	X,Y = transform_data()
	X,Y = shuffle(X,Y)
	N = len(X)//2
	Xtrain = X[:N]
	Ytrain = Y[:N]
	Ttrain = generate_T(Ytrain)
	Xtest = X[N:]
	Ytest = Y[N:]
	Ttest = generate_T(Ytest)
	N,D = Xtrain.shape
	K = len(set(Y))
	w0 = np.random.randn(D,K)/np.sqrt(D+K)
	b0 = np.random.randn(K)/np.sqrt(K)
	learning_rates = [10**i for i in range(-7,-3,1)]
	momentums = [1-10**i for i in sorted(list(range(-4,0)),reverse=True)]
	iterations = 2000
	best_lr = 0
	best_mu = 0
	best_cr = 0
	cost = {}
	cr = {}
	for lr in learning_rates:
		learning_rate = lr
		for mu in momentums:
			dw = 0
			db = 0
			cost[(lr,mu)] = list()
			cr[(lr,mu)] = list()
			for i in range(iterations):
				if i == 0:
					A_train = relu(Xtrain.dot(w0) + b0)
					A_test = relu(Xtest.dot(w0) + b0)
				else:
					A_train = relu(Xtrain.dot(w) + b)
					A_test = relu(Xtest.dot(w) + b)
				Y_train = np.exp(A_train)/np.exp(A_train).sum(axis=1,keepdims=True)
				Y_test = np.exp(A_test)/np.exp(A_test).sum(axis=1,keepdims=True)
				P_test = np.argmax(Y_test,axis=1)
				cost[(lr,mu)].append(cross_entropy(Y_test,Ttest))
				current_cr = classification_rate(P_test,Ytest)
				cr[(lr,mu)].append(current_cr)
				if current_cr > best_cr:
					best_cr = current_cr
					best_lr = lr
					best_mu = mu
				dw = mu*dw - (1-mu)*learning_rate*derivative_w(Xtrain,Y_train,Ttrain)
				db = mu*db - (1-mu)*learning_rate*derivative_b(Y_train,Ttrain)
				if i == 0:
					w = w0 + dw
					b = b0 + db
				else:
					w += dw
					b += db
				if i % 100 == 0:
					print('Learning Rate: ',lr,'Momentum: ',mu,'Cost: ',cost[(lr,mu)][i],'Classification Rate: ',cr[(lr,mu)][i])
				if i == (iterations - 1):
					print('')
	return cost,cr,best_lr,best_mu,best_cr
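Below is a minimal usage sketch for grid(), assuming matplotlib is available and that transform_data, relu, cross_entropy and the derivative helpers are importable as in the function above; the (lr, mu) keys come straight from the dictionaries it returns.

import matplotlib.pyplot as plt

cost, cr, best_lr, best_mu, best_cr = grid()
print('Best learning rate:', best_lr, 'Best momentum:', best_mu, 'Best classification rate:', best_cr)

# plot the test-set cost curve for every (learning rate, momentum) combination
for (lr, mu), curve in cost.items():
    plt.plot(curve, label='lr={}, mu={}'.format(lr, mu))
plt.legend()
plt.show()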
Example #2
def exp_decay(learning_rate):

    X, Y = transform_data()
    X, Y = shuffle(X, Y)
    N = len(X) // 2
    Xtrain = X[:N]
    Ytrain = Y[:N]
    Ttrain = generate_T(Ytrain)
    Xtest = X[N:]
    Ytest = Y[N:]
    Ttest = generate_T(Ytest)
    N, D = Xtrain.shape
    M = 100
    K = len(set(Y))
    iterations = 50
    batch_N = 250
    batches = N // batch_N
    dv = 0
    d_b1 = 0
    dw = 0
    d_b0 = 0
    mu = .9
    v = np.random.randn(M, K) / np.sqrt(M + K)
    b_1 = np.random.randn(K) / np.sqrt(K)
    w = np.random.randn(D, M) / np.sqrt(D + M)
    b_0 = np.random.randn(M) / np.sqrt(M)
    initial_learning_rate = learning_rate
    decay_constant = 0.05  # decay constant for the exponential schedule (illustrative value)
    exp_cost = []
    exp_cr = []
    exp_lr = []
    best_exp = 0
    best_iteration = 0
    for i in range(iterations):
        # exponentially decay the learning rate from its initial value
        learning_rate = initial_learning_rate * np.exp(-decay_constant * i)
        exp_lr.append(learning_rate)
        for b in range(batches):
            X = Xtrain[b * batch_N:(b + 1) * batch_N, :]
            T = Ttrain[b * batch_N:(b + 1) * batch_N, :]
            Y, Z = generate_Y('tanh', X, w, b_0, v, b_1)
            Y_test, _ = generate_Y('tanh', Xtest, w, b_0, v, b_1)
            P_test = np.argmax(Y_test, axis=1)
            if b == 0:  # record metrics once per pass through the training data
                exp_cost.append(cross_entropy(Y_test, Ttest))
                cr = classification_rate(P_test, Ytest)
                exp_cr.append(cr)
                if cr > best_exp:
                    best_exp = cr
                    best_iteration = i
            dv = mu * dv - learning_rate * derivative_v('tanh', Z, Y, T)
            d_b1 = mu * d_b1 - learning_rate * derivative_b1('tanh', Y, T)
            dw = mu * dw - learning_rate * derivative_w('tanh', X, Y, Z, T, v)
            d_b0 = mu * d_b0 - learning_rate * derivative_b0(
                'tanh', Y, Z, T, v)
            v += dv
            b_1 += d_b1
            w += dw
            b_0 += d_b0
        if i % 10 == 0:
            print('Exp Cost: ', exp_cost[i], 'Exp Classification: ', exp_cr[i])
    return exp_cost, exp_cr, exp_lr, best_exp, best_iteration
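A small, self-contained sketch of the schedule exp_decay is meant to apply: the rate at step i is the initial rate scaled by exp(-k * i) for a small decay constant k (the values here are illustrative).

import numpy as np

initial_lr = 1e-3
k = 0.05  # illustrative decay constant
schedule = [initial_lr * np.exp(-k * i) for i in range(10)]
print(schedule)  # learning rates shrink monotonically toward zero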
Example #3
def nesterov_momentum(learning_rate):

    X, Y = transform_data()
    X, Y = shuffle(X, Y)
    N = len(X) // 2
    Xtrain = X[:N]
    Ytrain = Y[:N]
    Ttrain = generate_T(Ytrain)
    Xtest = X[N:]
    Ytest = Y[N:]
    Ttest = generate_T(Ytest)
    N, D = Xtrain.shape
    M = 100
    K = len(set(Y))
    iterations = 50
    batch_N = 250
    batches = N // batch_N
    v = np.random.randn(M, K) / np.sqrt(M + K)
    b_1 = np.random.randn(K) / np.sqrt(K)
    w = np.random.randn(D, M) / np.sqrt(D + M)
    b_0 = np.random.randn(M) / np.sqrt(M)
    mu = .9
    dv = 0
    db_1 = 0
    dw = 0
    db_0 = 0
    nesterov_cost = []
    nesterov_cr = []
    best_nesterov = 0
    best_iteration = 0
    for i in range(iterations):
        for b in range(batches):
            X = Xtrain[b * batch_N:(b + 1) * batch_N, :]
            T = Ttrain[b * batch_N:(b + 1) * batch_N, :]
            Y, Z = generate_Y('tanh', X, w, b_0, v, b_1)
            Y_test, _ = generate_Y('tanh', Xtest, w, b_0, v, b_1)
            P_test = np.argmax(Y_test, axis=1)
            if b == 0:  # record metrics once per pass through the training data
                nesterov_cost.append(cross_entropy(Y_test, Ttest))
                cr = classification_rate(P_test, Ytest)
                nesterov_cr.append(cr)
                if cr > best_nesterov:
                    best_nesterov = cr
                    best_iteration = i
            # simplified Nesterov update: parameter += mu * velocity - learning_rate * gradient
            g_v = derivative_v('tanh', Z, Y, T)
            g_b1 = derivative_b1('tanh', Y, T)
            g_w = derivative_w('tanh', X, Y, Z, T, v)
            g_b0 = derivative_b0('tanh', Y, Z, T, v)
            dv = mu * dv - learning_rate * g_v
            db_1 = mu * db_1 - learning_rate * g_b1
            dw = mu * dw - learning_rate * g_w
            db_0 = mu * db_0 - learning_rate * g_b0
            v += mu * dv - learning_rate * g_v
            b_1 += mu * db_1 - learning_rate * g_b1
            w += mu * dw - learning_rate * g_w
            b_0 += mu * db_0 - learning_rate * g_b0
        if i % 100 == 0:
            print('Nesterov Cost: ', nesterov_cost[i],
                  'Nesterov Classification: ', nesterov_cr[i])
    return nesterov_cost, nesterov_cr, best_nesterov, best_iteration
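A self-contained toy version of the same simplified Nesterov rule on a one-dimensional quadratic, to make the two-step update (velocity, then a look-ahead parameter step) explicit; the loss and constants are illustrative.

w, dw = 5.0, 0.0   # parameter and its velocity
mu, lr = 0.9, 0.1
for _ in range(200):
    grad = 2 * w                # gradient of the toy loss w**2
    dw = mu * dw - lr * grad
    w += mu * dw - lr * grad    # look-ahead step
print(w)  # ends up very close to the minimum at 0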
Example #4
    def train(self, X, Y, activation=1, lr=10e-7, reg=10e-7, epoch=10):
        N, D = X.shape  # dimensionality of our data
        batch_size = 500
        n_batches = int(N / batch_size)
        ind = tar2ind(Y)  # convert the target array into an indicator matrix (one-hot encoding)
        _, K = ind.shape

        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)  # input-to-hidden weights
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)  # hidden-to-output weights
        self.b1 = np.random.randn(self.M)
        self.b2 = np.random.randn(K)
        dW2 = 0
        db2 = 0
        dW1 = 0
        db1 = 0
        mu = 0.9  # Momentum
        decay_rate = 0.99

        cost = []
        for n in range(epoch):
            #tempx , tempy = shuffle(X, ind)
            for i in range(0, n_batches):
                X_tr = X[i * batch_size:(i * batch_size + batch_size), :]
                Y_tr = Y[i * batch_size:(i * batch_size + batch_size), ]
                ind = tar2ind(Y_tr)
                output, hidden = forward(X_tr, activation, self.W1, self.b1,
                                         self.W2, self.b2)

                #Performing backpropagation now
                dW2 = mu * dW2 + lr * (derivative_W2(ind, output, hidden, reg,
                                                     self.W2))
                self.W2 = self.W2 + dW2
                db2 = mu * db2 + lr * (derivative_b2(ind, output, reg,
                                                     self.b2))
                self.b2 = self.b2 + db2
                dW1 = mu * dW1 + lr * (derivative_W1(
                    ind, output, hidden, self.W2, X_tr, activation, reg,
                    self.W1))
                self.W1 = self.W1 + dW1
                db1 = mu * db1 + lr * (derivative_b1(
                    ind, output, hidden, self.W2, activation, reg, self.b1))
                self.b1 = self.b1 + db1
                c = cross_entropy(ind, output)
                cost.append(c)

                if i % 10 == 0:
                    result = np.argmax(output, axis=1)
                    r = classification_rate(Y_tr, result)
                    print("iteration:- ", i, "cost:- ", c,
                          "classification rate:- ", r)
Example #5
def batch(learning_rate):

    X, Y = transform_data()
    X, Y = shuffle(X, Y)
    N = len(X) // 2
    Xtrain = X[:N]
    Ytrain = Y[:N]
    Ttrain = generate_T(Ytrain)
    Xtest = X[N:]
    Ytest = Y[N:]
    Ttest = generate_T(Ytest)
    N, D = Xtrain.shape
    M = 100
    K = len(set(Y))
    iterations = 50
    batch_N = 250
    batches = N // batch_N
    v = np.random.randn(M, K) / np.sqrt(M + K)
    b_1 = np.random.randn(K) / np.sqrt(K)
    w = np.random.randn(D, M) / np.sqrt(D + M)
    b_0 = np.random.randn(M) / np.sqrt(M)
    batch_cost = []
    batch_cr = []
    best_batch = 0
    best_iteration = 0
    for i in range(iterations):
        for b in range(batches):
            X = Xtrain[b * batch_N:(b + 1) * batch_N, :]
            T = Ttrain[b * batch_N:(b + 1) * batch_N, :]
            Y, Z = generate_Y('tanh', X, w, b_0, v, b_1)
            Y_test, _ = generate_Y('tanh', Xtest, w, b_0, v, b_1)
            P_test = np.argmax(Y_test, axis=1)
            if b == 0:  # record metrics once per pass through the training data
                batch_cost.append(cross_entropy(Y_test, Ttest))
                cr = classification_rate(P_test, Ytest)
                batch_cr.append(cr)
                if cr > best_batch:
                    best_batch = cr
                    best_iteration = i
            v -= learning_rate * derivative_v('tanh', Z, Y, T)
            b_1 -= learning_rate * derivative_b1('tanh', Y, T)
            w -= learning_rate * derivative_w('tanh', X, Y, Z, T, v)
            b_0 -= learning_rate * derivative_b0('tanh', Y, Z, T, v)
        if i % 100 == 0:
            print('Batch Cost: ', batch_cost[i], 'Batch Classification: ',
                  batch_cr[i])
    return batch_cost, batch_cr, best_batch, best_iteration
Example #6
train_costs = []
test_costs = []
learning_rate = 0.001

for i in range(10000):
    pYtrain = fwd(Xtrain, w, b)
    pYtest = fwd(Xtest, w, b)

    ctrain = xentropy(Ytrain, pYtrain)
    ctest = xentropy(Ytest, pYtest)

    train_costs.append(ctrain)
    test_costs.append(ctest)

    w -= learning_rate * Xtrain.T.dot(pYtrain - Ytrain)
    b -= learning_rate * (pYtrain - Ytrain).sum()
    if i % 1000 == 0:
        print(i, ctrain, ctest)

print "Final train classification_rate", classification_rate(
    Ytrain, np.round(pYtrain))
print "Final train classification_rate", classification_rate(
    Ytest, np.round(pYtest))

legend1, = plt.plot(train_costs, label='train cost')
legend2, = plt.plot(test_costs, label='test cost')
plt.legend([legend1, legend2])
plt.show()
Example #7
'''
Created on May 14, 2017

@author: Varela
'''

#https://www.udemy.com/data-science-logistic-regression-in-python/learn/v4/t/lecture/5286980?start=0

import numpy as np
import pandas as pd

from ecommerce_preprocess import get_binary_data
from util import sigmoid, fwd, classification_rate

# make predictions with randomly initialized weights
X, Y = get_binary_data()

D = X.shape[1]
W = np.random.randn(D)
b = 0

P_Y_given_X = fwd(X, W, b)
predictions = np.round(P_Y_given_X)

print "Score:", classification_rate(Y, predictions)
Example #8
def rmsprop(learning_rate):

    X, Y = transform_data()
    X, Y = shuffle(X, Y)
    N = len(X) // 2
    Xtrain = X[:N]
    Ytrain = Y[:N]
    Ttrain = generate_T(Ytrain)
    Xtest = X[N:]
    Ytest = Y[N:]
    Ttest = generate_T(Ytest)
    N, D = Xtrain.shape
    M = 100
    K = len(set(Y))
    iterations = 50
    batch_N = 250
    batches = N // batch_N
    dv = 0
    d_b1 = 0
    dw = 0
    d_b0 = 0
    mu = .9
    v = np.random.randn(M, K) / np.sqrt(M + K)
    b_1 = np.random.randn(K) / np.sqrt(K)
    w = np.random.randn(D, M) / np.sqrt(D + M)
    b_0 = np.random.randn(M) / np.sqrt(M)
    cache_v = np.ones((M, K))
    cache_b1 = np.ones(K)
    cache_w = np.ones((D, M))
    cache_b0 = np.ones(M)
    epsilon = 10e-10
    decay = .9
    rmsprop_cost = []
    rmsprop_cr = []
    best_rms = 0
    best_iteration = 0
    for i in range(iterations):
        for b in range(batches):
            X = Xtrain[b * batch_N:(b + 1) * batch_N, :]
            T = Ttrain[b * batch_N:(b + 1) * batch_N, :]
            Y, Z = generate_Y('tanh', X, w, b_0, v, b_1)
            Y_test, _ = generate_Y('tanh', Xtest, w, b_0, v, b_1)
            P_test = np.argmax(Y_test, axis=1)
            if b == 0:  # record metrics once per pass through the training data
                rmsprop_cost.append(cross_entropy(Y_test, Ttest))
                cr = classification_rate(P_test, Ytest)
                rmsprop_cr.append(cr)
                if cr > best_rms:
                    best_rms = cr
                    best_iteration = i
            cache_v = decay * cache_v + (1 - decay) * derivative_v(
                'tanh', Z, Y, T)**2
            cache_b1 = decay * cache_b1 + (1 - decay) * derivative_b1(
                'tanh', Y, T)**2
            cache_w = decay * cache_w + (1 - decay) * derivative_w(
                'tanh', X, Y, Z, T, v)**2
            cache_b0 = decay * cache_b0 + (1 - decay) * derivative_b0(
                'tanh', Y, Z, T, v)**2
            dv = mu * dv - learning_rate * derivative_v(
                'tanh', Z, Y, T) / (np.sqrt(cache_v + epsilon))
            d_b1 = mu * d_b1 - learning_rate * derivative_b1(
                'tanh', Y, T) / (np.sqrt(cache_b1 + epsilon))
            dw = mu * dw - learning_rate * derivative_w(
                'tanh', X, Y, Z, T, v) / (np.sqrt(cache_w + epsilon))
            d_b0 = mu * d_b0 - learning_rate * derivative_b0(
                'tanh', Y, Z, T, v) / (np.sqrt(cache_b0 + epsilon))
            v += dv
            b_1 += d_b1
            w += dw
            b_0 += d_b0
        if i % 10 == 0:
            print('RMSProp Cost: ', rmsprop_cost[i],
                  'RMSProp Classification: ', rmsprop_cr[i])
    return rmsprop_cost, rmsprop_cr, best_rms, best_iteration
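A self-contained toy version of the RMSProp cache and update used above, on a one-dimensional quadratic; the constants are illustrative.

import numpy as np

w = 5.0
cache, lr, decay, eps = 1.0, 0.1, 0.9, 1e-10
for _ in range(100):
    grad = 2 * w                                    # gradient of the toy loss w**2
    cache = decay * cache + (1 - decay) * grad**2   # running average of squared gradients
    w -= lr * grad / np.sqrt(cache + eps)           # per-parameter scaled step
print(w)  # settles near the minimum at 0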
Example #9
    Y = expA / expA.sum(axis=1, keepdims=True)
    return Y


def classification_rate(Y, P):
    n_correct = 0
    n_total = 0
    for i in range(len(Y)):
        n_total += 1
        if Y[i] == P[i]:
            n_correct += 1
    return float(n_correct) / n_total


P_Y_given_X = forward(X, W1, b1, W2, b2)
P = np.argmax(P_Y_given_X, axis=1)

# assert(len(P) == len(Y))
print "classification rate for random weights:", classification_rate(Y, P)

Z = utl.fwd(X, W1, b1)
A = Z.dot(W2) + b2
P_Y_given_X = utl.softmax(A)
P = np.argmax(P_Y_given_X, axis=1)
print "classification rate for random weights (test):", utl.classification_rate(
    Y, P)

utl.fwdprop(X, W1, b1, W2, b2)
print "classification rate for random weights (test-2):", utl.classification_rate(
    Y, P)
Example #10
def main(argv):
    #load data
    test_data_1 = np.load(FLAGS.data_dir + 'test_x_1.npy')
    test_data_2 = np.load(FLAGS.data_dir + 'test_x_2.npy')
    test_data_3 = np.load(FLAGS.data_dir + 'test_x_3.npy')
    test_data_4 = np.load(FLAGS.data_dir + 'test_x_4.npy')
    test_data = [test_data_1, test_data_2, test_data_3, test_data_4]

    test_labels_1 = np.load(FLAGS.data_dir + 'test_y_1.npy')
    test_labels_2 = np.load(FLAGS.data_dir + 'test_y_2.npy')
    test_labels_3 = np.load(FLAGS.data_dir + 'test_y_3.npy')
    test_labels_4 = np.load(FLAGS.data_dir + 'test_y_4.npy')
    test_labels = [test_labels_1, test_labels_2, test_labels_3, test_labels_4]

    train_data_1 = np.load(FLAGS.data_dir + 'train_x_1.npy')
    train_data_2 = np.load(FLAGS.data_dir + 'train_x_2.npy')
    train_data_3 = np.load(FLAGS.data_dir + 'train_x_3.npy')
    train_data_4 = np.load(FLAGS.data_dir + 'train_x_4.npy')
    train_data = [train_data_1, train_data_2, train_data_3, train_data_4]

    train_labels_1 = np.load(FLAGS.data_dir + 'train_y_1.npy')
    train_labels_2 = np.load(FLAGS.data_dir + 'train_y_2.npy')
    train_labels_3 = np.load(FLAGS.data_dir + 'train_y_3.npy')
    train_labels_4 = np.load(FLAGS.data_dir + 'train_y_4.npy')
    train_labels = [
        train_labels_1, train_labels_2, train_labels_3, train_labels_4
    ]

    #count data
    test_count = [
        test_data[0].shape[0], test_data[1].shape[0], test_data[2].shape[0],
        test_data[3].shape[0]
    ]
    train_count = [
        train_data[0].shape[0], train_data[1].shape[0], train_data[2].shape[0],
        train_data[3].shape[0]
    ]

    #specify model
    input_placeholder = tf.placeholder(tf.float32, [None, 16641],
                                       name='input_placeholder')
    my_network = tf.identity(model.build_network(input_placeholder),
                             name='output2')

    #define classification loss
    #code adapted from Paul Quint's hackathon 3
    REG_COEFF = 0.0001
    labels = tf.placeholder(tf.float32, [None, 7], name='labels')
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                            logits=my_network)
    confusion_matrix_op = tf.confusion_matrix(tf.argmax(labels, axis=1),
                                              tf.argmax(my_network, axis=1),
                                              num_classes=7)
    regularization_losses = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = cross_entropy + REG_COEFF * sum(regularization_losses)

    #set up training and saving
    #code adapted from Paul Quint's hackathon 3
    global_step_tensor = tf.get_variable('global_step',
                                         trainable=False,
                                         shape=[],
                                         initializer=tf.zeros_initializer)
    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(total_loss, global_step=global_step_tensor)
    saver = tf.train.Saver()
    sum_cross_entropy = tf.reduce_mean(cross_entropy)

    EPOCHS_BEFORE_STOPPING = 12

    #run the actual training
    #code adapted from Paul Quint's hackathon 3
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        best_test_conf_mxs = []
        best_epoch = [0, 0, 0, 0]
        best_test_ce = [10, 10, 10, 10]
        best_train_ce = [0, 0, 0, 0]
        best_classification_rate = [0, 0, 0, 0]
        epochs_since_best = [0, 0, 0, 0]

        for k in range(0, 4):
            session.run(tf.global_variables_initializer())
            batch_size = FLAGS.batch_size
            print("\n !!!!! NEW K (" + str(k) + ") !!!!!\n")
            for epoch in range(FLAGS.max_epoch_num):
                print("################### EPOCH " + str(epoch) +
                      " #####################")
                print("##################################################\n")

                ce_vals = []
                for i in range(train_count[k] // batch_size):
                    batch_data = train_data[k][i * batch_size:(i + 1) *
                                               batch_size, :]
                    batch_labels = train_labels[k][i * batch_size:(i + 1) *
                                                   batch_size]
                    _, train_ce = session.run([train_op, sum_cross_entropy], {
                        input_placeholder: batch_data,
                        labels: batch_labels
                    })
                    ce_vals.append(train_ce)
                avg_train_ce = sum(ce_vals) / len(ce_vals)
                best_train_ce[k] = avg_train_ce
                print('TRAIN CROSS ENTROPY: ' + str(avg_train_ce))

                print("\n##################################################")

            # evaluate mean loss and the confusion matrix on the test data
            ce_vals = []
            conf_mxs = []
            for i in range(test_count[k] // batch_size):
                batch_data = test_data[k][i * batch_size:(i + 1) *
                                          batch_size, :]
                batch_labels = test_labels[k][i * batch_size:(i + 1) *
                                              batch_size]
                test_ce, conf_matrix = session.run(
                    [sum_cross_entropy, confusion_matrix_op], {
                        input_placeholder: batch_data,
                        labels: batch_labels
                    })
                ce_vals.append(test_ce)
                conf_mxs.append(conf_matrix)
            avg_test_ce = sum(ce_vals) / len(ce_vals)
            classification_rate = util.classification_rate(sum(conf_mxs), 7)
            print('TEST CROSS ENTROPY: ' + str(avg_test_ce))
            print('TEST CONFUSION MATRIX:')
            print(str(sum(conf_mxs)))
            print('TEST CLASSIFICATION RATE:' + str(classification_rate))
            best_test_conf_mxs.append(sum(conf_mxs))
            best_test_ce[k] = avg_test_ce
            best_classification_rate[k] = classification_rate

        print('Confusion Matrix: ')
        print(str(sum(best_test_conf_mxs)))
        print('Avg Test CE: ' + str(np.average(best_test_ce)))
        print('Avg Train CE: ' + str(np.average(best_train_ce)))
        print('Avg Classification Rate: ' +
              str(np.average(best_classification_rate)))
        print('Generating model now...')
        session.run(tf.global_variables_initializer())

        for j in range(0, 4):
            for epoch in range(FLAGS.max_epoch_num):
                for i in range(train_count[j] // batch_size):
                    batch_data = train_data[j][i * batch_size:(i + 1) *
                                               batch_size, :]
                    batch_labels = train_labels[j][i * batch_size:(i + 1) *
                                                   batch_size]
                    _, train_ce = session.run([train_op, sum_cross_entropy], {
                        input_placeholder: batch_data,
                        labels: batch_labels
                    })

        saver.save(session, FLAGS.save_dir)
        print('Model is generated and saved')
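util.classification_rate above receives the summed confusion matrix and the class count; a plausible sketch of such a helper (an assumption, not the project's actual util module) is the ratio of the diagonal to the total count:

import numpy as np

def classification_rate(conf_matrix, num_classes):
    conf_matrix = np.asarray(conf_matrix)[:num_classes, :num_classes]
    # correct predictions sit on the diagonal of the confusion matrix
    return np.trace(conf_matrix) / conf_matrix.sum()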
Example #11
    def full(self):

        for i in range(self.iterations):
            Y_train, Z = generate_Y(self.activation, self.Xtrain, self.w,
                                    self.b_0, self.v, self.b_1)
            P_train = np.argmax(Y_train, axis=1)
            Y_test, _ = generate_Y(self.activation, self.Xtest, self.w,
                                   self.b_0, self.v, self.b_1)
            P_test = np.argmax(Y_test, axis=1)
            self.train_cost.append(cross_entropy(Y_train, self.Ttrain))
            self.test_cost.append(cross_entropy(Y_test, self.Ttest))
            train_cr = classification_rate(P_train, self.Ytrain)
            self.train_cr.append(train_cr)
            test_cr = classification_rate(P_test, self.Ytest)
            self.test_cr.append(test_cr)
            if train_cr > self.best_train:
                self.best_train = train_cr
                self.train_iteration = i
            if test_cr > self.best_test:
                self.best_test = test_cr
                self.test_iteration = i
            self.m_v = self.decay_0 * self.m_v + (
                1 - self.decay_0) * derivative_v(self.activation, Z, Y_train,
                                                 self.Ttrain)
            self.dm_v = self.m_v / (1 - self.decay_0**(i + 1))
            self.v_v = self.decay_1 * self.v_v + (
                1 - self.decay_1) * derivative_v(self.activation, Z, Y_train,
                                                 self.Ttrain)**2
            self.dv_v = self.v_v / (1 - self.decay_1**(i + 1))
            self.m_b1 = self.decay_0 * self.m_b1 + (
                1 - self.decay_0) * derivative_b1(self.activation, Y_train,
                                                  self.Ttrain)
            self.dm_b1 = self.m_b1 / (1 - self.decay_0**(i + 1))
            self.v_b1 = self.decay_1 * self.v_b1 + (
                1 - self.decay_1) * derivative_b1(self.activation, Y_train,
                                                  self.Ttrain)**2
            self.dv_b1 = self.v_b1 / (1 - self.decay_1**(i + 1))
            self.m_w = self.decay_0 * self.m_w + (
                1 - self.decay_0) * derivative_w(self.activation, self.Xtrain,
                                                 Y_train, Z, self.Ttrain,
                                                 self.v)
            self.dm_w = self.m_w / (1 - self.decay_0**(i + 1))
            self.v_w = self.decay_1 * self.v_w + (
                1 - self.decay_1) * derivative_w(self.activation, self.Xtrain,
                                                 Y_train, Z, self.Ttrain,
                                                 self.v)**2
            self.dv_w = self.v_w / (1 - self.decay_1**(i + 1))
            self.m_b0 = self.decay_0 * self.m_b0 + (
                1 - self.decay_0) * derivative_b0(self.activation, Y_train, Z,
                                                  self.Ttrain, self.v)
            self.dm_b0 = self.m_b0 / (1 - self.decay_0**(i + 1))
            self.v_b0 = self.decay_1 * self.v_b0 + (
                1 - self.decay_1) * derivative_b0(self.activation, Y_train, Z,
                                                  self.Ttrain, self.v)**2
            self.dv_b0 = self.v_b0 / (1 - self.decay_1**(i + 1))
            self.v -= self.learning_rate * self.dm_v / (np.sqrt(self.dv_v +
                                                                self.epsilon))
            self.b_1 -= self.learning_rate * self.dm_b1 / (
                np.sqrt(self.dv_b1 + self.epsilon))
            self.w -= self.learning_rate * self.dm_w / (np.sqrt(self.dv_w +
                                                                self.epsilon))
            self.b_0 -= self.learning_rate * self.dm_b0 / (
                np.sqrt(self.dv_b0 + self.epsilon))
            if i % 100 == 0:
                print(i, 'Train Cost: ', self.train_cost[i],
                      'Train Classification Rate: ', self.train_cr[i])
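A self-contained toy version of the same Adam update (first and second moment estimates with bias correction) on a one-dimensional quadratic; the constants are illustrative.

import numpy as np

w = 5.0
m, v = 0.0, 0.0
lr, beta1, beta2, eps = 0.1, 0.9, 0.999, 1e-8
for t in range(1, 501):
    grad = 2 * w                              # gradient of the toy loss w**2
    m = beta1 * m + (1 - beta1) * grad        # first moment estimate
    v = beta2 * v + (1 - beta2) * grad**2     # second moment estimate
    m_hat = m / (1 - beta1**t)                # bias correction
    v_hat = v / (1 - beta2**t)
    w -= lr * m_hat / (np.sqrt(v_hat) + eps)
print(w)  # ends up near the minimum at 0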
Example #12
    def stochastic(self, samples):

        for i in range(self.iterations):
            current_X, current_T = shuffle(self.Xtrain, self.Ttrain)
            for s in range(samples):
                X = current_X[s, :].reshape(1, current_X.shape[1])
                T = current_T[s, :].reshape(1, current_T.shape[1])
                Y, Z = generate_Y(self.activation, X, self.w, self.b_0, self.v,
                                  self.b_1)
                Y_train, _ = generate_Y(self.activation, self.Xtrain, self.w,
                                        self.b_0, self.v, self.b_1)
                P_train = np.argmax(Y_train, axis=1)
                Y_test, _ = generate_Y(self.activation, self.Xtest, self.w,
                                       self.b_0, self.v, self.b_1)
                P_test = np.argmax(Y_test, axis=1)
                self.train_cost.append(cross_entropy(Y_train, self.Ttrain))
                self.test_cost.append(cross_entropy(Y_test, self.Ttest))
                train_cr = classification_rate(P_train, self.Ytrain)
                self.train_cr.append(train_cr)
                test_cr = classification_rate(P_test, self.Ytest)
                self.test_cr.append(test_cr)
                if train_cr > self.best_train:
                    self.best_train = train_cr
                    self.train_iteration = i
                if test_cr > self.best_test:
                    self.best_test = test_cr
                    self.test_iteration = i
                self.m_v = self.decay_0 * self.m_v + (
                    1 - self.decay_0) * derivative_v(self.activation, Z, Y, T)
                self.dm_v = self.m_v / (1 - self.decay_0**(i + 1))
                self.v_v = self.decay_1 * self.v_v + (
                    1 - self.decay_1) * derivative_v(self.activation, Z, Y,
                                                     T)**2
                self.dv_v = self.v_v / (1 - self.decay_1**(i + 1))
                self.m_b1 = self.decay_0 * self.m_b1 + (
                    1 - self.decay_0) * derivative_b1(self.activation, Y, T)
                self.dm_b1 = self.m_b1 / (1 - self.decay_0**(i + 1))
                self.v_b1 = self.decay_1 * self.v_b1 + (
                    1 - self.decay_1) * derivative_b1(self.activation, Y, T)**2
                self.dv_b1 = self.v_b1 / (1 - self.decay_1**(i + 1))
                self.m_w = self.decay_0 * self.m_w + (
                    1 - self.decay_0) * derivative_w(self.activation, X, Y, Z,
                                                     T, self.v)
                self.dm_w = self.m_w / (1 - self.decay_0**(i + 1))
                self.v_w = self.decay_1 * self.v_w + (
                    1 - self.decay_1) * derivative_w(self.activation, X, Y, Z,
                                                     T, self.v)**2
                self.dv_w = self.v_w / (1 - self.decay_1**(i + 1))
                self.m_b0 = self.decay_0 * self.m_b0 + (
                    1 - self.decay_0) * derivative_b0(self.activation, Y, Z, T,
                                                      self.v)
                self.dm_b0 = self.m_b0 / (1 - self.decay_0**(i + 1))
                self.v_b0 = self.decay_1 * self.v_b0 + (
                    1 - self.decay_1) * derivative_b0(self.activation, Y, Z, T,
                                                      self.v)**2
                self.dv_b0 = self.v_b0 / (1 - self.decay_1**(i + 1))
                self.v -= self.learning_rate * self.dm_v / (
                    np.sqrt(self.dv_v + self.epsilon))
                self.b_1 -= self.learning_rate * self.dm_b1 / (
                    np.sqrt(self.dv_b1 + self.epsilon))
                self.w -= self.learning_rate * self.dm_w / (
                    np.sqrt(self.dv_w + self.epsilon))
                self.b_0 -= self.learning_rate * self.dm_b0 / (
                    np.sqrt(self.dv_b0 + self.epsilon))
            if i % 100 == 0:
                print(i, 'Train Cost: ', self.train_cost[i],
                      'Train Classification Rate: ', self.train_cr[i])