# Imports assumed by the functions below. ANN and AF are this project's own
# modules (neural-net helpers and activation-function classes, respectively);
# the sklearn imports are the assumed sources of the bare shuffle() and
# confusion_matrix() calls.
import os
import pdb
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle

import AF
import ANN


def run(L1, L2, overwrite=False, **kwargs):
    # check that pickles exist
    if any([not os.path.isfile('train.pkl'), overwrite]):
        pickle_data_sets()

    # load data
    train = read_pickle('train')
    validate = read_pickle('validate')
    test = read_pickle('test')
    # pdb.set_trace()

    # get dictionaries for activation functions and M
    afs = {
        1: {
            1: AF.ReLU(),
            2: AF.ReLU(),
            3: AF.ReLU(),
            4: AF.ReLU(),
            5: AF.ReLU(),
            6: AF.ReLU(),
            7: AF.ReLU(),
            8: AF.ReLU(),
            9: AF.ReLU(),
            10: AF.softmax()
        }
    }
    Ms = {1: {1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4, 7: 4, 8: 4, 9: 4}}

    # run cross validation
    results = train_validate_test(train, validate, test, L1, L2, afs, Ms,
                                  **kwargs)

    # flatten out Y and Y_hat
    Y_hat = ANN.collapse_Y(results['Y_hat'])
    Y = ANN.collapse_Y(test['Y'])

    # update results
    results['Y'] = Y
    results['Y_hat'] = Y_hat
    results['CM'] = confusion_matrix(Y, Y_hat)

    # send results to pickle
    pd.Series(results).to_pickle(
        "./results_{:0.4}.pkl".format(results['validate']))

    # output results
    return results
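# The bare read_pickle() calls above (and in the later run() versions) are not
# defined in this section. A minimal sketch consistent with how it is called
# (names passed without the ".pkl" extension), assuming the pickles produced
# by pickle_data_sets() live in the working directory:
def read_pickle(name):
    """Load one of the pickled pandas.Series written by pickle_data_sets()."""
    return pd.read_pickle(f"./{name}.pkl")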
def train_validate_test(train, validate, test, af, M, l1, l2, **kwargs):
    # print message
    print(f"\nperforming train-validate-test for lambda: {l1},{l2}")

    # training results
    bp = back_propagation(train, validate, af, M, l1, l2, **kwargs)
    W = bp['W']
    b = bp['b']
    Z_train = feed_forward(train['PHI'], W, b, af)
    P_hat_train = Z_train[len(af)]
    acc_train = ANN.accuracy(train['Y'], P_hat_train)

    # validate results
    Z_validate = feed_forward(validate['PHI'], W, b, af)
    P_hat_validate = Z_validate[len(af)]
    acc_validate = ANN.accuracy(validate['Y'], P_hat_validate)

    # test results
    Z_test = feed_forward(test['PHI'], W, b, af)
    P_hat_test = Z_test[len(af)]
    acc_test = ANN.accuracy(test['Y'], P_hat_test)

    # collect results
    results = {
        'W': W,
        'b': b,
        'P_hat': P_hat_test,
        'accuracy': {
            'train': acc_train,
            'validate': acc_validate,
            'test': acc_test
        }
    }
    return results
def train_validate_test(train, validate, test, L1, L2, afs, M, **kwargs):
    # instantiate some variables to keep track of the best results
    bestAc = dict(train=0, validate=0, test=0)
    bestResults = None

    # cast a net and find the best outcome
    for l1 in L1:
        for l2 in L2:
            for af in afs.values():
                for m in M.values():
                    # print iteration to track progress
                    print(f"\nworking on {l1},{l2}...")

                    # update kwargs
                    kwargs['lambda1'] = l1
                    kwargs['lambda2'] = l2

                    # instantiate ArtificalNeuralNet instance
                    ann = ANN.ArtificalNeuralNet(test['PHI'], test['Y'], af, m)

                    # solve and run cross validation test for this instance
                    results = ann.train_validate_test(train, validate, test,
                                                      **kwargs)

                    # print iteration results
                    print(f"train accuracy: {results['train']}, "
                          f"validate accuracy: {results['validate']}")

                    # update best values
                    if results['validate'] > bestAc['validate']:
                        bestAc['validate'] = results['validate']
                        bestResults = results
                        print("best value (so far)!")

    # collect results
    results = pd.Series(bestResults)

    # send results to pickle
    results.to_pickle(f"./results_{np.round(bestAc['validate'], 4)}.pkl")

    # output
    return results
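# A hypothetical invocation of the grid-search pipeline above; the lambda
# grids, epochs, and batch size are illustrative only, and the ANN class is
# assumed to accept these kwargs.
L1_grid = [0, 1e-3, 1e-2]  # LASSO lambdas to try (illustrative)
L2_grid = [0, 1e-3, 1e-2]  # Ridge lambdas to try (illustrative)
best = run(L1_grid, L2_grid, overwrite=False, epochs=100, batch_size=100)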
def back_propagation(train, validate, af, M, l1, l2, **kwargs):
    """
    explanation:
        Does batch gradient descent using the AdaGrad adaptive learning rate
    input:
        train: dict - 'PHI' & 'Y'
        validate: dict - 'PHI' & 'Y'
        af: dict - activation function class instances for ALL layers
        M: dict - numbers of nodes for all hidden layers
        l1: float - lambda1 for LASSO regression
        l2: float - lambda2 for Ridge regression
        kwargs:
            save_plot: bool - saves plot if True
            eta0: float - initial 'basic' learning rate
            epsilon: float - very small number so that AdaGrad won't have 0
                in the denominator
            epochs: int - number of epochs for training
            batch_size: int - how many observations to feed at a time
            G0: float/int - what to initialize the AdaGrad accumulators to
    output:
        dict - weights 'W' and bias 'b'
    """
    # NOTE: l1 and l2 are accepted but not yet applied to the updates below.

    # input dimensions
    if train['Y'].shape[0] == train['Y'].size:
        pdb.set_trace()
        K = len(set(train['Y']))
    else:
        K = train['Y'].shape[1]
    N, D = train['PHI'].shape
    L = len(af)  # number of layers (af holds one activation per layer)

    # kwargs
    save_plot = kwargs['save_plot'] if 'save_plot' in kwargs else True
    eta0 = kwargs['eta0'] if 'eta0' in kwargs else 1e-3
    epsilon = kwargs['epsilon'] if 'epsilon' in kwargs else 1e-8
    epochs = int(kwargs['epochs'] if 'epochs' in kwargs else 1e3)
    batch_size = kwargs['batch_size'] if 'batch_size' in kwargs else N
    G0 = kwargs['G0'] if 'G0' in kwargs else 1

    # set random W and b of appropriate shapes
    W = {}
    b = {}
    W[1] = np.random.randn(D, M[1])
    b[1] = np.random.randn(M[1])
    for l in range(2, L):
        W[l] = np.random.randn(M[l - 1], M[l])
        b[l] = np.random.randn(M[l])
    W[L] = np.random.randn(M[L - 1], K)
    b[L] = np.random.randn(K)

    # set up back propagation
    batches = N // batch_size
    J_train = np.zeros(epochs * batches)
    J_validate = np.zeros_like(J_train)
    GW, Gb = {}, {}
    for l in af:
        GW[l] = G0
        Gb[l] = G0
    for epoch in range(epochs):
        X, Y = shuffle(train['PHI'], train['Y'])
        for batch in range(batches):
            X_b = X[(batch * batch_size):(batch + 1) * batch_size]
            Y_b = Y[(batch * batch_size):(batch + 1) * batch_size]

            # feed forward
            Z = feed_forward(X_b, W, b, af)

            # set up dZ, dH, dW, and db
            dZ, dH, dW, db = {}, {}, {}, {}

            # start with the output layer
            dH[L] = Z[L] - Y_b
            dW[L] = np.matmul(Z[L - 1].T, dH[L])
            db[L] = dH[L].sum(axis=0)
            GW[L] += dW[L]**2
            Gb[L] += db[L]**2
            W[L] -= eta0 / np.sqrt(GW[L] + epsilon) * dW[L] / batch_size
            b[L] -= eta0 / np.sqrt(Gb[L] + epsilon) * db[L] / batch_size

            # now work back through each layer till the input layer
            for l in np.arange(2, L)[::-1]:
                dZ[l] = np.matmul(dH[l + 1], W[l + 1].T)
                dH[l] = dZ[l] * af[l].df(Z[l])
                dW[l] = np.matmul(Z[l - 1].T, dH[l])
                db[l] = dH[l].sum(axis=0)
                GW[l] += dW[l]**2
                Gb[l] += db[l]**2
                W[l] -= eta0 / np.sqrt(GW[l] + epsilon) * dW[l] / batch_size
                b[l] -= eta0 / np.sqrt(Gb[l] + epsilon) * db[l] / batch_size

            # end with the input layer
            dZ[1] = np.matmul(dH[2], W[2].T)
            dH[1] = dZ[1] * af[1].df(Z[1])
            dW[1] = np.matmul(X_b.T, dH[1])
            db[1] = dH[1].sum(axis=0)
            GW[1] += dW[1]**2
            Gb[1] += db[1]**2
            W[1] -= eta0 / np.sqrt(GW[1] + epsilon) * dW[1] / batch_size
            b[1] -= eta0 / np.sqrt(Gb[1] + epsilon) * db[1] / batch_size

            # feed forward for the whole train and validation sets
            Z_train = feed_forward(train['PHI'], W, b, af)
            Z_validate = feed_forward(validate['PHI'], W, b, af)

            # update train and validation cost functions
            index = batch + (epoch * batches)
            J_train[index] = ANN.cross_entropy(train['Y'], Z_train[L]) / N
            J_validate[index] = ANN.cross_entropy(
                validate['Y'], Z_validate[L]) / validate['Y'].shape[0]

    # save figure
    if save_plot:
        fig, ax = plt.subplots()
        fig.suptitle(
            f"$\\eta_0$: {eta0}, epochs: {epochs}, batch size: {batch_size}")
        ax.plot(J_train, label="J: Training")
        ax.plot(J_validate, label="J: Validation")
        ax.set_xlabel("batch + (epoch x batches)")
        ax.set_ylabel("J")
        ax.legend(loc='best')
        if not os.path.isdir("J"):
            os.mkdir("J")
        savename = f"J/J_eta_{eta0}_epochs_{epochs}.pdf"
        fig.savefig(savename)
        plt.close(fig)
        print(f"saved {savename}")

    # collect results
    results = {
        'W': W,  # weights
        'b': b   # bias
        # 'P_hat': Z[L]  # output predictions
    }

    # output results
    return results
def pickle_data_sets():
    #===========================================================================
    # load kaggle data
    #===========================================================================
    data = pd.read_csv("usps_digit_recognizer.csv")

    # #=======================================================================
    # # load up data https://pjreddie.com/projects/mnist-in-csv/
    # #=======================================================================
    #
    # # load training and test data
    # names = ['label'] + [f"pixel_{x}" for x in range(784)]
    # train = pd.read_csv("mnist_train.csv", names=names)
    # test = pd.read_csv("mnist_test.csv", names=names)
    #
    # # merge sets
    # data = train.append(test, ignore_index=True)

    #===========================================================================
    # design matrix & target matrix
    #===========================================================================
    # extract the label column
    y = data['label'].values
    data.drop(columns=['label'], inplace=True)
    X = data.values
    N, D = X.shape
    K = 10

    # one-hot Y
    Y = ANN.one_hot_encode(y)

    # construct design matrix
    PHI = X / 255
    # PHI = np.column_stack((np.ones((N, 1)), X / 255))

    #===========================================================================
    # shuffle data
    #===========================================================================
    # shuffle PHI and Y
    PHI, Y = ANN.shuffle(PHI, Y)

    # get the numbers of observations for the data sets
    N_train = int(.6 * N)
    N_validate = int(.2 * N)
    N_test = N - N_validate - N_train

    # get the cross validation design matrices
    PHI_train = PHI[:N_train]
    PHI_validate = PHI[N_train:N_train + N_validate]
    PHI_test = PHI[N_train + N_validate:]

    # get the cross validation target arrays
    Y_train = Y[:N_train]
    Y_validate = Y[N_train:N_train + N_validate]
    Y_test = Y[N_train + N_validate:]

    # get pandas.Series of sets
    train = pd.Series(dict(PHI=PHI_train, Y=Y_train))
    validate = pd.Series(dict(PHI=PHI_validate, Y=Y_validate))
    test = pd.Series(dict(PHI=PHI_test, Y=Y_test))

    # send Series to pickle
    print("\npickle-ing train, validation and test sets...")
    train.to_pickle("train.pkl")
    validate.to_pickle("validate.pkl")
    test.to_pickle("test.pkl")
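# ANN.one_hot_encode() is assumed above; a minimal sketch of the behavior the
# caller relies on, assuming integer class labels 0..K-1:
def one_hot_encode(y):
    """Map a length-N vector of integer labels to an (N, K) one-hot matrix."""
    N, K = y.shape[0], y.max() + 1
    Y = np.zeros((N, K))
    Y[np.arange(N), y] = 1
    return Y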
def back_propagation(train, **kwargs):
    # input dimensions
    N, D = train['PHI'].shape
    if train['Y'].shape[0] == train['Y'].size:
        K = len(set(train['Y']))
    else:
        K = train['Y'].shape[1]

    # kwargs
    save_plot = kwargs['save_plot'] if 'save_plot' in kwargs else True
    eta = kwargs['eta'] if 'eta' in kwargs else 1e-3
    epochs = int(kwargs['epochs'] if 'epochs' in kwargs else 1e3)
    batch_size = kwargs['batch_size'] if 'batch_size' in kwargs else N

    # set random W and b of appropriate shapes
    # NOTE: M1 (the hidden-layer width) is assumed to be defined at module
    # level; this version does not take it as an argument.
    W1 = np.random.randn(D, M1)
    b1 = np.random.randn(M1)
    # W2 = np.random.randn(M1, M2)
    # b2 = np.random.randn(M2)
    # W3 = np.random.randn(M2, M3)
    # b3 = np.random.randn(M3)
    # W4 = np.random.randn(M3, M4)
    # b4 = np.random.randn(M4)
    # W5 = np.random.randn(M4, M5)
    # b5 = np.random.randn(M5)
    # W6 = np.random.randn(M5, K)
    # b6 = np.random.randn(K)
    W2 = np.random.randn(M1, K)
    b2 = np.random.randn(K)

    # set up back propagation
    batches = N // batch_size
    J_train = np.zeros(epochs * batches)
    for epoch in range(epochs):
        X, Y = ANN.shuffle(train['PHI'], train['Y'])
        for batch in range(batches):
            # start timer
            t0 = datetime.now()

            # get the batch data
            X_b = X[(batch * batch_size):(batch + 1) * batch_size]
            Y_b = Y[(batch * batch_size):(batch + 1) * batch_size]

            # feed forward
            Z1, P_hat = feed_forward(X_b, W1, b1, W2, b2)
            # Z1, Z2, Z3, Z4, Z5, P_hat = feed_forward(
            #     X_b, W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6)

            dH2 = P_hat - Y_b
            dW2 = np.matmul(Z1.T, dH2)
            db2 = dH2.sum(axis=0)
            W2 -= eta * dW2
            b2 -= eta * db2

            dZ1 = np.matmul(dH2, W2.T)
            dH1 = dZ1 * (1 - Z1**2)  # tanh derivative
            dW1 = np.matmul(X_b.T, dH1)
            db1 = dH1.sum(axis=0)
            W1 -= eta * dW1
            b1 -= eta * db1

            # back propagation for the commented-out 6-layer (ReLU) variant:
            # dH6 = P_hat - Y_b
            # dW6 = np.matmul(Z5.T, dH6)
            # db6 = dH6.sum(axis=0)
            # W6 -= eta * dW6
            # b6 -= eta * db6
            #
            # dZ5 = np.matmul(dH6, W6.T)
            # dH5 = dZ5 * (Z5 > 0)
            # dW5 = np.matmul(Z4.T, dH5)
            # db5 = dH5.sum(axis=0)
            # W5 -= eta * dW5
            # b5 -= eta * db5
            #
            # dZ4 = np.matmul(dH5, W5.T)
            # dH4 = dZ4 * (Z4 > 0)
            # dW4 = np.matmul(Z3.T, dH4)
            # db4 = dH4.sum(axis=0)
            # W4 -= eta * dW4
            # b4 -= eta * db4
            #
            # dZ3 = np.matmul(dH4, W4.T)
            # dH3 = dZ3 * (Z3 > 0)
            # dW3 = np.matmul(Z2.T, dH3)
            # db3 = dH3.sum(axis=0)
            # W3 -= eta * dW3
            # b3 -= eta * db3
            #
            # dZ2 = np.matmul(dH3, W3.T)
            # dH2 = dZ2 * (Z2 > 0)
            # dW2 = np.matmul(Z1.T, dH2)
            # db2 = dH2.sum(axis=0)
            # W2 -= eta * dW2
            # b2 -= eta * db2
            #
            # dZ1 = np.matmul(dH2, W2.T)
            # dH1 = dZ1 * (Z1 > 0)
            # dW1 = np.matmul(X_b.T, dH1)
            # db1 = dH1.sum(axis=0)
            # W1 -= eta * dW1
            # b1 -= eta * db1

            # feed forward for the whole training set
            Z1, P_hat = feed_forward(train['PHI'], W1, b1, W2, b2)
            # Z1, Z2, Z3, Z4, Z5, P_hat = feed_forward(
            #     train['PHI'], W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6)

            # update training cost function
            index = batch + (epoch * batches)
            J_train[index] = ANN.cross_entropy(train['Y'], P_hat) / N

            #===================================================================
            # approximate time left till training is done
            #===================================================================
            # find batch time
            tf = (datetime.now() - t0).seconds

            # find number of sub-epochs left
            epochs_left = J_train.shape[0] - index - 1

            # time left till training is done (minutes)
            time_left = tf * epochs_left / 60
            print("Approximately {:0.2f} minutes left.".format(time_left))

    # save figure
    if save_plot:
        fig, ax = plt.subplots()
        fig.suptitle(
            f"$\\eta$: {eta}, epochs: {epochs}, batch size: {batch_size}")
        ax.plot(J_train, label="J: Training")
        ax.set_xlabel("batch + (epoch x batches)")
        ax.set_ylabel("J")
        ax.legend(loc='best')
        if not os.path.isdir("J"):
            os.mkdir("J")
        savename = f"J/J_eta_{eta}_epochs_{epochs}.pdf"
        fig.savefig(savename)
        plt.close(fig)
        print(f"saved {savename}")

    # # collect results
    # results = {
    #     'W': (W1, W2, W3, W4, W5, W6),  # weights
    #     'b': (b1, b2, b3, b4, b5, b6)   # bias
    # }

    print("Accuracy: {:0.4f}".format(ANN.accuracy(train['Y'], P_hat)))
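# The legacy back_propagation() above unpacks feed_forward(X, W1, b1, W2, b2)
# into (Z1, P_hat). A sketch consistent with the gradients it computes -- the
# (1 - Z1**2) factor implies a tanh hidden layer, and the (P_hat - Y_b)
# output-layer gradient implies softmax with cross-entropy:
def feed_forward(X, W1, b1, W2, b2):
    """Two-layer forward pass: tanh hidden layer, softmax output."""
    Z1 = np.tanh(np.matmul(X, W1) + b1)
    A2 = np.matmul(Z1, W2) + b2
    expA = np.exp(A2 - A2.max(axis=1, keepdims=True))  # stabilized softmax
    P_hat = expA / expA.sum(axis=1, keepdims=True)
    return Z1, P_hat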
def run(af, M, L1, L2, **kwargs):
    # kwargs
    overwrite = kwargs['overwrite'] if 'overwrite' in kwargs else False

    #===========================================================================
    # set up data
    #===========================================================================
    # check that pickles exist
    if any([not os.path.isfile('train.pkl'), overwrite]):
        pickle_data_sets()

    # load data
    train = read_pickle('train')
    validate = read_pickle('validate')
    test = read_pickle('test')

    #===========================================================================
    # run cross validation
    #===========================================================================
    results = train_validate_test(train, validate, test, af, M, L1, L2,
                                  **kwargs)

    # update results
    results['CM'] = ANN.confusion_matrix(test['Y'], results['P_hat'])
    results['M'] = M
    results['af'] = af
    results['L'] = len(af)  # number of layers
    results['lambda1'] = L1
    results['lambda2'] = L2
    results['eta'] = kwargs['eta'] if 'eta' in kwargs else 1e-3
    results['epochs'] = kwargs['epochs'] if 'epochs' in kwargs else 1e3
    results['batch_size'] = (kwargs['batch_size']
                             if 'batch_size' in kwargs else 30)

    #===========================================================================
    # send results to pickle
    #===========================================================================
    # get a list of the results in the working directory
    DIR = np.array(os.listdir())
    mask = ['results_' in x for x in DIR]
    DIR = DIR[mask]
    # use if there are results saved already -- ad hoc quick fix
    best = np.array([x[x.find("_") + 1:x.find(".pkl")] for x in DIR],
                    dtype=np.float32).max()

    # send results to pickle
    acc = results['accuracy']['validate']
    if acc > best:
        print("\nfound new best result!")
        pd.Series(results).to_pickle("results_{:0.4}.pkl".format(acc))
    else:
        print("\nno such luck...")

    # # use if no results have been saved
    # pd.Series(results).to_pickle(
    #     "results_{:0.4}.pkl".format(results['accuracy']['validate']))

    print(results['accuracy'])

    # output results
    return results
def back_propagation(train, validate, af, M, l1, l2, **kwargs):
    # NOTE: l1 and l2 are accepted but not yet applied to the updates below.

    # input dimensions
    if train['Y'].shape[0] == train['Y'].size:
        pdb.set_trace()
        K = len(set(train['Y']))
    else:
        K = train['Y'].shape[1]
    N, D = train['PHI'].shape
    L = len(af)  # number of layers (af holds one activation per layer)

    # kwargs
    save_plot = kwargs['save_plot'] if 'save_plot' in kwargs else True
    eta = kwargs['eta'] if 'eta' in kwargs else 1e-3
    epochs = int(kwargs['epochs'] if 'epochs' in kwargs else 1e3)
    batch_size = kwargs['batch_size'] if 'batch_size' in kwargs else 30

    # set random W and b of appropriate shapes
    W = {}
    b = {}
    W[1] = np.random.randn(D, M[1])
    b[1] = np.random.randn(M[1])
    for l in range(2, L):
        W[l] = np.random.randn(M[l - 1], M[l])
        b[l] = np.random.randn(M[l])
    W[L] = np.random.randn(M[L - 1], K)
    b[L] = np.random.randn(K)

    # set up back propagation
    batches = N // batch_size
    J_train = np.zeros(epochs * batches)
    J_validate = np.zeros_like(J_train)
    for epoch in range(epochs):
        X, Y = shuffle(train['PHI'], train['Y'])
        for batch in range(batches):
            X_b = X[(batch * batch_size):(batch + 1) * batch_size]
            Y_b = Y[(batch * batch_size):(batch + 1) * batch_size]

            # feed forward
            Z = feed_forward(X_b, W, b, af)

            # set up dZ, dH, dW, and db
            dZ, dH, dW, db = {}, {}, {}, {}

            # start with the output layer
            dH[L] = Z[L] - Y_b
            dW[L] = np.matmul(Z[L - 1].T, dH[L])
            db[L] = dH[L].sum(axis=0)
            W[L] -= eta * dW[L] / batch_size
            b[L] -= eta * db[L] / batch_size

            # now work back through each layer till the input layer
            for l in np.arange(2, L)[::-1]:
                dZ[l] = np.matmul(dH[l + 1], W[l + 1].T)
                dH[l] = dZ[l] * af[l].df(Z[l])
                dW[l] = np.matmul(Z[l - 1].T, dH[l])
                db[l] = dH[l].sum(axis=0)
                W[l] -= eta * dW[l] / batch_size
                b[l] -= eta * db[l] / batch_size

            # end with the input layer
            dZ[1] = np.matmul(dH[2], W[2].T)
            dH[1] = dZ[1] * af[1].df(Z[1])
            dW[1] = np.matmul(X_b.T, dH[1])
            db[1] = dH[1].sum(axis=0)
            W[1] -= eta * dW[1] / batch_size
            b[1] -= eta * db[1] / batch_size

            # feed forward for the whole train and validation sets
            Z_train = feed_forward(train['PHI'], W, b, af)
            Z_validate = feed_forward(validate['PHI'], W, b, af)

            # update train and validation cost functions
            index = batch + (epoch * batches)
            J_train[index] = ANN.cross_entropy(train['Y'], Z_train[L]) / N
            J_validate[index] = ANN.cross_entropy(
                validate['Y'], Z_validate[L]) / validate['Y'].shape[0]

    # save figure
    if save_plot:
        fig, ax = plt.subplots()
        fig.suptitle(
            f"$\\eta$: {eta}, epochs: {epochs}, batch size: {batch_size}")
        ax.plot(J_train, label="J: Training")
        ax.plot(J_validate, label="J: Validation")
        ax.set_xlabel("batch + (epoch x batches)")
        ax.set_ylabel("J")
        ax.legend(loc='best')
        if not os.path.isdir("J"):
            os.mkdir("J")
        savename = f"J/J_eta_{eta}_epochs_{epochs}.pdf"
        fig.savefig(savename)
        plt.close(fig)
        print(f"saved {savename}")

    # collect results
    results = {
        'W': W,  # weights
        'b': b   # bias
        # 'P_hat': Z[L]  # output predictions
    }

    # output results
    return results
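# Both dict-based back_propagation() versions call ANN.cross_entropy() and
# divide the result by the number of observations themselves, so the helper
# presumably returns the total (summed) multiclass cross entropy. A minimal
# sketch under that assumption:
def cross_entropy(Y, P_hat, eps=1e-12):
    """Total cross entropy between one-hot Y and predicted probabilities."""
    return -np.sum(Y * np.log(P_hat + eps))  # eps guards against log(0)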
def run(af, M, L1, L2, **kwargs):
    # kwargs
    overwrite = kwargs['overwrite'] if 'overwrite' in kwargs else False

    #===========================================================================
    # set up data
    #===========================================================================
    # check that pickles exist
    if any([not os.path.isfile('train.pkl'), overwrite]):
        pickle_data_sets()

    # load data
    train = read_pickle('train')
    validate = read_pickle('validate')
    test = read_pickle('test')

    #===========================================================================
    # run cross validation
    #===========================================================================
    results = train_validate_test(train, validate, test, af, M, L1, L2,
                                  **kwargs)

    # update results
    try:
        results['CM'] = ANN.confusion_matrix(test['Y'], results['P_hat'])
    except Exception as e:
        print(f"something went wrong with the confusion matrix: {e}")
    results['M'] = M
    results['af'] = af
    results['L'] = len(af)  # number of layers
    results['lambda1'] = L1
    results['lambda2'] = L2
    results['eta'] = kwargs['eta'] if 'eta' in kwargs else 1e-3
    results['epochs'] = kwargs['epochs'] if 'epochs' in kwargs else 1e3
    results['batch_size'] = (kwargs['batch_size']
                             if 'batch_size' in kwargs else 30)

    #===========================================================================
    # send results to pickle
    #===========================================================================
    # validation accuracy from the current run
    acc = results['accuracy']['validate']

    # find any previously pickled best results
    DIR = np.array(os.listdir())
    mask = ['results_' in x for x in DIR]
    DIR = DIR[mask]

    # if there are past results, save the current results only if they beat
    # the best previous result
    if len(DIR) > 0:
        best = np.array([x[x.find("_") + 1:x.find(".pkl")] for x in DIR],
                        dtype=np.float32).max()
        if acc > best:
            print("\nfound new best result!")
            pd.Series(results).to_pickle("results_{:0.4}.pkl".format(acc))
        else:
            print("\nno such luck...")
    # if there are no past results, just save the current results
    else:
        print("\nfound new best result!")
        pd.Series(results).to_pickle("results_{:0.4}.pkl".format(acc))

    print("Accuracy from this round: {:0.4f}".format(acc))

    # output results
    return results
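# A hypothetical driver for the final run() above: a two-hidden-layer ReLU
# network with a softmax output layer. The widths, lambdas, and training
# kwargs are illustrative only.
if __name__ == '__main__':
    af = {1: AF.ReLU(), 2: AF.ReLU(), 3: AF.softmax()}  # one entry per layer
    M = {1: 300, 2: 100}                                # hidden-layer widths
    results = run(af, M, L1=0, L2=0, eta=1e-3, epochs=100, batch_size=30)
    print(results['accuracy'])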