# shared imports for the scripts collected below; Dictionary, kernel_mean_matching,
# gradient_A/gradient_B, metrics, and the loss helpers come from this repo's own
# modules (import paths not shown in this excerpt)
import time

import matplotlib.pyplot as plt
import numpy as np
from scipy import sparse
from sklearn.preprocessing import normalize


def main():
    # args from Simple Queries paper
    DIM = 30
    WORDGRAMS = 2
    MINCOUNT = 8
    MINN = 3
    MAXN = 3
    BUCKET = 1000000

    # adjust these
    EPOCH = 5
    LR = 0.15            # 0.15 good for ~5000
    KERN = 'lin'         # lin or rbf or poly
    NUM_RUNS = 1         # number of test runs
    SUBSET_VAL = 300     # number of subset instances for self-reported dataset
    LIN_C = 0.90         # hyperparameter for linear kernel

    run = 0

    print("starting dictionary creation.............................")
    dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET, SUBSET_VAL, run)
    X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
    n_train = dictionary.get_n_train_instances()
    n_test = dictionary.get_n_manual_instances()

    X_train = dictionary.get_trainset()
    X_test = dictionary.get_manual_testset()

    print()
    print("starting optimization")
    #coef = kernel_mean_matching(X_train, X_test, n_train, n_test, kern='lin', B=10)
    coef = kernel_mean_matching(X_test, X_train[0], LIN_C, kern='lin', B=10)
    print(coef)
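# `kernel_mean_matching` is called above but defined in another module. Below is
# a minimal sketch of the standard KMM quadratic program (Huang et al.,
# "Correcting Sample Selection Bias by Unlabeled Data"), assuming the cvxopt
# solver. The name, signature, and linear-kernel-only behavior are illustrative
# assumptions, not the author's implementation.
from cvxopt import matrix, solvers


def kernel_mean_matching_sketch(X, Z, kern='lin', B=1.0, eps=None):
    """Estimate weights beta for the source sample Z so that its kernel mean
    matches the target sample X. Hypothetical stand-in; expects dense arrays
    (call .toarray() on sparse input)."""
    nx, nz = X.shape[0], Z.shape[0]
    if eps is None:
        eps = B / np.sqrt(nz)
    if kern != 'lin':
        raise ValueError("only the linear kernel is sketched here")
    K = np.dot(Z, Z.T).astype(np.double)   # Gram matrix on the source sample
    kappa = np.sum(np.dot(Z, X.T), axis=1) * float(nz) / float(nx)
    # QP: minimize 0.5 * beta^T K beta - kappa^T beta
    # s.t. |sum(beta) - nz| <= nz*eps  and  0 <= beta_i <= B
    G = np.r_[np.ones((1, nz)), -np.ones((1, nz)), np.eye(nz), -np.eye(nz)]
    h = np.r_[nz * (1 + eps), nz * (eps - 1), B * np.ones(nz), np.zeros(nz)]
    sol = solvers.qp(matrix(K), matrix(-kappa.astype(np.double)),
                     matrix(G), matrix(h))
    return np.array(sol['x'])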
def create_dictionary(WORDGRAMS, MINCOUNT, BUCKET, KERN, SUBSET_VAL, LIN_C, run,
                      model_version):
    print("starting dictionary creation")
    # dictionary must be recreated each run to get a different subsample each time

    # initialize training
    start = time.time()
    dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET, KERN, SUBSET_VAL, LIN_C,
                            run, model=model_version)
    end = time.time()
    print("dictionary took ", (end - start) / 60.0, " minutes to create.")

    return dictionary
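# For reference, a call mirroring how the run scripts below construct their
# dictionaries (the argument values here are illustrative only):
#
#   dictionary = create_dictionary(WORDGRAMS=3, MINCOUNT=2, BUCKET=1000000,
#                                  KERN='lin', SUBSET_VAL=1000, LIN_C=0.90,
#                                  run=0, model_version='kmm')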
def main():
    # args from Simple Queries paper
    DIM = 30
    WORDGRAMS = 2
    MINCOUNT = 8
    MINN = 3
    MAXN = 3
    BUCKET = 1000000

    # adjust these
    EPOCH = 20
    LR = 0.10            # 0.15 good for ~5000
    KERN = 'lin'         # lin or rbf or poly
    NUM_RUNS = 1         # number of test runs
    SUBSET_VAL = 1000    # number of subset instances for self-reported dataset
    LIN_C = 0.90         # hyperparameter for linear kernel
    BATCHSIZE = 1        # number of instances in each batch

    ##### instantiations #######################################

    print("starting dictionary creation")
    # dictionary must be recreated each run to get a different subsample each time

    # initialize training
    start = time.time()
    dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET, KERN, SUBSET_VAL,
                            LIN_C, model='original')
    end = time.time()
    print("dictionary took ", (end - start) / 60.0, " minutes to create.")

    nwords = dictionary.get_nwords()
    nclasses = dictionary.get_nclasses()

    # initialize testing
    X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    N_train = dictionary.get_n_train_instances()
    N_test = dictionary.get_n_test_instances()
    print("Number of Train instances: ", N_train,
          " Number of Test instances: ", N_test)

    ntrain_eachclass = dictionary.get_nlabels_eachclass_train()
    ntest_eachclass = dictionary.get_nlabels_eachclass_test()
    print("N each class TRAIN: ", ntrain_eachclass,
          " N each class TEST: ", ntest_eachclass)

    # manually labeled set (Kaggle dataset)
    X_manual = dictionary.get_manual_testset()
    y_manual = dictionary.get_manual_set_labels()
    N_manual = dictionary.get_n_manual_instances()
    print()
    print("Number of Manual testing instances: ", N_manual,
          " shape: ", X_manual.shape)
    nmanual_eachclass = dictionary.get_nlabels_eachclass_manual()
    print("N each class Manual testing instances: ", nmanual_eachclass)
    print("#####################################")

    p = X_train.shape[1]

    # A
    #A_n = nwords + BUCKET   # cols
    A_n = p        # cols
    A_m = DIM      # rows
    uniform_val = 1.0 / DIM
    np.random.seed(0)
    A = np.random.uniform(-uniform_val, uniform_val, (A_m, A_n))

    # B
    B_n = DIM       # cols
    B_m = nclasses  # rows
    B = np.zeros((B_m, B_n))

    #### train ################################################

    print("A: ", A.shape)
    print("B: ", B.shape)
    print("X_train: ", X_train.shape)
    print("labels: ", y_train.shape)

    losses_train = []
    losses_test = []
    losses_manual = []

    print()
    print()

    X_train = normalize(X_train, axis=1, norm='l1')
    X_test = normalize(X_test, axis=1, norm='l1')
    X_manual = normalize(X_manual, axis=1, norm='l1')

    # disabled batch-splitting experiments:
    #X_train_batches = np.array_split(X_train.todense(), BATCHES)
    #y_train_batches = np.array_split(y_train, BATCHES)

    traintime_start = time.time()
    for i in range(EPOCH):
        print()
        print("EPOCH: ", i)

        alpha = LR * (1 - i / EPOCH)   # linearly decaying learning rate

        train_loss = 0
        start = 0
        batchnum = 0
        while start <= N_train:
            batch = X_train.tocsr()[start:start + BATCHSIZE, :]
            y_train_batch = y_train[start:start + BATCHSIZE, :]

            # forward propagation
            hidden = sparse.csr_matrix.dot(A, batch.T)
            hidden = normalize(hidden, axis=1, norm='l1')
            z2 = np.dot(B, hidden)
            Y_hat = softmax(z2, theta=1.0, axis=0)

            #loglike = np.log(Y_hat)
            #train_loss = -np.multiply(y_train_batch, loglike.T)  # element-wise multiply

            #### back propagation ##################################

            # B update
            gradient = alpha * np.dot(np.subtract(Y_hat.T, y_train_batch).T, hidden.T)
            B = np.subtract(B, gradient)

            # A update
            first = np.dot(np.subtract(Y_hat.T, y_train_batch), B)
            gradient = alpha * sparse.csr_matrix.dot(first.T, batch)
            A = np.subtract(A, gradient)

            batchnum += 1

            if start + BATCHSIZE >= N_train:
                # rest of the train set; slicing with [start:] (rather than
                # [start:-1]) so the final sample is not dropped
                batch = X_train.tocsr()[start:, :]
                y_train_batch = y_train[start:, :]

                # forward propagation
                hidden = sparse.csr_matrix.dot(A, batch.T)
                hidden = normalize(hidden, axis=1, norm='l1')
                z2 = np.dot(B, hidden)
                Y_hat = softmax(z2, theta=1.0, axis=0)

                #### back propagation ##############################

                # B update
                gradient = alpha * np.dot(np.subtract(Y_hat.T, y_train_batch).T, hidden.T)
                B = np.subtract(B, gradient)

                # A update
                first = np.dot(np.subtract(Y_hat.T, y_train_batch), B)
                gradient = alpha * sparse.csr_matrix.dot(first.T, batch)
                A = np.subtract(A, gradient)

                break
            else:
                start = start + BATCHSIZE

        # TRAINING LOSS
        hidden_train = sparse.csr_matrix.dot(A, X_train.T)
        hidden_train = normalize(hidden_train, axis=1, norm='l1')
        z2_train = np.dot(B, hidden_train)
        Y_hat_train = softmax(z2_train, theta=1.0, axis=0)
        loglike_train = np.log(Y_hat_train)
        train_loss = -np.multiply(y_train, loglike_train.T)  # element-wise multiply
        train_loss = np.sum(train_loss) / N_train
        print("Train: ", train_loss)

        ## TESTING LOSS
        hidden_test = sparse.csr_matrix.dot(A, X_test.T)
        hidden_test = normalize(hidden_test, axis=1, norm='l1')
        z2_test = np.dot(B, hidden_test)
        Y_hat_test = softmax(z2_test, theta=1.0, axis=0)
        loglike_test = np.log(Y_hat_test)
        test_loss = -np.multiply(y_test, loglike_test.T)
        test_loss = np.sum(test_loss) / N_test
        print("Test: ", test_loss)

        ## MANUAL SET TESTING LOSS
        hidden_man = sparse.csr_matrix.dot(A, X_manual.T)
        hidden_man = normalize(hidden_man, axis=1, norm='l1')
        z2_man = np.dot(B, hidden_man)
        Y_hat_man = softmax(z2_man, theta=1.0, axis=0)
        loglike_manual = np.log(Y_hat_man)
        manual_loss = -np.multiply(y_manual, loglike_manual.T)
        manual_loss = np.sum(manual_loss) / N_manual
        print("Manual Set: ", manual_loss)

        # (full-dataset backprop and the per-epoch metrics/printing are disabled
        #  in this version; the live versions appear in the KMM scripts below)

        losses_train.append(train_loss)
        losses_test.append(test_loss)
        losses_manual.append(manual_loss)

    traintime_end = time.time()
    print("model took ", (traintime_end - traintime_start) / 60.0,
          " minutes to train")

    epochs = list(range(EPOCH))
    plt.plot(epochs, losses_train, 'm', label="train")
    plt.plot(epochs, losses_test, 'c', label="test")
    plt.plot(epochs, losses_manual, 'g', label="manual")
    plt.title("Main_temp: n_train: %s n_test: %s n_manual: %s"
              % (N_train, N_test, N_manual))
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(loc='upper left')
    plt.show()
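# The batched loop above calls a `softmax(z, theta, axis)` helper that is
# defined elsewhere. A minimal numerically stable sketch consistent with that
# call signature (the temperature parameter `theta` and the axis handling are
# assumptions, not the author's definition):
def softmax(X, theta=1.0, axis=None):
    """Numerically stable softmax of X along `axis`, scaled by `theta`."""
    y = np.atleast_2d(X) * float(theta)
    if axis is None:
        # default to the first non-singleton axis
        axis = next(i for i, d in enumerate(y.shape) if d > 1)
    y = y - np.expand_dims(np.max(y, axis=axis), axis)   # shift for stability
    y = np.exp(y)
    p = y / np.expand_dims(np.sum(y, axis=axis), axis)
    if np.ndim(X) == 1:
        p = p.flatten()
    return p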
def main():
    # args from Simple Queries paper -- DON'T adjust these
    DIM = 30
    WORDGRAMS = 3
    MINCOUNT = 2
    MINN = 3
    MAXN = 3
    BUCKET = 1000000

    # adjust these
    EPOCH = 20
    LR = 0.20            # 0.15 good for ~5000
    KERN = 'lin'         # lin or rbf or poly
    NUM_RUNS = 5         # number of test runs
    SUBSET_VAL = 1000    # number of subset instances for self-reported dataset
    LIN_C = 0.90         # hyperparameter for linear kernel

    model = 'kmm'

    file_names = [
        'KMMoutput/loss_train.txt', 'KMMoutput/loss_test.txt',
        'KMMoutput/loss_manual.txt', 'KMMoutput/error_train.txt',
        'KMMoutput/error_test.txt', 'KMMoutput/error_manual.txt',
        'KMMoutput/precision_train.txt', 'KMMoutput/precision_test.txt',
        'KMMoutput/precision_manual.txt', 'KMMoutput/recall_train.txt',
        'KMMoutput/recall_test.txt', 'KMMoutput/recall_manual.txt',
        'KMMoutput/F1_train.txt', 'KMMoutput/F1_test.txt',
        'KMMoutput/F1_manual.txt', 'KMMoutput/AUC_train.txt',
        'KMMoutput/AUC_test.txt', 'KMMoutput/AUC_manual.txt'
    ]

    create_readme(DIM, WORDGRAMS, MINCOUNT, MINN, MAXN, BUCKET, EPOCH, LR,
                  KERN, NUM_RUNS, SUBSET_VAL, LIN_C)

    ##### instantiations #######################################

    for run in range(NUM_RUNS):
        print("******************************************************* RUN NUMBER: ", run)
        print()

        print("starting dictionary creation")
        # dictionary must be recreated each run to get a different subsample each time

        # initialize training
        start = time.time()
        dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET, KERN, SUBSET_VAL,
                                LIN_C, model)
        end = time.time()
        print("Dictionary took ", (end - start) / 60.0, " minutes to create.")

        nwords = dictionary.get_nwords()
        nclasses = dictionary.get_nclasses()

        # initialize testing
        X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
        print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

        N_train = dictionary.get_n_train_instances()
        N_test = dictionary.get_n_test_instances()
        print("Number of Train instances: ", N_train,
              " Number of Test instances: ", N_test)

        ntrain_eachclass = dictionary.get_nlabels_eachclass_train()
        ntest_eachclass = dictionary.get_nlabels_eachclass_test()
        print("N each class TRAIN: ", ntrain_eachclass,
              " N each class TEST: ", ntest_eachclass)

        # manually labeled set (Kaggle dataset)
        X_manual = dictionary.get_manual_testset()
        y_manual = dictionary.get_manual_set_labels()
        N_manual = dictionary.get_n_manual_instances()
        print()
        print("Number of Manual testing instances: ", N_manual,
              " shape: ", X_manual.shape)
        nmanual_eachclass = dictionary.get_nlabels_eachclass_manual()
        print("N each class Manual testing instances: ", nmanual_eachclass)
        print("#####################################")

        p = X_train.shape[1]

        # A
        #A_n = nwords + BUCKET   # cols
        A_n = p        # cols
        A_m = DIM      # rows
        uniform_val = 1.0 / DIM
        np.random.seed(0)
        A = np.random.uniform(-uniform_val, uniform_val, (A_m, A_n))

        # B
        B_n = DIM       # cols
        B_m = nclasses  # rows
        B = np.zeros((B_m, B_n))

        # optimal KMM reweighting coefficient
        beta = dictionary.get_optbeta()
        # NOTE: run with ones to check the implementation -- should get values
        # close to the original (without the reweighting coefficient)
        #beta = np.ones((N_train))

        #### train ################################################

        print()
        print()

        for i in range(EPOCH):
            print()
            print("EPOCH: ", i)

            alpha = LR * (1 - i / EPOCH)   # linearly decaying learning rate

            l = 0
            train_loss = 0

            # TRAINING
            for x in X_train:
                beta_n = beta[l]
                label = y_train[l]
                B_old = B
                A_old = A

                # forward propagation
                hidden = sparse.csr_matrix.dot(A_old, x.T)
                if np.sum(x) > 0:
                    a1 = hidden / np.sum(x)
                else:
                    a1 = hidden
                z2 = np.dot(B, a1)
                exps = np.exp(z2 - np.max(z2))
                Y_hat = exps / np.sum(exps)

                # back propagation with alternating optimization
                B = gradient_B(B_old, A_old, x, label, nclasses, alpha, DIM,
                               a1, Y_hat, beta_n)
                A = gradient_A(B_old, A_old, x, label, nclasses, alpha, DIM,
                               Y_hat, beta_n)

                # verify gradients
                #check_B_gradient(B_old, A_old, label, x, Y_hat, a1)
                #check_A_gradient(B_old, A_old, label, x, Y_hat)

                loglike = np.log(Y_hat)
                train_loss += -np.dot(label, loglike)

                l += 1

            # TRAINING LOSS
            #train_loss = total_loss_function(X_train, y_train, A, B, N_train)
            train_loss = (1.0 / N_train) * train_loss
            print("Train: ", train_loss)

            # TESTING LOSS
            test_loss = total_loss_function(X_test, y_test, A_old, B_old, N_test)
            print("Test: ", test_loss)
            print("Difference = ", test_loss - train_loss)

            # MANUAL SET TESTING LOSS
            manual_loss = total_loss_function(X_manual, y_manual, A_old, B_old,
                                              N_manual)
            print("Manual Set: ", manual_loss)

            train_class_error, train_precision, train_recall, train_F1, \
                train_AUC, train_FPR, train_TPR = metrics(X_train, y_train, A, B, N_train)
            test_class_error, test_precision, test_recall, test_F1, \
                test_AUC, test_FPR, test_TPR = metrics(X_test, y_test, A, B, N_test)
            manual_class_error, manual_precision, manual_recall, manual_F1, \
                manual_AUC, manual_FPR, manual_TPR = metrics(X_manual, y_manual, A, B, N_manual)

            print()
            print("TRAIN:")
            print("  Classification Err: ", train_class_error)
            print("  Precision:          ", train_precision)
            print("  Recall:             ", train_recall)
            print("  F1:                 ", train_F1)

            print("TEST:")
            print("  Classification Err: ", test_class_error)
            print("  Precision:          ", test_precision)
            print("  Recall:             ", test_recall)
            print("  F1:                 ", test_F1)

            print()
            print("MANUAL:")
            print("  Classification Err: ", manual_class_error)
            print("  Precision:          ", manual_precision)
            print("  Recall:             ", manual_recall)
            print("  F1:                 ", manual_F1)

            # write the epoch's losses and metrics, one comma-separated value
            # per epoch, to the per-run output files
            epoch_values = {
                'KMMoutput/loss_train.txt': train_loss,
                'KMMoutput/loss_test.txt': test_loss,
                'KMMoutput/loss_manual.txt': manual_loss,
                'KMMoutput/error_train.txt': train_class_error,
                'KMMoutput/error_test.txt': test_class_error,
                'KMMoutput/error_manual.txt': manual_class_error,
                'KMMoutput/precision_train.txt': train_precision,
                'KMMoutput/precision_test.txt': test_precision,
                'KMMoutput/precision_manual.txt': manual_precision,
                'KMMoutput/recall_train.txt': train_recall,
                'KMMoutput/recall_test.txt': test_recall,
                'KMMoutput/recall_manual.txt': manual_recall,
                'KMMoutput/F1_train.txt': train_F1,
                'KMMoutput/F1_test.txt': test_F1,
                'KMMoutput/F1_manual.txt': manual_F1,
                'KMMoutput/AUC_train.txt': train_AUC,
                'KMMoutput/AUC_test.txt': test_AUC,
                'KMMoutput/AUC_manual.txt': manual_AUC,
            }
            for name, value in epoch_values.items():
                with open(name, 'a') as f:
                    f.write("%s," % value)

        # write a newline to each output file after every run
        for name in file_names:
            with open(name, 'a') as f:
                f.write('\n')
    # (tail of a kernel-sum helper; its def precedes this excerpt)
    return n_tr / n_te * np.sum(_sum)


def get_optbeta():
    return beta


###################################################################

WORDGRAMS = 3
MINCOUNT = 2
BUCKET = 1000000

print("starting dictionary creation.............................")
dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET)
X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
n_train = dictionary.get_n_train_instances()
n_test = dictionary.get_n_manual_instances()

X_train = dictionary.get_trainset()
X_test = dictionary.get_manual_testset()

B = n_train
#sigma = np.std(X_train)   # compute standard deviation ????
sigma = 0.25

b = (0.0, B)
bounds = (b, b, b, b, b)
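# The script fixes a kernel bandwidth sigma = 0.25 above, but the RBF kernel
# itself is not shown in this fragment. A common formulation, with
# `compute_rbf` as a hypothetical helper name (expects dense arrays):
def compute_rbf(X, Z, sigma=1.0):
    """Pairwise RBF kernel: K[i, j] = exp(-||x_i - z_j||^2 / (2 sigma^2))."""
    K = np.zeros((X.shape[0], Z.shape[0]))
    for i, x in enumerate(X):
        K[i, :] = np.exp(-np.sum((x - Z) ** 2, axis=1) / (2.0 * sigma ** 2))
    return K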
def main():
    # args from Simple Queries paper
    DIM = 30
    LR = 0.20            # 0.15 good for ~5000
    WORDGRAMS = 3
    MINCOUNT = 2
    MINN = 3
    MAXN = 3
    BUCKET = 1000000
    EPOCH = 20
    KERN = 'lin'         # lin or rbf or poly
    NUM_RUNS = 5         # number of test runs
    SUBSET_VAL = 1000    # number of subset instances for self-reported dataset
    LIN_C = 0.90         # hyperparameter for linear kernel

    print("starting dictionary creation")

    # initialize training
    start = time.time()
    dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET, KERN, SUBSET_VAL, LIN_C)
    end = time.time()
    print("Dictionary took ", (end - start) / 60.0, " minutes to create.")

    nwords = dictionary.get_nwords()
    nclasses = dictionary.get_nclasses()

    # initialize testing
    X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    N_train = dictionary.get_n_train_instances()
    N_test = dictionary.get_n_test_instances()
    print("Number of Train instances: ", N_train,
          " Number of Test instances: ", N_test)

    ntrain_eachclass = dictionary.get_nlabels_eachclass_train()
    ntest_eachclass = dictionary.get_nlabels_eachclass_test()
    print("N each class TRAIN: ", ntrain_eachclass,
          " N each class TEST: ", ntest_eachclass)

    # manually labeled set (Kaggle dataset)
    X_manual = dictionary.get_manual_testset()
    y_manual = dictionary.get_manual_set_labels()
    N_manual = dictionary.get_n_manual_instances()
    print()
    print("Number of Manual testing instances: ", N_manual,
          " shape: ", X_manual.shape)
    nmanual_eachclass = dictionary.get_nlabels_eachclass_manual()
    print("N each class Manual testing instances: ", nmanual_eachclass)
    print("################################################################")

    # optimal KMM reweighting coefficient
    beta = dictionary.get_optbeta()
    # NOTE: run with ones to check the implementation -- should get values
    # close to the original (without the reweighting coefficient)
    #beta = np.ones((N_train))

    ##### instantiations #######################################

    p = X_train.shape[1]

    # A
    #A_n = nwords + BUCKET   # cols
    A_n = p        # cols
    A_m = DIM      # rows
    uniform_val = 1.0 / DIM
    np.random.seed(0)
    A = np.random.uniform(-uniform_val, uniform_val, (A_m, A_n))

    # B
    B_n = DIM       # cols
    B_m = nclasses  # rows
    B = np.zeros((B_m, B_n))

    #### train ################################################

    losses_train = []
    losses_test = []
    losses_manual = []

    class_error_train = []
    class_error_test = []
    class_error_manual = []

    prec_train = []
    prec_test = []
    prec_manual = []

    recall_train = []
    recall_test = []
    recall_manual = []

    F1_train = []
    F1_test = []
    F1_manual = []

    AUC_train = []
    AUC_test = []
    AUC_manual = []

    print()
    print()

    for i in range(EPOCH):
        print()
        print("EPOCH: ", i)

        alpha = LR * (1 - i / EPOCH)   # linearly decaying learning rate

        l = 0
        train_loss = 0

        # TRAINING
        for x in X_train:
            beta_n = beta[l]
            label = y_train[l]
            B_old = B
            A_old = A

            # forward propagation
            hidden = sparse.csr_matrix.dot(A_old, x.T)
            if np.sum(x) > 0:
                a1 = hidden / np.sum(x)
            else:
                a1 = hidden
            z2 = np.dot(B, a1)
            exps = np.exp(z2 - np.max(z2))
            Y_hat = exps / np.sum(exps)

            # back propagation with alternating optimization
            B = gradient_B(B_old, A_old, x, label, nclasses, alpha, DIM,
                           a1, Y_hat, beta_n)
            A = gradient_A(B_old, A_old, x, label, nclasses, alpha, DIM,
                           Y_hat, beta_n)

            # verify gradients
            #check_B_gradient(B_old, A_old, label, x, Y_hat, a1)
            #check_A_gradient(B_old, A_old, label, x, Y_hat)

            loglike = np.log(Y_hat)
            #train_loss += -beta_n * np.dot(label, loglike)
            train_loss += -np.dot(label, loglike)

            l += 1

        # TRAINING LOSS
        #train_loss = total_loss_function(X_train, y_train, A, B, N_train)
        train_loss = (1.0 / N_train) * train_loss
        print("Train: ", train_loss)

        # TESTING LOSS
        test_loss = total_loss_function(X_test, y_test, A_old, B_old, N_test, beta)
        print("Test: ", test_loss)
        print("Difference = ", test_loss - train_loss)

        # MANUAL SET TESTING LOSS
        manual_loss = total_loss_function(X_manual, y_manual, A_old, B_old,
                                          N_manual, beta)
        print("Manual Set: ", manual_loss)

        train_class_error, train_precision, train_recall, train_F1, \
            train_AUC, train_FPR, train_TPR = metrics(X_train, y_train, A, B, N_train)
        test_class_error, test_precision, test_recall, test_F1, \
            test_AUC, test_FPR, test_TPR = metrics(X_test, y_test, A, B, N_test)
        manual_class_error, manual_precision, manual_recall, manual_F1, \
            manual_AUC, manual_FPR, manual_TPR = metrics(X_manual, y_manual, A, B, N_manual)

        print()
        print("TRAIN:")
        print("  Classification Err: ", train_class_error)
        print("  Precision:          ", train_precision)
        print("  Recall:             ", train_recall)
        print("  F1:                 ", train_F1)

        print("TEST:")
        print("  Classification Err: ", test_class_error)
        print("  Precision:          ", test_precision)
        print("  Recall:             ", test_recall)
        print("  F1:                 ", test_F1)

        print()
        print("MANUAL:")
        print("  Classification Err: ", manual_class_error)
        print("  Precision:          ", manual_precision)
        print("  Recall:             ", manual_recall)
        print("  F1:                 ", manual_F1)

        losses_train.append(train_loss)
        losses_test.append(test_loss)
        losses_manual.append(manual_loss)

        class_error_train.append(train_class_error)
        class_error_test.append(test_class_error)
        class_error_manual.append(manual_class_error)

        prec_train.append(train_precision)
        prec_test.append(test_precision)
        prec_manual.append(manual_precision)

        recall_train.append(train_recall)
        recall_test.append(test_recall)
        recall_manual.append(manual_recall)

        F1_train.append(train_F1)
        F1_test.append(test_F1)
        F1_manual.append(manual_F1)

        AUC_train.append(train_AUC)
        AUC_test.append(test_AUC)
        AUC_manual.append(manual_AUC)

    epochs = list(range(EPOCH))
    txt = "LR: %s  Kern: %s" % (LR, KERN)

    plt.plot(epochs, losses_train, 'm', label="train")
    plt.plot(epochs, losses_test, 'c', label="test")
    plt.plot(epochs, losses_manual, 'g', label="manual")
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.title("KMM LOSS, n_train: %s n_test: %s n_manual: %s"
              % (N_train, N_test, N_manual))
    plt.legend(loc='upper left')
    plt.text(.5, .05, txt, ha='center')
    plt.show()

    plt.plot(epochs, class_error_train, 'm', label="train classification error")
    plt.plot(epochs, class_error_test, 'c', label="test classification error")
    plt.plot(epochs, class_error_manual, 'g', label="manual classification error")
    plt.ylabel('classification error')
    plt.xlabel('epoch')
    plt.title("KMM CLASS ERROR, n_train: %s n_test: %s n_manual: %s kern: %s"
              % (N_train, N_test, N_manual, KERN))
    plt.legend(loc='upper left')
    plt.text(.5, .05, txt, ha='center')
    plt.show()
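# `total_loss_function` (used for the test and manual losses above) is defined
# in another module. A sketch of a mean cross-entropy that mirrors the
# per-sample forward pass of the training loop; `beta` is accepted only to
# match the call sites here, since how the author applies it to held-out
# losses is not shown:
def total_loss_function(X, y, A, B, N, beta=None):
    """Mean cross-entropy of the two-layer model over (X, y). Sketch only."""
    total = 0.0
    for l, x in enumerate(X):
        hidden = sparse.csr_matrix.dot(A, x.T)
        denom = np.sum(x)
        a1 = hidden / denom if denom > 0 else hidden   # normalized bag of n-grams
        z2 = np.dot(B, a1)
        exps = np.exp(z2 - np.max(z2))                 # stable softmax
        Y_hat = exps / np.sum(exps)
        total += -np.dot(y[l], np.log(Y_hat))
    return total / N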
def main():
    # args from Simple Queries paper
    DIM = 30
    WORDGRAMS = 2
    MINCOUNT = 8
    MINN = 3
    MAXN = 3
    BUCKET = 1000000

    # adjust these
    EPOCH = 20
    LR = 0.15            # 0.15 good for ~5000
    KERN = 'lin'         # lin or rbf or poly
    NUM_RUNS = 1         # number of test runs
    SUBSET_VAL = 800     # number of subset instances for self-reported dataset
    LIN_C = 0.90         # hyperparameter for linear kernel
    BATCHSIZE = 2        # number of instances in each batch

    ##### instantiations #######################################

    print("starting dictionary creation")
    # dictionary must be recreated each run to get a different subsample each time

    # initialize training
    start = time.time()
    dictionary = Dictionary(WORDGRAMS, MINCOUNT, BUCKET, KERN, SUBSET_VAL,
                            LIN_C, model='original')
    end = time.time()
    print("dictionary took ", (end - start) / 60.0, " minutes to create.")

    nwords = dictionary.get_nwords()
    nclasses = dictionary.get_nclasses()

    # initialize testing
    X_train, X_test, y_train, y_test = dictionary.get_train_and_test()
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    N_train = dictionary.get_n_train_instances()
    N_test = dictionary.get_n_test_instances()
    print("Number of Train instances: ", N_train,
          " Number of Test instances: ", N_test)

    ntrain_eachclass = dictionary.get_nlabels_eachclass_train()
    ntest_eachclass = dictionary.get_nlabels_eachclass_test()
    print("N each class TRAIN: ", ntrain_eachclass,
          " N each class TEST: ", ntest_eachclass)

    # manually labeled set (Kaggle dataset)
    X_manual = dictionary.get_manual_testset()
    y_manual = dictionary.get_manual_set_labels()
    N_manual = dictionary.get_n_manual_instances()
    print()
    print("Number of Manual testing instances: ", N_manual,
          " shape: ", X_manual.shape)
    nmanual_eachclass = dictionary.get_nlabels_eachclass_manual()
    print("N each class Manual testing instances: ", nmanual_eachclass)
    print("#####################################")

    p = X_train.shape[1]

    # A
    #A_n = nwords + BUCKET   # cols
    A_n = p        # cols
    A_m = DIM      # rows
    uniform_val = 1.0 / DIM
    np.random.seed(0)
    A = np.random.uniform(-uniform_val, uniform_val, (A_m, A_n))

    # B
    B_n = DIM       # cols
    B_m = nclasses  # rows
    B = np.zeros((B_m, B_n))

    #### train ################################################

    losses_train = []
    losses_test = []
    losses_manual = []

    print()
    print()

    traintime_start = time.time()
    for i in range(EPOCH):
        print()
        print("EPOCH: ", i)

        alpha = LR * (1 - i / EPOCH)   # linearly decaying learning rate

        train_loss = 0
        start = 0
        batchnum = 0
        while start <= N_train:
            batch = X_train.tocsr()[start:start + BATCHSIZE, :]
            y_train_batch = y_train[start:start + BATCHSIZE, :]

            B_old = B
            A_old = A

            # forward propagation
            hidden = sparse.csr_matrix.dot(A, batch.T)
            sum_ = np.sum(batch, axis=1)
            sum_[sum_ == 0] = 1        # replace zeros with ones so the divide works
            sum_ = np.array(sum_).flatten()
            a1 = (hidden.T / sum_[:, None]).T
            z2 = np.dot(B, a1)
            Y_hat = stable_softmax(z2)

            # back propagation with alternating optimization
            B = gradient_B(B_old, A_old, y_train_batch, alpha, a1, Y_hat)
            A = gradient_A(B_old, A_old, batch, y_train_batch, alpha, sum_, Y_hat)

            #loglike = np.log(Y_hat)
            #train_loss += -np.dot(y_train_batch, loglike)

            batchnum += 1

            if start + BATCHSIZE >= N_train and start < N_train - 1:
                # rest of the train set; slicing with [start:] (rather than
                # [start:-1]) so the final sample is not dropped
                batch = X_train.tocsr()[start:, :]
                y_train_batch = y_train[start:, :]

                B_old = B
                A_old = A

                # forward propagation
                hidden = sparse.csr_matrix.dot(A, batch.T)
                sum_ = np.sum(batch, axis=1)
                sum_[sum_ == 0] = 1    # replace zeros with ones so the divide works
                sum_ = np.array(sum_).flatten()
                a1 = (hidden.T / sum_[:, None]).T
                z2 = np.dot(B, a1)
                Y_hat = stable_softmax(z2)

                # back propagation with alternating optimization
                B = gradient_B(B_old, A_old, y_train_batch, alpha, a1, Y_hat)
                A = gradient_A(B_old, A_old, batch, y_train_batch, alpha, sum_, Y_hat)

                break
            else:
                start = start + BATCHSIZE

        # TRAINING LOSS
        train_loss = get_total_loss(A, B, X_train, y_train, N_train)
        print("Train: ", train_loss)

        ## TESTING LOSS
        test_loss = get_total_loss(A, B, X_test, y_test, N_test)
        print("Test: ", test_loss)
        #print("Difference = ", test_loss - train_loss)

        ## MANUAL SET TESTING LOSS
        manual_loss = get_total_loss(A, B, X_manual, y_manual, N_manual)
        print("Manual Set: ", manual_loss)

        # metrics() returns only the classification error in this version;
        # precision/recall/F1/AUC reporting is disabled
        train_class_error = metrics(X_train, y_train, A, B, N_train)
        test_class_error = metrics(X_test, y_test, A, B, N_test)
        manual_class_error = metrics(X_manual, y_manual, A, B, N_manual)

        print()
        print("TRAIN:")
        print("  Classification Err: ", train_class_error)
        print("TEST:")
        print("  Classification Err: ", test_class_error)
        print()
        print("MANUAL:")
        print("  Classification Err: ", manual_class_error)

        losses_train.append(train_loss)
        losses_test.append(test_loss)
        losses_manual.append(manual_loss)

    traintime_end = time.time()
    print("model took ", (traintime_end - traintime_start) / 60.0,
          " minutes to train")

    epochs = list(range(EPOCH))
    plt.plot(epochs, losses_train, 'm', label="train")
    plt.plot(epochs, losses_test, 'c', label="test")
    plt.plot(epochs, losses_manual, 'g', label="manual")
    plt.title("Main_temp: n_train: %s n_test: %s n_manual: %s"
              % (N_train, N_test, N_manual))
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(loc='upper left')
    plt.show()
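# Both batched scripts call `stable_softmax` and `get_total_loss`, which live
# elsewhere in the repo. Minimal sketches consistent with the shapes used
# above (class scores in rows, one column per example); the exact definitions
# are assumptions:
def stable_softmax(z):
    """Column-wise softmax with a max shift for numerical stability."""
    exps = np.exp(z - np.max(z, axis=0))
    return exps / np.sum(exps, axis=0)


def get_total_loss(A, B, X, y, N):
    """Mean cross-entropy over a dataset, mirroring the batched forward pass
    above (hidden layer divided by each example's n-gram count). Sketch."""
    hidden = sparse.csr_matrix.dot(A, X.T)          # (DIM, N)
    sums = np.array(np.sum(X, axis=1)).flatten()    # n-gram count per example
    sums[sums == 0] = 1                             # avoid divide-by-zero
    a1 = hidden / sums[None, :]
    Y_hat = stable_softmax(np.dot(B, a1))           # (nclasses, N)
    return -np.sum(np.multiply(y, np.log(Y_hat).T)) / N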