def eliminateBADfeaturesWITHperceptron(finalFeatureCASE, labelCASE, thresholdBIAS):
    # Requires (module level): copy, numpy as np, percepFunctions, projectFunctions.
    # Leave-one-feature-out pruning: score every column by training a weighted
    # perceptron with that column removed, then drop the columns whose held-out
    # F1 falls below the mean F1 plus thresholdBIAS.
    featureCASE = 'w'
    tempList = []
    delLIST = []
    train_matrix, train_labels, test_matrix, test_labels, eval_matrix, eval_labels = eliminateZEROfetures(
        featureCASE, labelCASE)

    # Score each column: retrain with column c deleted and record accuracy / F1.
    for c in range(len(train_matrix[0])):
        temp_train_matrix = copy.deepcopy(train_matrix)
        temp_train_matrix = np.delete(temp_train_matrix, c, axis=1)
        w, dummy = percepFunctions.weightedPerceptron(temp_train_matrix, train_labels,
                                                      'zeros', 0.675)
        train_pred_labels = projectFunctions.prediction_Perceptron(
            temp_train_matrix, train_labels, w)
        acc_train = projectFunctions.accuracyMETRIC(train_labels, train_pred_labels)
        dummyP, dummyR, f1_train = projectFunctions.f1METRIC(
            train_labels, train_pred_labels)
        tempList.append([c, acc_train, f1_train])
        #print 'c:', c, ' acc:', acc_train, ' f1:', f1_train

    # Mark for deletion every column whose leave-one-out F1 is below the mean plus the bias.
    columnSUM = np.sum(tempList, axis=0)
    meanF1 = columnSUM[2] / len(tempList)
    for c in range(len(train_matrix[0])):
        if tempList[c][2] < meanF1 + thresholdBIAS:
            delLIST.append(c)
    train_matrix = np.delete(train_matrix, delLIST, axis=1)
    test_matrix = np.delete(test_matrix, delLIST, axis=1)
    eval_matrix = np.delete(eval_matrix, delLIST, axis=1)
    #
    if finalFeatureCASE == '01':
        # Binarize: any positive count becomes 1.
        for i in range(len(train_matrix)):
            train_matrix[i][train_matrix[i] > 0] = 1
        for i in range(len(test_matrix)):
            test_matrix[i][test_matrix[i] > 0] = 1
        for i in range(len(eval_matrix)):
            eval_matrix[i][eval_matrix[i] > 0] = 1
    elif finalFeatureCASE == 'normalize':
        # Scale each column by its (max - min) range, computed per split.
        colRangeLIST = train_matrix.max(axis=0) - train_matrix.min(axis=0)
        colRangeLIST[colRangeLIST == 0] = 1  # guard constant columns against division by zero
        for i in range(len(train_matrix)):
            for j in range(len(train_matrix[0])):
                train_matrix[i][j] = train_matrix[i][j] / colRangeLIST[j]
        colRangeLIST = test_matrix.max(axis=0) - test_matrix.min(axis=0)
        colRangeLIST[colRangeLIST == 0] = 1
        for i in range(len(test_matrix)):
            for j in range(len(test_matrix[0])):
                test_matrix[i][j] = (test_matrix[i][j]) / colRangeLIST[j]
        colRangeLIST = eval_matrix.max(axis=0) - eval_matrix.min(axis=0)
        colRangeLIST[colRangeLIST == 0] = 1
        for i in range(len(eval_matrix)):
            for j in range(len(eval_matrix[0])):
                eval_matrix[i][j] = (eval_matrix[i][j]) / colRangeLIST[j]
    return train_matrix, train_labels, test_matrix, test_labels, eval_matrix, eval_labels
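
if __name__ == '__main__':
    # Hedged usage sketch (illustrative, not part of the original project): drive the
    # leave-one-feature-out pruning above with the '01' feature case and labelCASE = -1,
    # the values the other scripts pass to eliminateZEROfetures; the thresholdBIAS of
    # 0.0 is an assumed starting point.
    (tr_m, tr_l, te_m, te_l,
     ev_m, ev_l) = eliminateBADfeaturesWITHperceptron('01', -1, 0.0)
    print 'pruned train matrix: ', len(tr_m), 'x', len(tr_m[0])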
import numpy as np
import scipy
import operator
import featureTRANSFORM
import winnowFunctions
import projectFunctions

train_matrix, train_labels, test_matrix, test_labels, eval_matrix, eval_labels = featureTRANSFORM.eliminateZEROfetures(
    '01', -1)

print '\n------------------ Winnow: ------------------'
w_train, numUpdates = winnowFunctions.simpleWinnow(train_matrix, train_labels, 'ones', 2)

# Prediction on train
train_pred_labels = projectFunctions.prediction_Winnow(train_matrix, train_labels, w_train)
train_accuracy = projectFunctions.accuracyMETRIC(train_labels, train_pred_labels)
dummy, fummy, train_f1_score = projectFunctions.f1METRIC(
    train_labels, train_pred_labels)
print 'Accuracy : train: ', train_accuracy

# Prediction on test
test_pred_labels = projectFunctions.prediction_Winnow(test_matrix, test_labels, w_train)
test_accuracy = projectFunctions.accuracyMETRIC(test_labels, test_pred_labels)
dummy, fummy, test_f1_score = projectFunctions.f1METRIC(test_labels, test_pred_labels)
print 'Accuracy : test: ', test_accuracy

# Prediction on eval.anon
eval_pred_labels = projectFunctions.prediction_Winnow(eval_matrix, eval_labels, w_train)
eval_accuracy = projectFunctions.accuracyMETRIC(eval_labels, eval_pred_labels)
dummy, fummy, eval_f1_score = projectFunctions.f1METRIC(eval_labels, eval_pred_labels)
print 'Accuracy : eval.anon: ', eval_accuracy

# Write leaderboard file
#projectFunctions.write_solutions(eval_pred_labels, "simpleWinnow.csv")
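
# Hedged sketch (illustrative, not the project's winnowFunctions module): a classic
# Winnow update with promotion/demotion factor alpha, which is presumably the scheme
# behind simpleWinnow(matrix, labels, 'ones', 2) for 0/1 features and +1/-1 labels;
# the real implementation may differ (e.g. in its choice of threshold).
def winnow_sketch(matrix, labels, alpha=2.0):
    matrix = np.asarray(matrix, dtype=float)
    labels = np.asarray(labels)
    n_features = matrix.shape[1]
    w = np.ones(n_features)       # 'ones' initialization
    theta = float(n_features)     # common choice of decision threshold
    num_updates = 0
    for x, y in zip(matrix, labels):
        pred = 1 if np.dot(w, x) >= theta else -1
        if pred != y:
            num_updates += 1
            if y == 1:
                w[x > 0] *= alpha   # promote weights of active features
            else:
                w[x > 0] /= alpha   # demote weights of active features
    return w, num_updates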
import bewNeighborFunctions as kNN
import featureTRANSFORM
import projectFunctions as pF

train_matrix, train_labels, test_matrix, test_labels, eval_matrix, eval_labels = featureTRANSFORM.eliminateZEROfetures(
    '01', -1)

k = 3
p = 2
'''
pred_train_labels = kNN.kneighbors(train_matrix, train_labels, train_matrix, k, p)
train_accuracy = pF.accuracyMETRIC(train_labels, pred_train_labels)
dum, pum, train_f1 = pF.f1METRIC(train_labels, pred_train_labels)
print 'train: Accuracy:', train_accuracy, '% F1:', train_f1
'''
pred_test_labels = kNN.kneighbors(train_matrix, train_labels, test_matrix, k, p)
test_accuracy = pF.accuracyMETRIC(test_labels, pred_test_labels)
dum, pum, test_f1 = pF.f1METRIC(test_labels, pred_test_labels)
print 'test: Accuracy:', test_accuracy, '% F1:', test_f1

eval_pred_labels = kNN.kneighbors(train_matrix, train_labels, eval_matrix, k, p)
solution_filename = raw_input(
    'Enter x__x in ./solutions_log/x__x_solutions.csv: ')
pF.write_solutions('kNN', eval_pred_labels,
                   './' + solution_filename + '.solutions.csv')
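
# Hedged addition (illustrative): sweep a few k values and report the test F1,
# reusing only the calls that appear above; the candidate k list is an assumption.
for k_try in [1, 3, 5, 7]:
    labels_k = kNN.kneighbors(train_matrix, train_labels, test_matrix, k_try, p)
    acc_k = pF.accuracyMETRIC(test_labels, labels_k)
    dum, pum, f1_k = pF.f1METRIC(test_labels, labels_k)
    print 'k =', k_try, ' test Accuracy:', acc_k, '% F1:', f1_k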
def algoFORepochs(numEpochs, trainMatrix, trainLabels, testMatrix, testLabels,
                  algo_type, learningRate, initialWcondition, margin=None):
    # Requires (module level): copy, numpy as np, sklearn.utils.shuffle,
    # winnowFunctions, projectFunctions.
    # Runs the chosen online learner for numEpochs epochs, re-shuffling the training
    # data after the first epoch, and returns the epoch with the best training F1.

    # for info ARRAY
    infoList = []
    temp_list = []
    temp_list.append('shuffleTYPE')
    temp_list.append('num of Epochs')
    temp_list.append('acc_train')
    temp_list.append('f1_train')
    temp_list.append('acc_test')
    temp_list.append('f1_test')
    temp_list.append('weights')
    #
    infoList.append(temp_list)

    tempTrainMatrix = copy.deepcopy(trainMatrix)
    tempTrainLabels = copy.deepcopy(trainLabels)
    best_w = np.zeros(361)
    best_train_acc = 0
    best_f1_train = 0
    av_count = 1
    #for noYesShuffle in range(2):
    total_numUpdates = 0
    w = 0
    for e_ind in range(numEpochs):
        # shuffling section start
        if e_ind == 0:
            shuffleTYPE = 'no shuffle'
            trainingExMat = trainMatrix
            trainExLabels = trainLabels
        else:
            shuffleTYPE = 'with shuffle'
            trainingExMat, trainExLabels = shuffle(tempTrainMatrix, tempTrainLabels,
                                                   random_state=0)
            #trainingExMat, trainExLabels = shuffleFunction(tempTrainMatrix, tempTrainLabels)
        tempTrainMatrix = trainingExMat
        tempTrainLabels = trainExLabels
        # shuffling section end

        # After the first epoch, continue training from the current weights.
        if e_ind == 0:
            weightCondition = initialWcondition
        else:
            weightCondition = w

        if algo_type == 'simple':
            w, numUpdates = simplePerceptron(trainingExMat, trainExLabels,
                                             weightCondition, learningRate)
        elif algo_type == 'margin':
            w, numUpdates = marginPerceptron(trainingExMat, trainExLabels,
                                             weightCondition, learningRate, margin)
        elif algo_type == 'aggressive':
            w, numUpdates = agrresiveMarginPerceptron(trainingExMat, trainExLabels,
                                                      weightCondition, learningRate, margin)
        elif algo_type == 'weightedAggressive':
            w, numUpdates = WeightedAgrresiveMarginPerceptron(
                trainingExMat, trainExLabels, weightCondition, learningRate, margin)
        elif algo_type == 'weightedSimple':
            w, numUpdates = weightedPerceptron(trainingExMat, trainExLabels,
                                               weightCondition, learningRate)
        elif algo_type == 'winnow':
            w, numUpdates = winnowFunctions.simpleWinnow(
                trainingExMat, trainExLabels, 'ones', 2)
        total_numUpdates += numUpdates

        # Evaluate the epoch's weights on the full train and test splits.
        if algo_type == 'winnow':
            train_pred_labels = projectFunctions.prediction_Winnow(
                trainMatrix, trainLabels, w)
            acc_train = projectFunctions.accuracyMETRIC(
                trainLabels, train_pred_labels)
            test_pred_labels = projectFunctions.prediction_Winnow(
                testMatrix, testLabels, w)
            acc_test = projectFunctions.accuracyMETRIC(testLabels, test_pred_labels)
            dummyP, dummyR, f1_train = projectFunctions.f1METRIC(
                trainLabels, train_pred_labels)
            dummyP, dummyR, f1_test = projectFunctions.f1METRIC(
                testLabels, test_pred_labels)
        else:
            train_pred_labels = projectFunctions.prediction_Perceptron(
                trainMatrix, trainLabels, w)
            acc_train = projectFunctions.accuracyMETRIC(
                trainLabels, train_pred_labels)
            test_pred_labels = projectFunctions.prediction_Perceptron(
                testMatrix, testLabels, w)
            acc_test = projectFunctions.accuracyMETRIC(testLabels, test_pred_labels)
            dummyP, dummyR, f1_train = projectFunctions.f1METRIC(
                trainLabels, train_pred_labels)
            dummyP, dummyR, f1_test = projectFunctions.f1METRIC(
                testLabels, test_pred_labels)

        # for info ARRAY
        temp_list = []
        temp_list.append(shuffleTYPE)
        temp_list.append(e_ind + 1)
        temp_list.append(acc_train)
        temp_list.append(f1_train)
        temp_list.append(acc_test)
        temp_list.append(f1_test)
        temp_list.append(w)
        #
        infoList.append(temp_list)

        '''
        if acc_train >= best_train_acc:
            best_train_acc = acc_train
            w_ATbestTRAINacc = w
            train_acc_ATbestTRAINacc = acc_train
            test_acc_ATbestTRAINacc = acc_test
            eval_acc_ATbestTRAINacc = acc_eval
            train_f1_ATbestTRAINacc = f1_train
            test_f1_ATbestTRAINacc = f1_test
            eval_f1_ATbestTRAINacc = f1_eval
        '''
        # Keep the epoch with the best training F1.
        if f1_train >= best_f1_train:
            best_shuffle_type = shuffleTYPE
            best_epoch_index = e_ind
            best_acc_train = acc_train
            best_f1_train = f1_train
            best_acc_test = acc_test
            best_f1_test = f1_test
            best_w = w

    best_LIST = [
        best_shuffle_type, best_epoch_index, best_acc_train, best_f1_train,
        best_acc_test, best_f1_test, best_w
    ]
    return best_LIST
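
if __name__ == '__main__':
    # Hedged usage sketch (illustrative, not part of the original project): load the '01'
    # features the way the other scripts do and run the margin perceptron for 20 epochs.
    # The learning rate, margin, and epoch count below are assumed values; 'zeros' matches
    # the weight initialization used elsewhere in the project.
    import featureTRANSFORM
    (tr_m, tr_l, te_m, te_l,
     ev_m, ev_l) = featureTRANSFORM.eliminateZEROfetures('01', -1)
    best_LIST = algoFORepochs(20, tr_m, tr_l, te_m, te_l, 'margin', 0.1, 'zeros', margin=1.0)
    print 'best epoch:', best_LIST[1], ' train F1:', best_LIST[3], ' test F1:', best_LIST[5]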
max_depth = max(depth_list)
list_of_labels = list(Counter(train_actual_labels))

predicted_train_list = treeFunctions.predictFUNCTION(tree, train_matrix,
                                                     list_of_labels, major_trainLABEL)
predicted_test_list = treeFunctions.predictFUNCTION(tree, test_matrix,
                                                    list_of_labels, major_trainLABEL)
predicted_eval_list = treeFunctions.predictFUNCTION(tree, eval_matrix,
                                                    list_of_labels, major_trainLABEL)
#
train_accuracy = projectFunctions.accuracyMETRIC(train_actual_labels, predicted_train_list)
dummyP, dummyR, train_f1_score = projectFunctions.f1METRIC(train_actual_labels,
                                                           predicted_train_list)
test_accuracy = projectFunctions.accuracyMETRIC(test_actual_labels, predicted_test_list)
dummyP, dummyR, test_f1_score = projectFunctions.f1METRIC(test_actual_labels,
                                                          predicted_test_list)
eval_accuracy = projectFunctions.accuracyMETRIC(eval_actual_labels, predicted_eval_list)
dummyP, dummyR, eval_f1_score = projectFunctions.f1METRIC(eval_actual_labels,
                                                          predicted_eval_list)

solution_filename = raw_input(
    'Enter x__x in ./solutions_log/x__x_solutions.csv: ')
projectFunctions.write_solutions(
    'decision tree', predicted_eval_list,
    './solutions_log/solutions/' + solution_filename + '.solutions.csv')
#np.save('trees.npy', tree)
max_depth = max(depth_list)
list_of_labels = list(Counter(train_labels))

predicted_train_list = treeFunctions.predictFUNCTION(tree, train_matrix,
                                                     list_of_labels, major_trainLABEL)
predicted_test_list = treeFunctions.predictFUNCTION(tree, test_matrix,
                                                    list_of_labels, major_trainLABEL)
predicted_eval_list = treeFunctions.predictFUNCTION(tree, eval_matrix,
                                                    list_of_labels, major_trainLABEL)
#
train_accuracy = projectFunctions.accuracyMETRIC(train_labels, predicted_train_list)
dunn, gi, train_f1_score = projectFunctions.f1METRIC(train_labels, predicted_train_list)
test_accuracy = projectFunctions.accuracyMETRIC(test_labels, predicted_test_list)
f, j, test_f1_score = projectFunctions.f1METRIC(test_labels, predicted_test_list)

print 'Train f1: '
print train_f1_score
print 'Train Accuracy: '
print train_accuracy
print 'Test f1: '
print test_f1_score
print 'Test Accuracy: '
print test_accuracy

stopTime = timeit.default_timer()
print 'time: ', stopTime - startTime
def simplePerceptron_cvRate(train_matrix, train_actual_labels, rateRange, num_folds):
    # Requires (module level): copy, numpy as np, matplotlib.pyplot as plt,
    # percepFunctions, projectFunctions.
    # k-fold cross-validation over learning rates for the weighted perceptron; returns
    # the best average F1, the corresponding rate, and the per-rate average F1 list.
    av_accuracy_list = []
    av_f1_list = []
    for rate in rateRange:
        print 'rate: ', rate
        '''
        ###############
        kf = KFold(n_splits=5)
        temp_train_Matrix = copy.deepcopy(train_matrix)
        temp_train_Labels = copy.deepcopy(train_actual_labels)
        count = 0
        accuracy_list = []
        for train_indices, test_indices in kf.split(temp_train_Matrix):
            temp_test_Matrix = []
            temp_test_Labels = []
            for test_indx in test_indices:
                count += 1
                #print 'count: ', count
                temp_test_Matrix.append(temp_train_Matrix[test_indx - count])
                temp_test_Labels.append(temp_train_Labels[test_indx - count])
                np.delete(temp_train_Matrix, test_indx - count, 0)
                np.delete(temp_train_Labels, test_indx - count, 0)
        ################
        '''
        accuracy_list = []
        f1_list = []
        training_lab = copy.deepcopy(train_actual_labels)
        training_lab = training_lab.tolist()
        training_mat = copy.deepcopy(train_matrix)
        training_mat = training_mat.tolist()
        subset_size = len(training_mat) / num_folds
        for i in range(num_folds):
            # Split off fold i as the held-out set and train on the remaining folds.
            if i == num_folds - 1:
                temp_test_Matrix = training_mat[(i) * subset_size:]
                temp_train_Matrix = training_mat[0:][:i * subset_size]
                #
                temp_test_Labels = training_lab[(i) * subset_size:]
                temp_train_Labels = training_lab[0:][:i * subset_size]
            else:
                temp_test_Matrix = training_mat[i * subset_size:][:subset_size]
                #temp_train_Matrix = np.append(training_mat[:i * subset_size], training_mat[(i + 1) * subset_size:])
                temp_train_Matrix = training_mat[:i * subset_size] + training_mat[
                    (i + 1) * subset_size:]
                #
                temp_test_Labels = training_lab[i * subset_size:][:subset_size]
                temp_train_Labels = training_lab[:i * subset_size] + training_lab[
                    (i + 1) * subset_size:]

            # Training
            w_train, numUpdates_simpleP = percepFunctions.weightedPerceptron(
                temp_train_Matrix, temp_train_Labels, 'zeros', rate)
            predicted_labels = projectFunctions.prediction_Perceptron(
                temp_test_Matrix, temp_test_Labels, w_train)

            # Metric
            #test_accuracy = projectFunctions.accuracyMETRIC(temp_test_Labels, predicted_labels)
            dummyP, dummyR, test_f1 = projectFunctions.f1METRIC(temp_test_Labels,
                                                                predicted_labels)
            #accuracy_list.append(test_accuracy)
            f1_list.append(test_f1)

        #av_accuracy = np.mean(accuracy_list)
        av_f1 = np.mean(f1_list)
        #av_accuracy_list.append(av_accuracy)
        av_f1_list.append(av_f1)
        #print av_accuracy
        print av_f1

    #best_accuracy = max(av_accuracy_list)
    best_f1 = max(av_f1_list)
    #max_index = av_accuracy_list.index(best_accuracy)
    max_index = av_f1_list.index(best_f1)
    best_rate = rateRange[max_index]
    #
    #line, = plt.plot(rateRange, av_accuracy_list)
    line, = plt.plot(rateRange, av_f1_list)
    plt.xlabel('Learning Rate')
    #plt.ylabel('Accuracy')
    #plt.title('Accuracy v/s Learning Rate, for Simple perceptron')
    plt.ylabel('F1 score')
    plt.title('F1 score v/s Learning Rate, for Simple Weighted perceptron')
    plt.grid(True)
    plt.show()

    #return best_accuracy, best_rate, av_accuracy_list
    return best_f1, best_rate, av_f1_list
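
if __name__ == '__main__':
    # Hedged usage sketch (illustrative, not part of the original project): cross-validate
    # the learning rate on the '01' features with 5 folds; the candidate rates below are
    # assumed values (0.675 appears elsewhere in the project).
    import featureTRANSFORM
    (tr_m, tr_l, te_m, te_l,
     ev_m, ev_l) = featureTRANSFORM.eliminateZEROfetures('01', -1)
    best_f1, best_rate, f1_list = simplePerceptron_cvRate(tr_m, tr_l,
                                                          [1.0, 0.675, 0.1, 0.01], 5)
    print 'best rate:', best_rate, ' CV F1:', best_f1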
bestEpochs = epochsCV; bestGamma = gamma_; bestC = C_; bestAccuracy = accuracy

wCV = svmFunctions.SVM(train_matrix, train_labels, epochsCV, bestC, bestGamma)
pred_train_list = svmFunctions.prediction(train_matrix, train_labels, wCV)
pred_test_list = svmFunctions.prediction(test_matrix, test_labels, wCV)
acc_train = svmFunctions.accuracyMETRIC(train_labels, pred_train_list)
acc_test = svmFunctions.accuracyMETRIC(test_labels, pred_test_list)
dummyP, dummyR, f1_train = projectFunctions.f1METRIC(train_labels, pred_train_list)
dummyP, dummyR, f1_test = projectFunctions.f1METRIC(test_labels, pred_test_list)
print 'Best gamma(0) : ', bestGamma, ' Best C: ', bestC, ' Best epochs: ', bestEpochs, ' Train Acc:', acc_train, ' Train f1:', f1_train, ' Test Acc', acc_test, '% Test f1:', f1_test

endTime = timeit.default_timer()
print 'Total run time: ', endTime - startTime, 'secs'
'''
# (end of the commented-out cross-validation block opened earlier)

print 'Final Support Vector Machines:'
epochsFINAL = 20
CFINAL = 11000
gammaFINAL = 1e-06
wFINAL = svmFunctions.SVM(train_matrix, train_labels, epochsFINAL, CFINAL, gammaFINAL)
pred_train_list = projectFunctions.prediction_Perceptron(
    train_matrix, train_labels, wFINAL)
pred_test_list = projectFunctions.prediction_Perceptron(
    test_matrix, test_labels, wFINAL)
acc_train = projectFunctions.accuracyMETRIC(train_labels, pred_train_list)
acc_test = projectFunctions.accuracyMETRIC(test_labels, pred_test_list)
dummyP, dummyR, f1_train = projectFunctions.f1METRIC(train_labels, pred_train_list)
dummyP, dummyR, f1_test = projectFunctions.f1METRIC(test_labels, pred_test_list)
print '\tBest gamma(0) : ', gammaFINAL, ' Best C: ', CFINAL, ' Train Acc:', acc_train, ' Train f1:', f1_train, ' Test Acc', acc_test, '% Test f1:', f1_test
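
# Hedged sketch (illustrative, not the project's original CV loop): the commented-out
# block above reports bestGamma / bestC / bestEpochs found by a search; a minimal grid
# search in the same spirit might look like this. The candidate values, the fixed 10
# epochs, and scoring on the held-out test split are all assumptions for illustration.
bestF1try = -1.0
for gamma_try in [1e-3, 1e-4, 1e-5, 1e-6]:
    for C_try in [10, 100, 1000, 11000]:
        w_try = svmFunctions.SVM(train_matrix, train_labels, 10, C_try, gamma_try)
        pred_try = projectFunctions.prediction_Perceptron(test_matrix, test_labels, w_try)
        dP, dR, f1_try = projectFunctions.f1METRIC(test_labels, pred_try)
        if f1_try > bestF1try:
            bestF1try = f1_try
            bestGammaTRY = gamma_try
            bestCtry = C_try
print 'grid search (assumed) -> best gamma(0):', bestGammaTRY, ' best C:', bestCtry, ' test f1:', bestF1try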