def runClassifier(self, X_train, Y_train, X_test, Y_test):
    classalgs = {
        'Logistic_Regression': algs.LogisticRegression1(),
        'Gaussian_SVM': algs.SVM1(),
        'Logistic_Regression_weighted': algs.LogisticRegression1wt(),
        'Gaussian_SVM_weighted': algs.SVM1wt()
    }
    for learnername, learner in classalgs.iteritems():
        print 'Running learner = ' + learnername
        # Fit the learner and predict on the held-out test set
        learner.learn(X_train, Y_train)
        predictions = learner.predict(X_test)

        recall = util.getRecall(Y_test, predictions)
        print '\n Recall for ' + learnername + ': ' + str(recall)
        precision = util.getPrecision(Y_test, predictions)
        print '\n Precision for ' + learnername + ': ' + str(precision)
        f5_score = util.getF5(precision, recall)
        print '\n F5 Score for ' + learnername + ': ' + str(f5_score)

        AUCROC = util.getAUCROC(Y_test, predictions)
        print '\n AUC ROC Score for ' + learnername + ': ' + str(AUCROC)
        # AUCROCPlotPoints = (fpr, tpr, auc_roc), as unpacked in plotGraph
        AUCROCPlotPoints = util.getAUCROCPlotPoints(Y_test, predictions)
        print '\n fpr : {0} tpr : {1} auc_roc : {2} learnername : {3}'.format(
            AUCROCPlotPoints[0], AUCROCPlotPoints[1], AUCROCPlotPoints[2], learnername)
        self.plotGraph(AUCROCPlotPoints, learnername)

        f1_score = util.fscore(Y_test, predictions)
        print '\n f1_score for ' + learnername + ': ' + str(f1_score)
        accuracy = util.getaccuracy(Y_test, predictions)
        print 'Accuracy for ' + learnername + ': ' + str(accuracy)
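
# util.getF5 above is defined elsewhere in the project; a minimal, hypothetical sketch of
# what it is assumed to compute -- the F-beta score with beta = 5, which weights recall
# far more heavily than precision:
def getF5_sketch(precision, recall, beta=5.0):
    """Assumed F-beta formula: (1 + beta^2) * P * R / (beta^2 * P + R)."""
    denom = (beta ** 2) * precision + recall
    if denom == 0:
        return 0.0
    return (1 + beta ** 2) * precision * recall / denom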
validation_sets = [u1_test, u2_test, u3_test, u4_test, u5_test]
validation_answers = [u1_test_ans, u2_test_ans, u3_test_ans, u4_test_ans, u5_test_ans]

print('Data has been loaded, now running algorithm.')

k = 161
n_folds = 5
accuracy_total = 0
naive_accuracy_total = 0

t0 = time.time()
for i in range(n_folds):
    print('Making predictions for the ' + str(i + 1) + '-th fold')
    predictions, naive_rating = alg.predict_from_set(
        X_train=datasets[i], X_test=validation_sets[i],
        movie_info=movie_info, k=k, distance='euclidean')

    print('Computing accuracy for predictions')
    accuracy = utils.getaccuracy(validation_answers[i], predictions)
    naive_accuracy = utils.get_mean_accuracy(validation_answers[i], naive_rating)
    print('Accuracy for ' + str(i + 1) + '-th fold: ' + str(accuracy))
    print('Naive accuracy for ' + str(i + 1) + '-th fold: ' + str(naive_accuracy))

    accuracy_total += accuracy
    naive_accuracy_total += naive_accuracy
t1 = time.time()

# This will make a prediction for every user for every movie; 'dataset' can be any set
# of the same form as the entries in the 'datasets' array above.
# Needed to run on all users:
# users = user_data[['user_id']].get_values()
# predictions, naive_rating = alg.predict_all_users(dataset, users, movie_info, k, distance='euclidean')
# Write our predictions to a csv file:
# print('writing results to csv...')
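
# The fold loop above accumulates accuracy_total / naive_accuracy_total and records t0 / t1
# but never reports them in this snippet; a minimal summary step, assuming the totals are
# simply averaged over the n_folds folds:
print('Mean accuracy over ' + str(n_folds) + ' folds: ' + str(accuracy_total / n_folds))
print('Mean naive accuracy over ' + str(n_folds) + ' folds: ' + str(naive_accuracy_total / n_folds))
print('Prediction time for all folds: ' + str(t1 - t0) + ' seconds')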
        accuracyD = {}
        for learnername, learner in classalgs.iteritems():
            print 'Running learner = ' + learnername
            learner.learn(Xtrain, Ytrain)
            predictions = learner.predict(Xtest)

            recall = util.getRecall(Ytest, predictions)
            print '\n Recall for ' + learnername + ': ' + str(recall)
            AUCROC = util.getAUCROC(Ytest, predictions)
            print '\n AUC ROC Score for ' + learnername + ': ' + str(AUCROC)
            AUCROCPlotPoints = util.getAUCROCPlotPoints(Ytest, predictions)
            print '\n fpr : {0} tpr : {1} auc_roc : {2} learnername : {3}'.format(
                AUCROCPlotPoints[0], AUCROCPlotPoints[1], AUCROCPlotPoints[2], learnername)
            self.plotGraph(AUCROCPlotPoints, learnername)

            f1_score = util.fscore(Ytest, predictions)
            print '\n f1_score for ' + learnername + ': ' + str(f1_score)
            accuracy = util.getaccuracy(Ytest, predictions)
            print 'Accuracy for ' + learnername + ': ' + str(accuracy)

            # Track the per-learner AUC-ROC for this fold's significance test
            accuracyD[learnername] = AUCROC

        FoldAccuracy[i] = accuracyD
        i = i + 1

    self.StatisticalSignificance(FoldAccuracy)

def plotGraph(self, AUCROCPlotPoints, learnerName):
    '''Plot the AUC-ROC curve for each classifier.'''
    fpr = AUCROCPlotPoints[0]
    tpr = AUCROCPlotPoints[1]
    roc_auc = metrics.auc(fpr, tpr)
    plt.figure()
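    # Hypothetical continuation -- the original plotGraph is cut off after plt.figure().
    # Draw the ROC curve and the chance diagonal; label text and the output filename are assumptions.
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], linestyle='--', label='chance')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curve for ' + learnerName)
    plt.legend(loc='lower right')
    plt.savefig(learnerName + '_ROC.png')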
def classify():
    # init variables
    run = True
    plot = True
    trainsize = 12500
    testsize = 12500
    numruns = 1
    k_fold = False
    dataset_file = "data.csv"

    classalgs = {'Logistic Regression': algs.LogitReg()}
    numalgs = len(classalgs)

    num_steps = 1
    parameters = (
        {'regularizer': 'None', 'stepsize': 0.001, 'num_steps': num_steps, 'batch_size': 2},
        # {'regularizer': 'None', 'stepsize': 0.01, 'num_steps': 300, 'batch_size': 20},
    )
    numparams = len(parameters)

    accuracy = {}
    for learnername in classalgs:
        accuracy[learnername] = np.zeros((numparams, numruns))

    # load dataset & shuffle
    dataset = dp.readcsv(dataset_file)
    Y = cc.getData("ia_success")
    Y = np.array(Y).astype(float)

    # X = cc.getListedData("fbp_HFI")
    X = cc.getListedDataList([
        'fbp_CFB', 'fbp_CFC', 'fbp_HFI', 'fbp_RAZ', 'fbp_ROS', 'fbp_SFC',
        'fbp_TFC', 'fbp_HFI_class'
    ])
    # Commented-out alternative: the full feature list including 'assessment_result'
    # X = cc.getListedDataList(['assessment_result', 'max_size', 'first_size', 'first_status_held',
    #     'sec_to_uc', 'aircraft_n_Fixed', 'aircraft_n_Rotary', 'aircraft_n_total', 'aircraft_hr_Fixed',
    #     'aircraft_hr_Rotary', 'aircraft_hr_total', 'n_firefighters', 'n_non_firefighters',
    #     'hr_firefighters', 'hr_non_firefighters', 'drop_amount_retardant', 'drop_amount_water',
    #     'drop_amount_total', 'n_fire_past_1', 'n_fire_past_7', 'n_fire_past_30', 'response_time',
    #     'general_cause', 'year', 'month', 'latitude', 'longitude', 'assessment_size', 'fire_spread_rate',
    #     'fire_position_on_slope', 'temperature', 'relative_humidity', 'wind_direction', 'wind_speed',
    #     'weather_conditions_over_fire', 'equipment_Transportation', 'equipment_Water_Delivery',
    #     'equipment_Sustained_Action', 'equipment_Fire_Guard_Building', 'equipment_Crew_Gear',
    #     'equipment_Base_Camp', 'equipment_WaterTruck_Transportation', 'wstation_dry_bulb_temperature',
    #     'wstation_relative_humidity', 'wstation_wind_speed_kmh', 'wstation_wind_direction',
    #     'wstation_precipitation', 'wstation_fine_fuel_moisture_code', 'wstation_duff_moisture_code',
    #     'wstation_drought_code', 'wstation_build_up_index', 'wstation_initial_spread_index',
    #     'wstation_fire_weather_index', 'wstation_daily_severity_rating', 'fuelgrid_C', 'fuelgrid_D',
    #     'fuelgrid_M', 'fuelgrid_Nonfuel', 'fuelgrid_O', 'fuelgrid_S', 'fuelgrid_Unclassified',
    #     'fuelgrid_Water', 'fuel_type2', 'grouped_fuel_type2', 'fbp_CFB', 'fbp_CFC', 'fbp_FD', 'fbp_HFI',
    #     'fbp_RAZ', 'fbp_ROS', 'fbp_SFC', 'fbp_TFC', 'fbp_HFI_class', 'fuel_type', 'grouped_fuel_type',
    #     'test_i'])
    X = cc.getListedDataList([
        'max_size', 'first_size', 'first_status_held', 'sec_to_uc', 'aircraft_n_Fixed',
        'aircraft_n_Rotary', 'aircraft_n_total', 'aircraft_hr_Fixed', 'aircraft_hr_Rotary',
        'aircraft_hr_total', 'n_firefighters', 'n_non_firefighters', 'hr_firefighters',
        'hr_non_firefighters', 'drop_amount_retardant', 'drop_amount_water', 'drop_amount_total',
        'n_fire_past_1', 'n_fire_past_7', 'n_fire_past_30', 'response_time', 'general_cause',
        'year', 'month', 'latitude', 'longitude', 'assessment_size', 'fire_spread_rate',
        'fire_position_on_slope', 'temperature', 'relative_humidity', 'wind_direction', 'wind_speed',
        'weather_conditions_over_fire', 'equipment_Transportation', 'equipment_Water_Delivery',
        'equipment_Sustained_Action', 'equipment_Fire_Guard_Building', 'equipment_Crew_Gear',
        'equipment_Base_Camp', 'equipment_WaterTruck_Transportation', 'wstation_dry_bulb_temperature',
        'wstation_relative_humidity', 'wstation_wind_speed_kmh', 'wstation_wind_direction',
        'wstation_precipitation', 'wstation_fine_fuel_moisture_code', 'wstation_duff_moisture_code',
        'wstation_drought_code', 'wstation_build_up_index', 'wstation_initial_spread_index',
        'wstation_fire_weather_index', 'wstation_daily_severity_rating', 'fuelgrid_C', 'fuelgrid_D',
        'fuelgrid_M', 'fuelgrid_Nonfuel', 'fuelgrid_O', 'fuelgrid_S', 'fuelgrid_Unclassified',
        'fuelgrid_Water', 'fuel_type2', 'grouped_fuel_type2', 'fbp_CFB', 'fbp_CFC', 'fbp_FD',
        'fbp_HFI', 'fbp_RAZ', 'fbp_ROS', 'fbp_SFC', 'fbp_TFC', 'fbp_HFI_class', 'fuel_type',
        'grouped_fuel_type', 'test_i'
    ])
    # print(X)
    X = np.array(X).astype(float)

    # trainX, testX = pickle.load(open(dataset_file, "rb"))
    # trainY = np.append(np.zeros(len(trainX[0][2500:])), np.ones(len(trainX[1][2500:])))
    # testY = np.append(np.zeros(len(testX[0])), np.ones(len(testX[1])))
    # valY = np.append(np.zeros(2500), np.ones(2500))
    # valX = np.append(trainX[0][:2500], trainX[1][:2500], axis=0)
    # trainX = np.append(trainX[0][2500:], trainX[1][2500:], axis=0)
    # testX = np.append(testX[0], testX[1], axis=0)

    # shuffle X and Y with the same permutation by reseeding before each shuffle
    np.random.seed(3111)
    np.random.shuffle(X)
    np.random.seed(3111)
    np.random.shuffle(Y)

    # 50% train / 25% validation / 25% test split
    trainX = X[:len(X) // 2]
    valX = X[len(X) // 2:len(X) * 3 // 4]
    testX = X[len(X) * 3 // 4:]
    trainY = Y[:len(Y) // 2]
    valY = Y[len(Y) // 2:len(Y) * 3 // 4]
    testY = Y[len(Y) * 3 // 4:]

    # Run
    if run:
        for r in range(numruns):
            print('Running on train={0}, val={1}, test={2} samples for run {3}'.format(
                trainX.shape[0], valX.shape[0], testX.shape[0], r))

            # test different parameters (only one for this assignment)
            for p in range(numparams):
                params = parameters[p]

                # only one algorithm for now
                for learnername, learner in classalgs.items():
                    # Reset learner for new parameters
                    learner.reset(params)
                    print('Running learner = ' + learnername +
                          ' on parameters ' + str(learner.getparams()))

                    # Train model
                    # print("trainset0: ", trainset[0])
                    learner.learn(trainX, trainY, valX, valY, testX, testY)

                    # Test model
                    predictions = learner.predict(testX)
                    acc = utils.getaccuracy(testY, predictions)
                    print('accuracy for ' + learnername + ': ' + str(acc))
                    accuracy[learnername][p, r] = acc

    # plot learning curves saved by the learner
    if plot:
        print("PLOT!")
        accuracy_val, accuracy_test, accuracy_train, best_accuracy, best_weight = pickle.load(
            open("learning_acc.pkl", "rb"))
        print("best_accuracy : val,train,test", accuracy_val, accuracy_train, accuracy_test)
        epi = np.arange(0, num_steps, 1)
        plt.plot(epi, accuracy_val, label='validation accuracy : 1')
        plt.plot(epi, accuracy_test, label='test accuracy : 2')
        plt.plot(epi, accuracy_train, label='train accuracy : 3')
        plt.xlabel('epochs')
        plt.ylabel('Accuracy %')
        plt.legend()
        plt.show()
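
# The double np.random.seed(3111) / np.random.shuffle calls inside classify() keep X and Y
# aligned only because both shuffles receive the identical permutation; a small, hypothetical
# sketch of the equivalent single-permutation idiom (the toy arrays are illustrative only):
def _shuffle_together_sketch():
    rng = np.random.RandomState(3111)
    X_demo = np.arange(10).reshape(5, 2)   # toy feature matrix (5 rows)
    Y_demo = np.arange(5)                  # toy labels aligned with X_demo rows
    perm = rng.permutation(len(X_demo))    # one shared permutation of row indices
    return X_demo[perm], Y_demo[perm]      # rows stay paired with their labels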