# Logistic regression experiment: 150 chi2-selected features, 10-fold CV,
# then a final evaluation on the 25% hold-out split.
target = data['TripType']
del data['TripType']
X, y = data, target

# Keep the 150 best features by chi-squared score.
ch2 = SelectKBest(chi2, k=150)
X = ch2.fit_transform(X, y)

# Splitting the data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)

# BUG FIX: the fold count was hard-coded to 70686 rows; derive it from the data.
# NOTE(review): folds are drawn over ALL rows (train + test), so the CV estimate
# leaks hold-out rows — consider running KFold over X_train only.
kf = KFold(n=X.shape[0], n_folds=10, shuffle=False, random_state=42)

c_value = 10000  # regularization strength actually passed to the model
total_roc = 0
for train_index, test_index in kf:
    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    # BUG FIX: the original printed an undefined/stale variable `C`
    # (NameError in a fresh session); report the C value actually used.
    print("Calling get Roc. C Value::\t" + str(c_value))
    result_ROC = getROCScore(X_train_fold, y_train_fold, X_test_fold, y_test_fold,
                             "LogisticRegression", Cvalue=c_value)
    roc_Micro_score = result_ROC[0]
    print("ROC on Validation set ", roc_Micro_score)
    total_roc += roc_Micro_score

print("Average ROC Score is (Validation ROC) : ", total_roc / 10)

# Final evaluation on the 25% hold-out split.
result_ROC = getROCScore(X_train, y_train, X_test, y_test, "LogisticRegression", Cvalue=c_value)
print("ROC score on test set is ", result_ROC[0])
# Linear SVC experiment: 130 chi2-selected features, 10-fold CV,
# then a final evaluation on the 25% hold-out split.
ch2 = SelectKBest(chi2, k=130)
X = ch2.fit_transform(X, y)

# Splitting the data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)

# BUG FIX: the fold count was hard-coded to 70686 rows; derive it from the data.
kf = KFold(n=X.shape[0], n_folds=10, shuffle=False, random_state=42)

# BUG FIX: removed a dead C grid (C_Value, i, max_score, best_model, best_CValue)
# that was indexed once per fold but never passed to the model — every fold ran
# with Cvalue=100000 regardless. Behavior is unchanged; the constant is explicit.
c_value = 100000
total_roc = 0
for train_index, test_index in kf:
    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    result_ROC = getROCScore(X_train_fold, y_train_fold, X_test_fold, y_test_fold,
                             "LinearSVC", Cvalue=c_value)
    roc_Micro_score = result_ROC[0]
    print("ROC on Validation set ", roc_Micro_score)
    total_roc += roc_Micro_score

print("Average ROC Score is (Validation ROC) : ", total_roc / 10)

# Final evaluation on the 25% hold-out split.
result_ROC = getROCScore(X_train, y_train, X_test, y_test, "LinearSVC", Cvalue=c_value)
print("ROC score on test set is ", result_ROC[0])
# Compare DecisionTree vs. LogisticRegression ROC on a single 75/25 split.
X_features = 150  # number of chi2-selected features to keep
# BUG FIX: X_features was printed before it was assigned (NameError in a fresh session).
print("Feature Count::\t" + str(X_features))
ch2 = SelectKBest(chi2, k=X_features)
X = ch2.fit_transform(X, y)

# Splitting the data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)

# Decision Tree
print("Decision Tree model")
print("starting predication")
print("Decision Tree Accuracy is")
result_ROC = getROCScore(X_train, y_train, X_test, y_test, "DecisionTree", depth=10)
# BUG FIX: Python-2-only `print result_ROC[0]` statements replaced with the
# call form used everywhere else in this file (identical output either way).
print(result_ROC[0])
print("\n")

# Logistic Regression
print("Logistic Regression model")
print("starting predication")
print("Logistic Regression model Accuracy is")
result_ROC = getROCScore(X_train, y_train, X_test, y_test, "LogisticRegression", Cvalue=10000)
print(result_ROC[0])
print("\n")
# Naive Bayes experiment: 150 chi2-selected features, 10-fold CV,
# then a final evaluation on the 25% hold-out split.
ch2 = SelectKBest(chi2, k=150)
X = ch2.fit_transform(X, y)

# Splitting the data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)

kf = KFold(n=len(X_train), n_folds=10, shuffle=False, random_state=42)

# BUG FIX: removed a dead alpha grid (alpha_Value, i, max_score, best_model,
# best_alphaValue) that was indexed once per fold but never passed to the model —
# every fold ran with alphaValue=0.6 regardless. Behavior unchanged; constant explicit.
# NOTE(review): fold indices are generated for len(X_train) rows but applied to the
# full matrix X rather than X_train — verify this is intentional.
alpha_value = 0.6
total_roc = 0
for train_index, test_index in kf:
    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    result_ROC = getROCScore(X_train_fold, y_train_fold, X_test_fold, y_test_fold,
                             "NaiveBayes", alphaValue=alpha_value)
    roc_Micro_score = result_ROC[0]
    print("ROC on Validation set ", roc_Micro_score)
    total_roc += roc_Micro_score

print("Average ROC Score is (Validation ROC) : ", total_roc / 10)

# Final evaluation on the 25% hold-out split.
result_ROC = getROCScore(X_train, y_train, X_test, y_test, "NaiveBayes", alphaValue=alpha_value)
print("ROC score on test set is ", result_ROC[0])
# Decision tree experiment with ALL features: 10-fold CV,
# then a final evaluation on the 25% hold-out split.
X, y = data, target

# k='all' keeps every feature; SelectKBest here just coerces the input into the
# same array form as the other experiments.
ch2 = SelectKBest(chi2, k='all')
X = ch2.fit_transform(X, y)

# Splitting the data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)

# BUG FIX: the fold count was hard-coded to 70686 rows; derive it from the data.
kf = KFold(n=X.shape[0], n_folds=10, shuffle=False, random_state=42)

total_roc = 0
for train_index, test_index in kf:
    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    result_ROC = getROCScore(X_train_fold, y_train_fold, X_test_fold, y_test_fold, "DecisionTree")
    roc_Micro_score = result_ROC[0]
    print("ROC on Validation set ", roc_Micro_score)
    total_roc += roc_Micro_score

print("Average ROC Score is (Validation ROC) : ", total_roc / 10)

# Final evaluation on the 25% hold-out split.
result_ROC = getROCScore(X_train, y_train, X_test, y_test, "DecisionTree")
print("ROC score on test set is ", result_ROC[0])