def prob_tester():
    """Fit a probability-producing classifier on thresholded-GMI data.

    Loads the quantile-thresholded (gmi5) binary MI features for seizure
    state 's1', splits them into train/test epochs, and fits an
    L2-penalised logistic regression on the training portion.
    """
    np.random.seed(42)  # fix randomness for reproducible splits

    # Get files / experiment selectors.
    data_dir = get_mat_root() + "mlv2/threshbin/"
    mi_kind = 'gmi5'     # use quantile-based MI threshold
    window = '2'
    seiz_state = 's1'    # select the seizure state
    mi_threshold = 90    # select the MI threshold

    # Alternatives tried previously: SVC(kernel='linear', probability=True),
    # RandomForestClassifier(n_estimators=100), AdaBoost over depth-1 trees.
    model = LogisticRegression(penalty='l2')

    # Train model on the extracted epochs.
    X_train, y_train, X_test, y_test = gmi_dataset_extract(
        data_dir, mi_kind, window, mi_threshold, seiz_state,
        interTestSize=0.66)
    model.fit(X_train, y_train)
def test_fullseizextract():
    """Smoke-test full_seizure_extract for patient DV, seizure 10, th=90."""
    load_dir = get_mat_root() + 'mlv2/fullseiz/th/'
    mf = full_seizure_extract(load_dir, 'DV', '10', 90)
    print('dummy line')
def rec_test_result(save_type, loc, in_dict, append_string=None):
    """Save a results dict as a .mat file under the MATLAB root.

    Parameters
    ----------
    save_type : str
        Prefix describing the kind of result (e.g. "TEST", "CV").
    loc : str
        Sub-directory, relative to ``get_mat_root()``, to save into.
    in_dict : dict
        Result payload; passed through ``dict_cleaner`` before saving.
    append_string : str, optional
        Filename suffix used after the date instead of the time-of-day
        stamp.  FIX: callers in this file invoke rec_test_result with a
        fourth positional argument, which raised TypeError under the old
        3-parameter signature (the original comment even noted
        "need append_string as input").  Omitting it preserves the old
        timestamped behaviour, so existing 3-argument callers still work.

    Returns
    -------
    None
    """
    saveloc = get_mat_root() + loc
    if append_string is None:
        # Default: full timestamp keeps repeated runs from colliding.
        ttl = save_type + '_' + datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
    else:
        # Caller-supplied suffix (e.g. state switch + estimator count).
        ttl = save_type + '_' + datetime.datetime.now().strftime("%Y-%m-%d_") + append_string
    sv_dict = dict_cleaner(in_dict)
    sio.savemat(saveloc + ttl, sv_dict)
    return None
def my_process(numEsts):
    """Fit a random forest on thresholded-GMI epochs and record its
    held-out-test ROC curve via rec_test_result.

    numEsts: number of trees; also encoded in the saved filename suffix.
    """
    np.random.seed(42)  # fix randomness

    load_dir = get_mat_root() + "mlv2/threshbin/"
    mi_kind = 'gmi5'    # use quantile-based MI threshold
    win = '2'
    seiz_state = 's1'   # select the seizure state
    test_frac = 0.66
    win_cap = 9
    mi_th = 95          # select the MI threshold

    clf = RandomForestClassifier(n_estimators=numEsts, n_jobs=-1)

    X_train, y_train, X_test, y_test = gmi_dataset_extract(
        load_dir, mi_kind, win, mi_th, seiz_state, test_frac, win_cap)
    clf.fit(X_train, y_train)

    # ROC on the held-out test epochs.
    probas_ = clf.predict_proba(X_test)
    fpr, tpr, thresholds = roc_curve(y_test, probas_[:, 1])
    roc_auc = auc(fpr, tpr)

    clf_name = clf.__class__.__name__
    comment = {
        'ClassifierType': clf_name,
        # NOTE(review): the modulo looks odd (9 % 5 == 4) — kept as-is.
        'MaxWindows': float(win_cap % 5),
    }
    if "RandomForest" in clf_name:
        comment['number_estimators'] = numEsts
    elif "AdaBoost" in clf_name:
        # Dead branch with the forest classifier above; note base_clf/algo
        # are not defined in this function.
        comment['BaseClassifier'] = base_clf.__class__.__name__
        comment['number_estimators'] = numEsts
        comment['algorithm'] = algo
    elif "LogisticRegression" in clf_name:
        comment['penalty'] = pen
    elif "SVC" in clf_name:
        comment['kernel'] = kern
    comment['StateSwitch'] = seiz_state

    suffix = ''.join([seiz_state, str(numEsts)])
    rec_test_result(
        "TEST", "mlv2/rocbin/num_est/",
        {'comment': comment, 'fpr': fpr, 'th': mi_th, 'tpr': tpr,
         'rocth': thresholds, 'rocauc': roc_auc},
        suffix)
def my_process(numEsts):
    """Compute 5-fold cross-validated mean ROC AUC for a random forest.

    Relies on module-level globals: `th` (the MI threshold being swept)
    and `scores` (list the resulting mean AUC is appended to).
    """
    np.random.seed(42)  # fix randomness

    load_dir = get_mat_root() + "mlv2/threshbin/"
    mi_kind = 'gmi5'
    win = '2'
    seiz_state = 's1'
    test_frac = 0.66
    win_cap = 9

    clf = RandomForestClassifier(n_estimators=numEsts, n_jobs=-1)
    print(clf.__class__.__name__ + " Threshold %0.0d/100" % th)

    # Seizure epochs per patient with a predetermined train/test division.
    X_train, y_train, X_test, y_test = gmi_dataset_extract(
        load_dir, mi_kind, win, th, seiz_state, test_frac, win_cap)

    # 5-fold stratified CV on the training epochs (legacy sklearn API:
    # StratifiedKFold(labels, n_folds)).
    cv = StratifiedKFold(y_train, n_folds=5)
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    for fold_train, fold_test in cv:
        fold_probs = clf.fit(
            X_train[fold_train], y_train[fold_train]).predict_proba(X_train[fold_test])
        fpr, tpr, thresholds = roc_curve(y_train[fold_test], fold_probs[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)  # resample TPR onto a common FPR grid
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)

    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    score = auc(mean_fpr, mean_tpr)
    print(score)
    scores.append(score)
def my_process(numEsts):
    """5-fold CV mean ROC AUC for an `numEsts`-tree random forest.

    Reads the module global `th` (MI threshold) and appends the mean
    AUC to the module-level `scores` list.
    """
    np.random.seed(42)  # fix randomness
    data_root = get_mat_root() + "mlv2/threshbin/"
    forest = RandomForestClassifier(n_estimators=numEsts, n_jobs=-1)
    print(forest.__class__.__name__ + " Threshold %0.0d/100" % th)

    # Seizure epochs per patient, predetermined train/test division.
    X_train, y_train, X_test, y_test = gmi_dataset_extract(
        data_root, 'gmi5', '2', th, 's1', 0.66, 9)

    folds = StratifiedKFold(y_train, n_folds=5)  # legacy label-based API
    grid_fpr = np.linspace(0, 1, 100)
    tpr_sum = 0.0
    for idx_tr, idx_te in folds:
        prob = forest.fit(X_train[idx_tr], y_train[idx_tr]).predict_proba(X_train[idx_te])
        fpr, tpr, thresholds = roc_curve(y_train[idx_te], prob[:, 1])
        tpr_sum += interp(grid_fpr, fpr, tpr)  # common-grid TPR accumulation
        tpr_sum[0] = 0.0
        roc_auc = auc(fpr, tpr)

    tpr_sum /= len(folds)
    tpr_sum[-1] = 1.0
    score = auc(grid_fpr, tpr_sum)
    print(score)
    scores.append(score)
def my_process(numEsts):
    """Train a random forest (numEsts trees) on thresholded-GMI epochs
    and save its test-set ROC curve via rec_test_result."""
    np.random.seed(42)  # fix randomness
    base = get_mat_root() + "mlv2/threshbin/"
    mi_th = 95       # select the MI threshold
    seiz_state = 's1'
    forest = RandomForestClassifier(n_estimators=numEsts, n_jobs=-1)

    X_train, y_train, X_test, y_test = gmi_dataset_extract(
        base, 'gmi5', '2', mi_th, seiz_state, 0.66, 9)
    forest.fit(X_train, y_train)

    # Test-set ROC.
    test_probs = forest.predict_proba(X_test)
    fpr, tpr, thresholds = roc_curve(y_test, test_probs[:, 1])
    roc_auc = auc(fpr, tpr)

    name = forest.__class__.__name__
    comment = dict()
    comment['ClassifierType'] = name
    comment['MaxWindows'] = float(9 % 5)  # NOTE(review): modulo looks odd — kept as-is
    if "RandomForest" in name:
        comment['number_estimators'] = numEsts
    elif "AdaBoost" in name:
        # Dead branch for this script (forest classifier); base_clf/algo
        # are not defined here.
        comment['BaseClassifier'] = base_clf.__class__.__name__
        comment['number_estimators'] = numEsts
        comment['algorithm'] = algo
    elif "LogisticRegression" in name:
        comment['penalty'] = pen
    elif "SVC" in name:
        comment['kernel'] = kern
    comment['StateSwitch'] = seiz_state

    rec_test_result(
        "TEST", "mlv2/rocbin/num_est/",
        {'comment': comment, 'fpr': fpr, 'th': mi_th, 'tpr': tpr,
         'rocth': thresholds, 'rocauc': roc_auc},
        ''.join([seiz_state, str(numEsts)]))
# import matplotlib.pyplot as plt
import multiprocessing

import numpy as np  # FIX: np.random.seed below was used without importing numpy
from joblib import Parallel, delayed
from scipy import interp
from sklearn.cross_validation import StratifiedKFold
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import auc, roc_curve
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from get_root_dir import get_mat_root
from matransfer import gmi_dataset_extract
from record_data import rec_test_result

# Experiment configuration (module-level; consumed by code further down).
ldir = get_mat_root() + "mlv2/threshbin/"
gmitype = 'gmi5'
winsize = '2'
state_switch = 's1'
inter_test_size = 0.66
max_windows = 9
np.random.seed(42)  # fix randomness

# Classifier choice — exactly one block active at a time.
# SVC
# kern = 'linear'
# clf = SVC(kernel=kern, probability=True)
# Logistic Regression
# pen = 'l2'
# clf = LogisticRegression(penalty=pen)
# Random Forest
numEsts = 100
clf = RandomForestClassifier(n_estimators=numEsts)
import numpy as np # import matplotlib.pyplot as plt from sklearn.metrics import auc, roc_curve from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.svm import SVC from sklearn.linear_model import LogisticRegression # from sklearn.naive_bayes import BernoulliNB from matransfer import gmi_dataset_extract, full_seizure_extract, full_seizure_detect_save from get_root_dir import get_mat_root from record_data import rec_test_result # Get Files ldir = get_mat_root() + "mlv2/threshbin/" gmitype = 'gmi5' # use quantile-based MI threshold winsize = '2' state_switch = 's1' # select the seizure state # randomState = 42 # fix random state np.random.seed(42) # fix randomness th = 94 # select the MI threshold maxWindows = 10 # SVC # kern = 'linear' # clf = SVC(kernel=kern, probability=True) # Logistic Regression # pen = 'l2' # clf = LogisticRegression(penalty=pen) # Random Forest numEsts = 100 clf = RandomForestClassifier(n_estimators=numEsts) # AdaBoost
import numpy as np # import matplotlib.pyplot as plt from sklearn.metrics import auc, roc_curve from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.svm import SVC from sklearn.linear_model import LogisticRegression # from sklearn.naive_bayes import BernoulliNB from matransfer import gmi_dataset_extract from get_root_dir import get_mat_root from record_data import rec_test_result from joblib import Parallel, delayed import multiprocessing ldir = get_mat_root() + "mlv2/threshbin/" gmitype = 'gmi5' # use quantile-based MI threshold winsize = '2' state_switch = 's1' # select the seizure state inter_test_size = 0.66 max_windows = 10 np.random.seed(42) # fix randomness th = 95 # select the MI threshold # Random Forest numEsts = 100 clf = RandomForestClassifier(n_estimators=numEsts, n_jobs=-1) # AdaBoost # algo = "SAMME" # numEsts = 100 # base_clf = DecisionTreeClassifier(max_depth=1) # clf = AdaBoostClassifier(base_estimator=base_clf, n_estimators=numEsts, algorithm=algo)
import numpy as np # import matplotlib.pyplot as plt from sklearn.metrics import auc, roc_curve from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.svm import SVC from sklearn.linear_model import LogisticRegression # from sklearn.naive_bayes import BernoulliNB from matransfer import gmi_dataset_extract, full_seizure_extract, full_seizure_detect_save from get_root_dir import get_mat_root from record_data import rec_test_result # Get Files ldir = get_mat_root() + "mlv2/threshbin/" gmitype = 'gmi5' # use quantile-based MI threshold winsize = '2' state_switch = 's2' # select the seizure state maxWindows = 10 # randomState = 42 # fix random state np.random.seed(42) # fix randomness th = 95 # SVC # kern = 'linear' # clf = SVC(kernel=kern, probability=True) # Logistic Regression # pen = 'l1' # clf = LogisticRegression(penalty=pen) # Random Forest numEsts = 100 clf = RandomForestClassifier(n_estimators=numEsts)
import numpy as np # import matplotlib.pyplot as plt from sklearn.metrics import auc, roc_curve from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.svm import SVC from sklearn.linear_model import LogisticRegression # from sklearn.naive_bayes import BernoulliNB from matransfer import gmi_dataset_extract, full_seizure_extract, full_seizure_detect_save from get_root_dir import get_mat_root from record_data import rec_test_result # Get Files ldir = get_mat_root() + "mlv2/threshbin/" gmitype = 'gmi5' # use quantile-based MI threshold winsize = '2' state_switch = 's1' # select the seizure state # randomState = 42 # fix random state np.random.seed(42) # fix randomness th = 94 # select the MI threshold maxWindows = 10 # SVC # kern = 'linear' # clf = SVC(kernel=kern, probability=True) # Logistic Regression # pen = 'l2' # clf = LogisticRegression(penalty=pen) # Random Forest numEsts = 100 clf = RandomForestClassifier(n_estimators=numEsts)
def my_process(th):
    """Cross-validate a 200-tree random forest at MI threshold `th` and
    record the mean 5-fold CV ROC AUC to disk via rec_test_result."""
    n_trees = 200  # fixed N_e; th is the swept parameter
    np.random.seed(42)  # fix randomness

    load_dir = get_mat_root() + "mlv2/threshbin/"
    mi_kind = 'gmi5'
    win = '2'
    seiz_state = 's2'
    test_frac = 0.66
    win_cap = 8

    clf = RandomForestClassifier(n_estimators=n_trees, n_jobs=-1)

    # Seizure epochs per patient with a predetermined train/test division.
    X_train, y_train, X_test, y_test = gmi_dataset_extract(
        load_dir, mi_kind, win, th, seiz_state, test_frac, win_cap)

    # 5-fold stratified CV (legacy sklearn API: StratifiedKFold(labels, n_folds)).
    cv = StratifiedKFold(y_train, n_folds=5)
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    for fold_train, fold_test in cv:
        fold_probs = clf.fit(
            X_train[fold_train], y_train[fold_train]).predict_proba(X_train[fold_test])
        fpr, tpr, thresholds = roc_curve(y_train[fold_test], fold_probs[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)  # resample onto common FPR grid
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    score = auc(mean_fpr, mean_tpr)

    clf_name = clf.__class__.__name__
    comment = {
        'ClassifierType': clf_name,
        # NOTE(review): the modulo looks odd (8 % 5 == 3) — kept as-is.
        'MaxWindows': float(win_cap % 5),
    }
    if "RandomForest" in clf_name:
        comment['number_estimators'] = n_trees
    elif "AdaBoost" in clf_name:
        # Dead branch with the forest classifier above; base_clf/algo are
        # not defined in this function.
        comment['BaseClassifier'] = base_clf.__class__.__name__
        comment['number_estimators'] = n_trees
        comment['algorithm'] = algo
    elif "LogisticRegression" in clf_name:
        comment['penalty'] = pen
    elif "SVC" in clf_name:
        comment['kernel'] = kern
    comment['StateSwitch'] = seiz_state

    suffix = ''.join([seiz_state, str(n_trees)])
    rec_test_result(
        "CV", "mlv2/rocbin/num_est/th{}/".format(th),
        {'comment': comment, 'score': score, 'th': th},
        suffix)