def modelopt(nhmmstates, nhmmiter):
    """Return the negated mean 5-fold CV score of an HMM classifier on ECoGmixed.

    Only element ``[0]`` of each argument is read.  ``nhmmstates[0]`` and
    ``nhmmiter[0] * 10`` are forwarded as the first two arguments of
    ``HMMClassifier.train`` together with the covariance type ``'full'``.
    The mean of the per-fold ``HMMClassifier.test`` results is printed and
    its negation is returned.
    """
    n_states = nhmmstates[0]
    n_iter = nhmmiter[0] * 10
    nfolds = 5
    covariance = 'full'
    print("%s %s" % (n_states, n_iter))

    # Only the preprocessed training sequences and their labels are used;
    # the static features and the test split are not loaded.
    sequences = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy')
    targets = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy')
    nsamples = sequences.shape[0]

    # k-fold cross validation: contiguous index chunks act as hold-out sets.
    every_idx = set(range(nsamples))
    fold_scores = []
    for fold_no, holdout in enumerate(np.array_split(range(nsamples), nfolds), 1):
        fit_idx = list(every_idx - set(holdout))
        print("Current fold is %d / %d" % (fold_no, nfolds))

        # A fresh classifier per fold, fitted on everything outside the fold.
        classifier = HMMClassifier()
        model_pos, model_neg = classifier.train(n_states, n_iter, covariance,
                                                sequences[fit_idx], targets[fit_idx])
        fold_scores.append(classifier.test(model_pos, model_neg,
                                           sequences[holdout], targets[holdout]))

    mean_score = np.mean(fold_scores)
    print('Result: %.4f' % mean_score)
    return -mean_score
def function(nhmmstates, nestimators, nhmmiter):
    """Return the negated mean 5-fold CV score of an RF on HMM-enriched ECoGmixed data.

    Only element ``[0]`` of each argument is read: ``nhmmstates[0]`` is used
    as is, ``nestimators[0]`` is multiplied by 100 and ``nhmmiter[0]`` by 10.

    Steps:
      1. Load the static (fourier) and dynamic (preprocessed) train/test
         splits and merge each pair into one array.
      2. For every fold, fit an ``HMMClassifier`` on the other folds and
         store its ``pos_neg_ratios`` output for the fold's samples.
      3. Append those ratios as one extra column to the static features.
      4. Cross-validate a ``RandomForestClassifier`` on the enriched
         features, print the mean of ``rf.score`` and return its negation.
    """
    nhmmstates = nhmmstates[0]
    nestimators = nestimators[0] * 100
    nhmmiter = nhmmiter[0] * 10
    nfolds = 5
    hmmcovtype = "full"
    # Single-argument print() behaves the same under Python 2 and Python 3.
    print("%s %s %s" % (nhmmstates, nestimators, nhmmiter))

    # Load the dataset
    static_train = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/train_data.npy")
    dynamic_train = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy")
    static_val = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy")
    dynamic_val = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy")
    labels_train = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy")
    labels_val = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy")

    # Merge train and test
    static_all = np.concatenate((static_train, static_val), axis=0)
    dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0)
    labels_all = np.concatenate((labels_train, labels_val), axis=0)
    nsamples = static_all.shape[0]

    # The same contiguous split serves both the enrichment and the scoring CV.
    all_idx = set(range(nsamples))
    folds = np.array_split(range(nsamples), nfolds)

    # (1.) out-of-fold HMM ratios: each sample's ratio comes from a model
    # that was not fitted on that sample.
    ratios_all_hmm = np.empty(len(labels_all))
    for enrich_idx in folds:
        train_idx = list(all_idx - set(enrich_idx))
        hmmcl = HMMClassifier()
        model_pos, model_neg = hmmcl.train(
            nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]
        )
        ratios_all_hmm[enrich_idx] = hmmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[enrich_idx])

    # dataset for hybrid learning; the ratios are appended as a plain ndarray
    # column so the result stays an ndarray rather than an np.matrix.
    enriched_by_hmm = np.concatenate((static_all, ratios_all_hmm[:, np.newaxis]), axis=1)

    # (2.) k-fold cross validation to obtain accuracy
    scores = []
    for val_idx in folds:
        train_idx = list(all_idx - set(val_idx))

        # Hybrid on features enriched by HMM (3)
        rf = RandomForestClassifier(n_estimators=nestimators)
        rf.fit(enriched_by_hmm[train_idx], labels_all[train_idx])
        scores.append(rf.score(enriched_by_hmm[val_idx], labels_all[val_idx]))

    mean_score = np.mean(scores)
    print("Result: %.4f" % mean_score)
    return -mean_score
def bpic(nhmmstates, nhmmiter):
    """Return the negated mean 5-fold CV score of an HMM classifier on BPIChallenge f1.

    Only element ``[0]`` of each argument is read; ``nhmmiter[0]`` is
    multiplied by 10.  Train and test splits are merged before the folds are
    cut.  The mean of the per-fold ``HMMClassifier.test`` results is printed
    and its negation is returned.
    """
    n_states = nhmmstates[0]
    n_iter = nhmmiter[0] * 10
    nfolds = 5
    covariance = 'full'
    print("%s %s" % (n_states, n_iter))

    # Load the dataset
    static_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_static.npy'
    )
    dynamic_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_dynamic.npy'
    )
    static_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_static.npy'
    )
    dynamic_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_dynamic.npy'
    )
    labels_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_labels.npy'
    )
    labels_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy'
    )

    # Merge train and test; the static features only supply the sample count.
    static_all = np.concatenate((static_train, static_val), axis=0)
    sequences = np.concatenate((dynamic_train, dynamic_val), axis=0)
    targets = np.concatenate((labels_train, labels_val), axis=0)
    nsamples = static_all.shape[0]

    # k-fold cross validation over contiguous index chunks.
    every_idx = set(range(nsamples))
    fold_scores = []
    for holdout in np.array_split(range(nsamples), nfolds):
        fit_idx = list(every_idx - set(holdout))

        # A fresh classifier per fold, fitted on everything outside the fold.
        classifier = HMMClassifier()
        model_pos, model_neg = classifier.train(n_states, n_iter, covariance,
                                                sequences[fit_idx], targets[fit_idx])
        fold_scores.append(classifier.test(model_pos, model_neg,
                                           sequences[holdout], targets[holdout]))

    mean_score = np.mean(fold_scores)
    print('Result: %.4f' % mean_score)
    return -mean_score
def modelopt(nhmmstates, nhmmiter):
    """Cross-validate an HMM classifier on ECoGmixed and return minus the mean score.

    Only element ``[0]`` of each argument is read; ``nhmmiter[0]`` is
    multiplied by 10.  Five contiguous folds of the preprocessed training
    set are used.  The mean of the per-fold ``HMMClassifier.test`` results
    is printed and its negation is returned.
    """
    n_states = nhmmstates[0]
    n_iter = nhmmiter[0] * 10
    nfolds = 5
    covariance = 'full'
    print("%s %s" % (n_states, n_iter))

    # Only the dynamic training data and labels are needed here.
    sequences = np.load(
        '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy')
    targets = np.load(
        '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy')
    nsamples = sequences.shape[0]

    every_idx = set(range(nsamples))
    holdout_chunks = np.array_split(range(nsamples), nfolds)

    fold_scores = []
    for fold_no, holdout in enumerate(holdout_chunks, 1):
        fit_idx = list(every_idx - set(holdout))
        print("Current fold is %d / %d" % (fold_no, nfolds))

        # Fit on the complement of the fold, evaluate on the fold itself.
        classifier = HMMClassifier()
        model_pos, model_neg = classifier.train(n_states, n_iter, covariance,
                                                sequences[fit_idx],
                                                targets[fit_idx])
        fold_scores.append(
            classifier.test(model_pos, model_neg, sequences[holdout],
                            targets[holdout]))

    mean_score = np.mean(fold_scores)
    print('Result: %.4f' % mean_score)
    return -mean_score
def bpic(nhmmstates, nhmmiter):
    """Cross-validate an HMM classifier on BPIChallenge f1 and return minus the mean score.

    Only element ``[0]`` of each argument is read; ``nhmmiter[0]`` is
    multiplied by 10.  The train and test splits are concatenated and cut
    into five contiguous folds.  The mean of the per-fold
    ``HMMClassifier.test`` results is printed and its negation is returned.
    """
    n_states = nhmmstates[0]
    n_iter = nhmmiter[0] * 10
    nfolds = 5
    covariance = 'full'
    print("%s %s" % (n_states, n_iter))

    # Load the dataset
    static_train = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_static.npy')
    dynamic_train = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_dynamic.npy')
    static_val = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_static.npy')
    dynamic_val = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_dynamic.npy')
    labels_train = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_labels.npy')
    labels_val = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy')

    # Merge train and test; the sample count is taken from the static block.
    static_all = np.concatenate((static_train, static_val), axis=0)
    sequences = np.concatenate((dynamic_train, dynamic_val), axis=0)
    targets = np.concatenate((labels_train, labels_val), axis=0)
    nsamples = static_all.shape[0]

    every_idx = set(range(nsamples))
    fold_scores = []
    for holdout in np.array_split(range(nsamples), nfolds):
        fit_idx = list(every_idx - set(holdout))

        # Fit on the complement of the fold, evaluate on the fold itself.
        classifier = HMMClassifier()
        model_pos, model_neg = classifier.train(n_states, n_iter, covariance,
                                                sequences[fit_idx], targets[fit_idx])
        fold_scores.append(classifier.test(model_pos, model_neg,
                                           sequences[holdout], targets[holdout]))

    mean_score = np.mean(fold_scores)
    print('Result: %.4f' % mean_score)
    return -mean_score
"""
Generate HMM-based classifier on syn_lstm_wins synthetic dataset
"""

import numpy as np
from HMM.hmm_classifier import HMMClassifier

print('Loading the dataset..')
# The four arrays are read in the order train data, train labels,
# test data, test labels.
train_data, train_labels, test_data, test_labels = [
    np.load(npy_path)
    for npy_path in (
        "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_dynamic.npy",
        "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_labels.npy",
        "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_dynamic.npy",
        "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_labels.npy",
    )
]

print("Training HMM classifier...")
hmmcl = HMMClassifier()
# Fit on the training split, then print whatever test() reports for the
# held-out split.
model_pos, model_neg = hmmcl.train(2, 2, 'full', train_data, train_labels)
test_result = hmmcl.test(model_pos, model_neg, test_data, test_labels)
print(test_result)
# load the data print "Reading data..." train_data = np.load("/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/train_data.npy") train_labels = np.load("/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/train_labels.npy") test_data = np.load("/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_data.npy") test_labels = np.load("/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_labels.npy") # split the training data into two halves # fh stands for first half # sh stands for second half print "Splitting data in two halves..." fh_data, fh_labels, sh_data, sh_labels = DataHandler.split(0.5, train_data, train_labels) # train HMM on first 50% of the training set print "Training HMM classifier..." hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(NSTATES, NITERS, fh_data, fh_labels) # feed second 50% of the training set into the HMM to obtain # pos/neg ratio for every sequence in the second half of the training set print "Extracting ratios based on the HMM model..." sh_ratios = hmmcl.pos_neg_ratios(model_pos, model_neg, sh_data) test_ratios = hmmcl.pos_neg_ratios(model_pos, model_neg, test_data) # apply fourier transform on the second 50% of the training set print "Fouriering the second half of the dataset..." fourier_sh_data = Fourier.data_to_fourier(sh_data) fourier_test_data = Fourier.data_to_fourier(test_data) # augment fourier results of the second 50% train with the ratios thus producing an enriched dataset print "Merging Fourier features and HMM-based ratios..."
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 22 17:58:39 2015

@author: annaleontjeva
"""

from HMM.hmm_classifier import HMMClassifier
from DataNexus.datahandler import DataHandler
import numpy as np

# Arrays are read in the order train data, train labels, test data,
# test labels.
train_data, train_labels, test_data, test_labels = [
    np.load(npy_path)
    for npy_path in (
        "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/train_data.npy",
        "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/train_labels.npy",
        "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_data.npy",
        "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_labels.npy",
    )
]

hmmcl = HMMClassifier()

# Earlier single-run variant, kept for reference:
#model_pos, model_neg = hmmcl.train(20, 100, train_data, train_labels)
#print hmmcl.test_model(model_pos, model_neg, test_data, test_labels)

# Parameter search over the candidates 20..30 on the training split only.
# NOTE(review): the meaning of 0.7, 10 and 5 is defined by
# HMMClassifier.find_best_parameter -- confirm against its signature.
candidate_values = range(20, 31)
hmmcl.find_best_parameter(0.7, candidate_values, 10, 5, train_data, train_labels)
# Excerpt: k-fold cross-validation of an HMM classifier on the ECoGmixed
# dynamic training data.  dynamic_train, nfolds, nstates and niter are
# defined before this excerpt.
static_val = np.load(
    '/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy')
dynamic_val = np.load(
    '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy')
labels_train = np.load(
    '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy')
labels_val = np.load(
    '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy')
# only the training split is cross-validated below
nsamples = dynamic_train.shape[0]

# split indices into folds
val_idx_list = np.array_split(range(nsamples), nfolds)

# run CV
scores = []
for fid, val_idx in enumerate(val_idx_list):
    print "Current fold is %d/%d" % (fid + 1, nfolds)
    # training indices are all indices that are not in the current fold
    train_idx = list(set(range(nsamples)) - set(val_idx))

    # train the model and report performance
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(nstates, niter, 'full',
                                       dynamic_train[train_idx],
                                       labels_train[train_idx])
    scores.append(
        hmmcl.test(model_pos, model_neg, dynamic_train[val_idx],
                   labels_train[val_idx]))

# mean, standard deviation and the raw per-fold results
print "===> (7) HMM with dynamic features on CV: %.4f (+/- %.4f) %s" % (
    np.mean(scores), np.std(scores), scores)
# Excerpt: enrich the static features with out-of-fold HMM state-visit
# counts, then train and score a random forest on a random 70/30 split.
# static_all, dynamic_all, labels_train, labels_val, nstates, niter,
# covtype, nfolds and nestimators are defined before this excerpt.
labels_all = np.concatenate((labels_train, labels_val), axis=0)
nsamples = static_all.shape[0]

#
# Cross-validation to collect enrichment features
#
# one row per sample; nstates columns for each of the two models
visits_all_hmm = np.empty((len(labels_all), nstates * 2))
predict_idx_list = np.array_split(range(nsamples), nfolds)
for fid, predict_idx in enumerate(predict_idx_list):
    print("Current fold is %d" % fid)
    train_idx = list(set(range(nsamples)) - set(predict_idx))

    # extract visit counts from HMM on dynamic
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(nstates, niter, covtype, dynamic_all[train_idx], labels_all[train_idx])
    visits_pos, visits_neg = hmmcl.state_visits(model_pos, model_neg, dynamic_all[predict_idx])
    visits_all_hmm[predict_idx] = np.hstack((visits_pos, visits_neg))

# prepare the dataset
enriched_by_hmm = np.concatenate((static_all, visits_all_hmm), axis=1)

# split the data into training and test
# np.round returns a float; the sample count handed to np.random.choice
# must be an integer, so it is cast explicitly.
ntrain = int(np.round(nsamples * 0.7, 0))
train_idx = np.random.choice(range(0, nsamples), size=ntrain, replace=False)
test_idx = list(set(range(0, nsamples)) - set(train_idx))

# train hybrid on features enriched by HMM (3)
rf = RandomForestClassifier(n_estimators=nestimators)
rf.fit(enriched_by_hmm[train_idx], labels_all[train_idx])
print("===> (3) Hybrid (RF) on features (state visit counts) enriched by HMM: %.4f" % rf.score(enriched_by_hmm[test_idx], labels_all[test_idx]))
# Excerpt: k-fold cross-validation of an HMM classifier on the merged
# dynamic data, collecting the two values returned by hmmcl.test per fold.
# dynamic_train, dynamic_val, labels_train, labels_val, static_all, nfolds,
# nhmmstates, nhmmiter and hmmcovtype are defined before this excerpt.
dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0)
labels_all = np.concatenate((labels_train, labels_val), axis=0)
nsamples = static_all.shape[0]

# split indices into folds
val_idx_list = np.array_split(range(nsamples), nfolds)

# run CV
scores_acc = []
scores_auc = []
for fid, val_idx in enumerate(val_idx_list):
    print("Current fold is %d" % fid)
    train_idx = list(set(range(nsamples)) - set(val_idx))

    # HMM on dynamic features (7)
    hmmcl = HMMClassifier()
    dnm_train = dynamic_all[train_idx]
    lbls_train = labels_all[train_idx]
    # The held-out fold is used for evaluation.  Slicing with train_idx here
    # would score the model on the very samples it was fitted on.
    dnm_val = dynamic_all[val_idx]
    lbls_val = labels_all[val_idx]
    model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dnm_train, lbls_train)
    acc, auc = hmmcl.test(model_pos, model_neg, dnm_val, lbls_val)
    scores_acc.append(acc)
    scores_auc.append(auc)

# mean, standard deviation and raw per-fold values for both metrics
print("===> (7) accuracy of HMM on dynamic features: %.4f (+/- %.4f) %s" % (np.mean(scores_acc), np.std(scores_acc), scores_acc))
print("===> (7) auc of HMM on dynamic features: %.4f (+/- %.4f) %s" % (np.mean(scores_auc), np.std(scores_auc), scores_auc))
# run CV for fid, predict_idx in enumerate(predict_idx_list): print "Current fold is %d / %d" % (fid + 1, nfolds) train_idx = list(set(range(nsamples)) - set(predict_idx)) # extract predictions using RF on static print "Extracting predictions on static data with RF..." rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(static_all[train_idx], labels_all[train_idx]) predictions_all_rf[predict_idx] = rf.predict_log_proba(static_all[predict_idx]) predictions_all_rf[predictions_all_rf == -inf] = np.min(predictions_all_rf[predictions_all_rf != -inf]) # extract predictions using HMM on dynamic print "Extracting predictions on dynamic data with HMM..." hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) predictions_all_hmm[predict_idx] = hmmcl.predict_log_proba(model_pos, model_neg, dynamic_all[predict_idx]) ratios_all_hmm[predict_idx] = hmmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx]) # # Prepare combined datasets for the future experiments # # datasets for ensemble learning predictions_combined_rf_hmm = np.concatenate((predictions_all_rf, predictions_all_hmm), axis=1) # datasets for hybrid learning enriched_by_hmm = np.concatenate((static_all, np.matrix(ratios_all_hmm).T), axis=1) # dataset to confirm that RF on dynamic is not better than generative models on dynamic data
# # Train enrichment models on trainA # print 'Training enrichment models...' # extract predictions using RF on static rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(trainA_static, trainA_labels) predictions_trainB_rf = rf.predict_log_proba(trainB_static) predictions_trainB_rf[predictions_trainB_rf == -inf] = np.min(predictions_trainB_rf[predictions_trainB_rf != -inf]) predictions_test_rf = rf.predict_log_proba(test_static) predictions_test_rf[predictions_test_rf == -inf] = np.min(predictions_test_rf[predictions_test_rf != -inf]) # extract predictions using HMM on dynamic hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, trainA_dynamic, trainA_labels) predictions_trainB_hmm = hmmcl.predict_log_proba(model_pos, model_neg, trainB_dynamic) ratios_trainB_hmm = hmmcl.pos_neg_ratios(model_pos, model_neg, trainB_dynamic) predictions_test_hmm = hmmcl.predict_log_proba(model_pos, model_neg, test_dynamic) ratios_test_hmm = hmmcl.pos_neg_ratios(model_pos, model_neg, test_dynamic) # extract predictions using LSTM on dynamic lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(trainA_dynamic, trainA_labels) mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, trainB_dynamic) predictions_trainB_lstm = np.vstack((mse_pos, mse_neg)).T ratios_trainB_lstm = lstmcl.pos_neg_ratios(model_pos, model_neg, trainB_dynamic) mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, test_dynamic) predictions_test_lstm = np.vstack((mse_pos, mse_neg)).T ratios_test_lstm = lstmcl.pos_neg_ratios(model_pos, model_neg, test_dynamic)
# prepare where to store the ratios ratios_all_hmm = np.empty(len(labels_all)) predictions_all_hmm = np.empty((len(labels_all), 2)) predictions_all = np.empty((len(labels_all),)) # split indices into folds enrich_idx_list = np.array_split(range(nsamples), nfolds) # run CV for enrichment for fid, enrich_idx in enumerate(enrich_idx_list): print "Current fold is %d/%d" % (fid + 1, nfolds) train_idx = list(set(range(nsamples)) - set(enrich_idx)) # extract predictions using HMM on dynamic hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) ratios_all_hmm[enrich_idx] = hmmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[enrich_idx]) predictions_all_hmm[enrich_idx] = hmmcl.predict_log_proba(model_pos, model_neg, dynamic_all[enrich_idx]) predictions_all[enrich_idx] = hmmcl.predict(hmmcl.tensor_to_list(dynamic_all[enrich_idx]), model_pos, model_neg) # dataset for hybrid learning dynamic_as_static = dynamic_all.reshape((dynamic_all.shape[0], dynamic_all.shape[1] * dynamic_all.shape[2])) enriched_by_hmm = np.concatenate((dynamic_as_static, predictions_all_hmm), axis=1) # k-fold cross validation to obtain accuracy print "===> HMM on dynamic: %.4f" % hmmcl.accuracy(predictions_all, labels_all) val_idx_list = np.array_split(range(nsamples), nfolds) scores = []
# Excerpt: sanity checks on the syn_lstm_wins dataset with RF, HMM and LSTM.
# static_train, static_val, dynamic_train and dynamic_val are loaded before
# this excerpt.
labels_train = np.load("/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_labels.npy")
labels_val = np.load("/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_labels.npy")

#
# Sanity Checks
#
print "Expected performance of a lonely model is 0.75, of the joint model 1.0"

# static data with RF
rf = RandomForestClassifier(n_estimators=100)
rf.fit(static_train, labels_train)
print "Random Forest with static features on validation set: %.4f" % rf.score(static_val, labels_val)

# dynamic data with HMM
# NOTE(review): train() is called here without a covariance-type argument --
# confirm this matches the HMMClassifier version in use.
hmmcl = HMMClassifier()
model_pos, model_neg = hmmcl.train(3, 10, dynamic_train, labels_train)
print "HMM with dynamic features on validation set: %.4f" % hmmcl.test(model_pos, model_neg, dynamic_val, labels_val)

# dynamic data with LSTM
# (model_pos / model_neg are rebound to the LSTM models from here on)
lstmcl = LSTMClassifier(2000, 0.5, "adagrad", 20)
model_pos, model_neg = lstmcl.train(dynamic_train, labels_train)
print "LSTM with dynamic features on validation set: %.4f" % lstmcl.test(model_pos, model_neg, dynamic_val, labels_val)

# dynamic data with RF
print "Training RF on the dynamic dataset..."
# flatten the last two axes of the dynamic data into one feature axis
dynamic_as_static_train = dynamic_train.reshape(
    (dynamic_train.shape[0], dynamic_train.shape[1] * dynamic_train.shape[2])
)
dynamic_as_static_val = dynamic_val.reshape((dynamic_val.shape[0], dynamic_val.shape[1] * dynamic_val.shape[2]))
rf = RandomForestClassifier(n_estimators=100)
labels_all = np.concatenate((labels_train, labels_val), axis=0) nsamples = static_all.shape[0] # # Cross-validation to collect enrichment features # visits_all_hmm = np.empty((len(labels_all), nstates * 2)) predict_idx_list = np.array_split(range(nsamples), nfolds) for fid, predict_idx in enumerate(predict_idx_list): print "Current fold is %d" % fid train_idx = list(set(range(nsamples)) - set(predict_idx)) # extract visit counts from HMM on dynamic hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nstates, niter, covtype, dynamic_all[train_idx], labels_all[train_idx]) visits_pos, visits_neg = hmmcl.state_visits(model_pos, model_neg, dynamic_all[predict_idx]) visits_all_hmm[predict_idx] = np.hstack((visits_pos, visits_neg)) # prepare the dataset enriched_by_hmm = np.concatenate((static_all, visits_all_hmm), axis=1) # split the data into training and test train_idx = np.random.choice(range(0, nsamples), size=np.round(nsamples * 0.7, 0), replace=False) test_idx = list(set(range(0, nsamples)) - set(train_idx)) # train hybrid on features enriched by HMM (3) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(enriched_by_hmm[train_idx], labels_all[train_idx]) print "===> (3) Hybrid (RF) on features (state visit counts) enriched by HMM: %.4f" % rf.score(
# Excerpt: add one out-of-fold HMM ratio column to the static features and
# start the accuracy cross-validation.  static_all, dynamic_all,
# labels_train, labels_val, nfolds and the HMM hyper-parameters are defined
# before this excerpt; the body of the last loop continues after it.
labels_all = np.concatenate((labels_train, labels_val), axis=0)
nsamples = static_all.shape[0]

# prepare where to store the ratios
ratios_all_hmm = np.empty(len(labels_all))

# split indices into folds
enrich_idx_list = np.array_split(range(nsamples), nfolds)

# CV for feature enrichment
for fid, enrich_idx in enumerate(enrich_idx_list):
    print "Current fold is %d / %d" % (fid + 1, nfolds)
    train_idx = list(set(range(nsamples)) - set(enrich_idx))

    # extract predictions using HMM on dynamic
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx])
    ratios_all_hmm[enrich_idx] = hmmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[enrich_idx])

# dataset for hybrid learning
# np.matrix(...).T turns the 1-D ratio vector into a single column
enriched_by_hmm = np.concatenate((static_all, np.matrix(ratios_all_hmm).T), axis=1)

# CV for accuracy estimation
val_idx_list = np.array_split(range(nsamples), nfolds)
scores = []
for fid, val_idx in enumerate(val_idx_list):
    print "Current fold is %d / %d" % (fid + 1, nfolds)
    train_idx = list(set(range(nsamples)) - set(val_idx))

    # Hybrid on features enriched by HMM (3)
# Train enrichment models on trainA # print 'Training enrichment models...' # extract predictions using RF on static rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(trainA_static, trainA_labels) predictions_trainB_rf = rf.predict_log_proba(trainB_static) predictions_trainB_rf[predictions_trainB_rf == -inf] = np.min( predictions_trainB_rf[predictions_trainB_rf != -inf]) predictions_test_rf = rf.predict_log_proba(test_static) predictions_test_rf[predictions_test_rf == -inf] = np.min( predictions_test_rf[predictions_test_rf != -inf]) # extract predictions using HMM on dynamic hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, trainA_dynamic, trainA_labels) predictions_trainB_hmm = hmmcl.predict_log_proba(model_pos, model_neg, trainB_dynamic) ratios_trainB_hmm = hmmcl.pos_neg_ratios(model_pos, model_neg, trainB_dynamic) predictions_test_hmm = hmmcl.predict_log_proba(model_pos, model_neg, test_dynamic) ratios_test_hmm = hmmcl.pos_neg_ratios(model_pos, model_neg, test_dynamic) # extract predictions using LSTM on dynamic lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(trainA_dynamic, trainA_labels) mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, trainB_dynamic) predictions_trainB_lstm = np.vstack((mse_pos, mse_neg)).T
    "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/train_labels.npy")
# The line above closes a call that was opened before this excerpt.
# NSTATES, NITERS, train_data, train_labels, DataHandler, HMMClassifier and
# Fourier are defined or imported outside this excerpt.
test_data = np.load(
    "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_data.npy")
test_labels = np.load(
    "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_labels.npy")

# split the training data into two halves
# fh stands for first half
# sh stands for second half
print "Splitting data in two halves..."
fh_data, fh_labels, sh_data, sh_labels = DataHandler.split(
    0.5, train_data, train_labels)

# train HMM on first 50% of the training set
print "Training HMM classifier..."
hmmcl = HMMClassifier()
model_pos, model_neg = hmmcl.train(NSTATES, NITERS, fh_data, fh_labels)

# feed second 50% of the training set into the HMM to obtain
# pos/neg ratio for every sequence in the second half of the training set;
# the same models also produce ratios for the test set
print "Extracting ratios based on the HMM model..."
sh_ratios = hmmcl.pos_neg_ratios(model_pos, model_neg, sh_data)
test_ratios = hmmcl.pos_neg_ratios(model_pos, model_neg, test_data)

# apply fourier transform on the second 50% of the training set
# and on the test set
print "Fouriering the second half of the dataset..."
fourier_sh_data = Fourier.data_to_fourier(sh_data)
fourier_test_data = Fourier.data_to_fourier(test_data)

# augment fourier results of the second 50% train with the ratios thus producing an enriched dataset
print "Merging Fourier features and HMM-based ratios..."
"""
Generate HMM-based classifier on syn_lstm_wins synthetic dataset
"""

import numpy as np
from HMM.hmm_classifier import HMMClassifier

print('Loading the dataset..')
# Read order: train data, train labels, test data, test labels.
train_data, train_labels, test_data, test_labels = [
    np.load(npy_path)
    for npy_path in (
        "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_dynamic.npy",
        "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_labels.npy",
        "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_dynamic.npy",
        "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_labels.npy",
    )
]

print("Training HMM classifier...")
hmmcl = HMMClassifier()
# Fit the positive/negative model pair on the training split and print
# whatever test() reports for the held-out split.
model_pos, model_neg = hmmcl.train(2, 2, 'full', train_data, train_labels)
test_result = hmmcl.test(model_pos, model_neg, test_data, test_labels)
print(test_result)
# run CV for fid, predict_idx in enumerate(predict_idx_list): print "Current fold is %d" % fid train_idx = list(set(range(nsamples)) - set(predict_idx)) # extract predictions using RF on static print " Extracting predictions on static data with RF..." rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(static_all[train_idx], labels_all[train_idx]) predictions_all_rf[predict_idx] = rf.predict_log_proba(static_all[predict_idx]) predictions_all_rf[predictions_all_rf == -inf] = np.min(predictions_all_rf[predictions_all_rf != -inf]) # extract predictions using HMM on dynamic print " Extracting predictions on dynamic data with HMM..." hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) predictions_all_hmm[predict_idx] = hmmcl.predict_log_proba(model_pos, model_neg, dynamic_all[predict_idx]) ratios_all_hmm[predict_idx] = hmmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx]) # extract predictions using LSTM on dynamic print " Extracting predictions on dynamic data with LSTM..." lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx]) mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, dynamic_all[predict_idx]) predictions_all_lstm[predict_idx] = np.vstack((mse_pos, mse_neg)).T ratios_all_lstm[predict_idx] = lstmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx]) #
# prepare where to store the ratios ratios_all_hmm = np.empty(len(labels_all)) predictions_all_hmm = np.empty((len(labels_all), 2)) predictions_all = np.empty((len(labels_all), )) # split indices into folds enrich_idx_list = np.array_split(range(nsamples), nfolds) # run CV for enrichment for fid, enrich_idx in enumerate(enrich_idx_list): print "Current fold is %d/%d" % (fid + 1, nfolds) train_idx = list(set(range(nsamples)) - set(enrich_idx)) # extract predictions using HMM on dynamic hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) ratios_all_hmm[enrich_idx] = hmmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[enrich_idx]) predictions_all_hmm[enrich_idx] = hmmcl.predict_log_proba(model_pos, model_neg, dynamic_all[enrich_idx]) predictions_all[enrich_idx] = hmmcl.predict(hmmcl.tensor_to_list(dynamic_all[enrich_idx]), model_pos, model_neg) # dataset for hybrid learning dynamic_as_static = dynamic_all.reshape((dynamic_all.shape[0], dynamic_all.shape[1] * dynamic_all.shape[2])) enriched_by_hmm = np.concatenate((dynamic_as_static, predictions_all_hmm), axis=1) # k-fold cross validation to obtain accuracy print '===> HMM on dynamic: %.4f' % hmmcl.accuracy(predictions_all, labels_all) val_idx_list = np.array_split(range(nsamples), nfolds)
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 22 17:58:39 2015

@author: annaleontjeva
"""

from HMM.hmm_classifier import HMMClassifier
from DataNexus.datahandler import DataHandler
import numpy as np

# Read order: train data, train labels, test data, test labels.
train_data, train_labels, test_data, test_labels = [
    np.load(npy_path)
    for npy_path in (
        "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/train_data.npy",
        "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/train_labels.npy",
        "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_data.npy",
        "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_labels.npy",
    )
]

hmmcl = HMMClassifier()

# Earlier single-run variant, kept for reference:
#model_pos, model_neg = hmmcl.train(20, 100, train_data, train_labels)
#print hmmcl.test_model(model_pos, model_neg, test_data, test_labels)

# Parameter search over the candidates 20..30 on the training split only.
# NOTE(review): the meaning of 0.7, 10 and 5 is defined by
# HMMClassifier.find_best_parameter -- confirm against its signature.
candidate_values = range(20, 31)
hmmcl.find_best_parameter(0.7, candidate_values, 10, 5, train_data, train_labels)
# k-fold cross-validation of an HMM classifier on the ECoGmixed dynamic
# training data.

# parameters
nfolds = 5
nstates = 6
niter = 50

# load data
static_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/train_data.npy')
dynamic_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy')
static_val = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy')
dynamic_val = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy')
labels_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy')
labels_val = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy')

# only the training split is cross-validated
nsamples = dynamic_train.shape[0]

# split indices into folds
val_idx_list = np.array_split(range(nsamples), nfolds)
all_idx = set(range(nsamples))

# run CV
scores = []
for fid, val_idx in enumerate(val_idx_list):
    print("Current fold is %d/%d" % (fid + 1, nfolds))
    train_idx = list(all_idx - set(val_idx))

    # train the model on the other folds
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(
        nstates, niter, 'full',
        dynamic_train[train_idx], labels_train[train_idx])

    # record its performance on the held-out fold
    fold_result = hmmcl.test(
        model_pos, model_neg,
        dynamic_train[val_idx], labels_train[val_idx])
    scores.append(fold_result)

# mean, standard deviation and the raw per-fold results
summary = (np.mean(scores), np.std(scores), scores)
print("===> (7) HMM with dynamic features on CV: %.4f (+/- %.4f) %s" % summary)
def bpic(nhmmstates, nestimators, nhmmiter):
    """Score a Random Forest on static features enriched with HMM ratios.

    Only element 0 of every argument is used (presumably the calling
    convention of the optimiser that drives this objective -- confirm
    against the caller).

    Args:
        nhmmstates: ``nhmmstates[0]`` is forwarded to ``HMMClassifier.train``
            as its first argument.
        nestimators: ``nestimators[0] * 100`` is used as ``n_estimators`` of
            the Random Forest.
        nhmmiter: ``nhmmiter[0] * 10`` is forwarded to
            ``HMMClassifier.train`` as its second argument.

    Returns:
        The negated mean of the per-fold ``rf.score`` values, so higher
        scores give smaller return values.
    """
    nhmmstates = nhmmstates[0]
    nestimators = nestimators[0] * 100
    nhmmiter = nhmmiter[0] * 10
    nfolds = 5
    hmmcovtype = 'full'
    # Single-argument form prints the same text as `print a, b, c`.
    print("%s %s %s" % (nhmmstates, nestimators, nhmmiter))

    # Load the dataset
    static_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_static.npy')
    dynamic_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_dynamic.npy')
    static_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_static.npy')
    dynamic_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_dynamic.npy')
    labels_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_labels.npy')
    labels_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy')

    # Merge train and test
    static_all = np.concatenate((static_train, static_val), axis=0)
    dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0)
    labels_all = np.concatenate((labels_train, labels_val), axis=0)
    nsamples = static_all.shape[0]

    # The same index split serves both stages below; the original computed
    # it twice with identical arguments.
    fold_idx_list = np.array_split(range(nsamples), nfolds)

    # (1.) out-of-fold HMM ratios: every sample gets a ratio from a model
    # pair that was trained without it
    ratios_all_hmm = np.empty(len(labels_all))
    for fid, enrich_idx in enumerate(fold_idx_list):
        train_idx = list(set(range(nsamples)) - set(enrich_idx))

        # extract predictions using HMM on dynamic
        hmmcl = HMMClassifier()
        model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype,
                                           dynamic_all[train_idx],
                                           labels_all[train_idx])
        ratios_all_hmm[enrich_idx] = hmmcl.pos_neg_ratios(
            model_pos, model_neg, dynamic_all[enrich_idx])

    # dataset for hybrid learning: the ratios become one extra column.
    # A plain (nsamples, 1) ndarray is appended instead of np.matrix(...).T so
    # the enriched table stays an ndarray rather than becoming an np.matrix.
    enriched_by_hmm = np.concatenate(
        (static_all, ratios_all_hmm.reshape(-1, 1)), axis=1)

    # (2.) k-fold cross validation to obtain accuracy
    scores = []
    for fid, val_idx in enumerate(fold_idx_list):
        train_idx = list(set(range(nsamples)) - set(val_idx))

        # Hybrid on features enriched by HMM (3)
        rf = RandomForestClassifier(n_estimators=nestimators)
        rf.fit(enriched_by_hmm[train_idx], labels_all[train_idx])
        scores.append(rf.score(enriched_by_hmm[val_idx], labels_all[val_idx]))

    print('Result: %.4f' % np.mean(scores))
    return -np.mean(scores)
)
# NOTE(review): the ")" above closes a call that starts before this fragment.
labels_val = np.load(
    '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy')

# scale overly large values (currently disabled)
#dynamic_train[:, 1, :] = dynamic_train[:, 1, :] / 1000000000.0
#dynamic_val[:, 1, :] = dynamic_val[:, 1, :] / 1000000000.0

# static data with RF: fit on the static training features and report the
# score on the validation split
rf = RandomForestClassifier(n_estimators=rf_estimators, n_jobs=-1)
rf.fit(static_train, labels_train)
print "Random Forest with static features on validation set: %.4f" % rf.score(
    static_val, labels_val)

# dynamic data with HMM: train a positive/negative model pair and report the
# result of hmmcl.test on the validation split
hmmcl = HMMClassifier()
model_pos, model_neg = hmmcl.train(3, 10, dynamic_train, labels_train)
print "HMM with dynamic features on validation set: %.4f" % hmmcl.test(
    model_pos, model_neg, dynamic_val, labels_val)

# dynamic data with RF: flatten axes 1 and 2 of every sample into one feature
# vector so the 3-D arrays can be fed to a Random Forest
print "Training RF on the dynamic dataset..."
dynamic_as_static_train = dynamic_train.reshape(
    (dynamic_train.shape[0], dynamic_train.shape[1] * dynamic_train.shape[2]))
dynamic_as_static_val = dynamic_val.reshape(
    (dynamic_val.shape[0], dynamic_val.shape[1] * dynamic_val.shape[2]))
rf = RandomForestClassifier(n_estimators=rf_estimators, n_jobs=-1)
rf.fit(dynamic_as_static_train, labels_train)
print "RF with dynamic features on validation set: %.4f" % rf.score(
    dynamic_as_static_val, labels_val)
print('Loading the dataset..')
static_train = np.load('/storage/hpc_anna/GMiC/Data/syn_multisame/train_static.npy')
dynamic_train = np.load('/storage/hpc_anna/GMiC/Data/syn_multisame/train_dynamic.npy')
static_val = np.load('/storage/hpc_anna/GMiC/Data/syn_multisame/test_static.npy')
dynamic_val = np.load('/storage/hpc_anna/GMiC/Data/syn_multisame/test_dynamic.npy')
labels_train = np.load('/storage/hpc_anna/GMiC/Data/syn_multisame/train_labels.npy')
labels_val = np.load('/storage/hpc_anna/GMiC/Data/syn_multisame/test_labels.npy')

#
# Evaluating Joint Model
#
print("Evaluating joint model:")
print("Splitting data in two halves...")

# Random half of the training samples, drawn without replacement. np.round
# returns a float, and np.random.choice needs an integer `size`, hence the
# explicit int().
half_size = int(np.round(dynamic_train.shape[0] * 0.5, 0))
fh_idx = np.random.choice(range(0, dynamic_train.shape[0]), size=half_size, replace=False)
# second half = every index that was not drawn for the first half
sh_idx = list(set(range(0, dynamic_train.shape[0])) - set(fh_idx))
fh_data = dynamic_train[fh_idx, :, :]
fh_labels = labels_train[fh_idx]
sh_data = dynamic_train[sh_idx, :, :]
sh_labels = labels_train[sh_idx]

# The HMM pair is trained on the first half only.
print("Training HMM classifier...")
hmmcl = HMMClassifier()
model_pos, model_neg = hmmcl.train(3, 10, fh_data, fh_labels)

# Dump the fitted parameters of both models; the single-argument form prints
# the same text as `print a, b, c, d`.
print("%s %s %s %s" % (model_pos.startprob, model_pos.transmat, model_pos.means, model_pos.covars))
print("%s %s %s %s" % (model_neg.startprob, model_neg.transmat, model_neg.means, model_neg.covars))
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 22 17:58:39 2015

@author: annaleontjeva
"""

from HMM.hmm_classifier import HMMClassifier
import numpy as np

# ECoGmixed preprocessed arrays: data and labels for the train and test split
train_data = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy')
train_labels = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy')
test_data = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy')
test_labels = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy')

# TEMPORARY FOR TESTING PURPOSES
#train_data = train_data[:, 0:2, :]
#test_data = test_data[:, 0:2, :]

# HMM based classifier
hmmcl = HMMClassifier()

# one positive/negative model pair per feature, fitted on the training split
models_pos, models_neg = hmmcl.train_per_feature(3, 10, train_data, train_labels)

# evaluate the per-feature models on the test split and print whatever
# test_per_feature returns
per_feature_result = hmmcl.test_per_feature(models_pos, models_neg, test_data, test_labels)
print(per_feature_result)
# k-fold CV for enrichment

# one ratio slot per sample and per model family; slots are filled fold by
# fold, so after the loop every sample has an out-of-fold value
ratios_all_hmm = np.empty(len(labels_all))
ratios_all_lstm = np.empty(len(labels_all))

# consecutive index chunks; each chunk is predicted by models that were
# trained on all the other chunks
predict_idx_list = np.array_split(range(nsamples), nfolds)

for fid, predict_idx in enumerate(predict_idx_list):
    print("Enrichment fold %d / %d" % (fid + 1, nfolds))
    train_idx = list(set(range(nsamples)) - set(predict_idx))

    # HMM pair on the dynamic data -> ratios for the held-out chunk
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(
        nhmmstates, nhmmiter, hmmcovtype,
        dynamic_all[train_idx], labels_all[train_idx])
    hmm_ratios = hmmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx])
    ratios_all_hmm[predict_idx] = hmm_ratios

    # LSTM pair on the dynamic data -> ratios for the held-out chunk
    # (model_pos / model_neg are rebound to the LSTM models from here on)
    lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize)
    model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx])
    # mse_pos / mse_neg are not read again in this fragment
    mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, dynamic_all[predict_idx])
    lstm_ratios = lstmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx])
    ratios_all_lstm[predict_idx] = lstm_ratios
#print dynamic_val_labels.shape

#
# Sanity Checks
#
# Two reference numbers derived from error_ratio: 1 - error_ratio for a single
# model and 1 - error_ratio + error_ratio / 2 for the joint model.
print("Error ratio: %.2f, expected performance of a lonely model is %.2f, of the joint model %.2f"
      % (error_ratio, 1 - error_ratio, 1 - error_ratio + error_ratio / 2.0))

# a) static data classification
rf = RandomForestClassifier(n_estimators=100)
rf.fit(static_train_data, static_train_labels)
print("Random Forest with static features on validation set: %.4f"
      % rf.score(static_val_data, static_val_labels))

# b) dynamic data classification
hmmcl = HMMClassifier()
model_pos, model_neg = hmmcl.train(3, 10, dynamic_train_data, dynamic_train_labels)
print("HMM with dynamic features on validation set: %.4f"
      % hmmcl.test(model_pos, model_neg, dynamic_val_data, dynamic_val_labels))

#
# Evaluating Joint Model
#
print("")
print("Evaluating joint model:")
print("Splitting data in two halves...")

# Random half of the training samples, drawn without replacement. np.round
# returns a float, and np.random.choice needs an integer `size`, hence the
# explicit int().
half_size = int(np.round(dynamic_train_data.shape[0] * 0.5, 0))
fh_idx = np.random.choice(range(0, dynamic_train_data.shape[0]), size=half_size, replace=False)
# second half = every index that was not drawn for the first half
sh_idx = list(set(range(0, dynamic_train_data.shape[0])) - set(fh_idx))
fh_data = dynamic_train_data[fh_idx, :, :]
fh_labels = dynamic_train_labels[fh_idx]
# run CV
for fid, predict_idx in enumerate(predict_idx_list):
    print("Current fold is %d" % fid)
    train_idx = list(set(range(nsamples)) - set(predict_idx))

    # --- RF on static data: log-probabilities for the held-out chunk ---
    print(" Extracting predictions on static data with RF...")
    rf = RandomForestClassifier(n_estimators=nestimators)
    rf.fit(static_all[train_idx], labels_all[train_idx])
    rf_log_proba = rf.predict_log_proba(static_all[predict_idx])
    predictions_all_rf[predict_idx] = rf_log_proba
    # Replace every -inf entry by the smallest finite value in the array.
    # NOTE(review): the minimum is taken over the whole array, including rows
    # of folds that have not been filled yet; if predictions_all_rf was
    # allocated uninitialised (not visible here) those rows can influence it
    # -- confirm.
    finite_floor = np.min(predictions_all_rf[predictions_all_rf != -inf])
    predictions_all_rf[predictions_all_rf == -inf] = finite_floor

    # --- HMM on dynamic data: log-probabilities and ratios ---
    print(" Extracting predictions on dynamic data with HMM...")
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(
        nhmmstates, nhmmiter, hmmcovtype,
        dynamic_all[train_idx], labels_all[train_idx])
    hmm_log_proba = hmmcl.predict_log_proba(model_pos, model_neg, dynamic_all[predict_idx])
    predictions_all_hmm[predict_idx] = hmm_log_proba
    hmm_ratios = hmmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx])
    ratios_all_hmm[predict_idx] = hmm_ratios

    # --- LSTM on dynamic data: the two MSE vectors side by side, and ratios ---
    # (model_pos / model_neg are rebound to the LSTM models from here on)
    print(" Extracting predictions on dynamic data with LSTM...")
    lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs)
    model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx])
    mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, dynamic_all[predict_idx])
    mse_columns = np.vstack((mse_pos, mse_neg)).T
    predictions_all_lstm[predict_idx] = mse_columns
    lstm_ratios = lstmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx])
    ratios_all_lstm[predict_idx] = lstm_ratios

#
labels_val = np.load( '/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_labels.npy') # # Sanity Checks # print "Expected performance of a lonely model is 0.75, of the joint model 1.0" # static data with RF rf = RandomForestClassifier(n_estimators=100) rf.fit(static_train, labels_train) print "Random Forest with static features on validation set: %.4f" % rf.score( static_val, labels_val) # dynamic data with HMM hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(3, 10, dynamic_train, labels_train) print "HMM with dynamic features on validation set: %.4f" % hmmcl.test( model_pos, model_neg, dynamic_val, labels_val) # dynamic data with LSTM lstmcl = LSTMClassifier(2000, 0.5, 'adagrad', 20) model_pos, model_neg = lstmcl.train(dynamic_train, labels_train) print "LSTM with dynamic features on validation set: %.4f" % lstmcl.test( model_pos, model_neg, dynamic_val, labels_val) # dynamic data with RF print "Training RF on the dynamic dataset..." dynamic_as_static_train = dynamic_train.reshape( (dynamic_train.shape[0], dynamic_train.shape[1] * dynamic_train.shape[2])) dynamic_as_static_val = dynamic_val.reshape(
#
# Prepare combined datasets for the future experiments
#

# dataset to check how generative models perform if provided with static features along with dynamic
# Every static value is repeated along the last axis of dynamic_all, so each
# static feature becomes a constant "sequence". One broadcast assignment
# replaces the former per-sample loop over np.tile(...).T; the buffer is still
# created by np.zeros, so shape and dtype are unchanged.
static_as_dynamic = np.zeros((static_all.shape[0], static_all.shape[1], dynamic_all.shape[2]))
static_as_dynamic[:, :, :] = np.asarray(static_all)[:, :, np.newaxis]

# Small uniform noise in [-0.0001, 0.0001) is added to the constant sequences
# before they are appended to the dynamic features along axis 1.
dynamic_and_static_as_dynamic = np.concatenate(
    (dynamic_all,
     static_as_dynamic + np.random.uniform(-0.0001, 0.0001, static_as_dynamic.shape)),
    axis=1)

#
# k-fold CV for performance estimation
#
val_idx_list = np.array_split(range(nsamples), nfolds)
# one list of per-fold results for each experiment number 1..11
scores = {experiment: [] for experiment in range(1, 12)}
for fid, val_idx in enumerate(val_idx_list):
    print("Current fold is %d / %d" % (fid + 1, nfolds))
    train_idx = list(set(range(nsamples)) - set(val_idx))

    # HMM on dynamic and static (turned into fake sequences) (9)
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype,
                                       dynamic_and_static_as_dynamic[train_idx],
                                       labels_all[train_idx])
    # hmmcl.test returns a pair here; only the first element is recorded
    acc, auc = hmmcl.test(model_pos, model_neg,
                          dynamic_and_static_as_dynamic[val_idx],
                          labels_all[val_idx])
    scores[9].append(acc)

# summary over all folds: mean, standard deviation and the raw per-fold values
print("===> (9) HMM on dynamic and static features: %.4f (+/- %.4f) %s"
      % (np.mean(scores[9]), np.std(scores[9]), scores[9]))
# Enrichment loop: for every fold, models are trained on all other folds and
# their outputs for the held-out indices are written into the *_all_* buffers.
for fid, predict_idx in enumerate(predict_idx_list):
    print "Current fold is %d" % fid
    train_idx = list(set(range(nsamples)) - set(predict_idx))

    # extract predictions using RF on static
    print " Extracting predictions on static data with RF..."
    rf = RandomForestClassifier(n_estimators=nestimators)
    rf.fit(static_all[train_idx], labels_all[train_idx])
    predictions_all_rf[predict_idx] = rf.predict_log_proba(
        static_all[predict_idx])
    # Replace every -inf entry by the smallest finite value in the array.
    # NOTE(review): the minimum is taken over the whole array, including rows
    # of folds that have not been filled yet -- confirm how the buffer is
    # initialised.
    predictions_all_rf[predictions_all_rf == -inf] = np.min(
        predictions_all_rf[predictions_all_rf != -inf])

    # extract predictions using HMM on dynamic
    print " Extracting predictions on dynamic data with HMM..."
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype,
                                       dynamic_all[train_idx],
                                       labels_all[train_idx])
    predictions_all_hmm[predict_idx] = hmmcl.predict_log_proba(
        model_pos, model_neg, dynamic_all[predict_idx])
    ratios_all_hmm[predict_idx] = hmmcl.pos_neg_ratios(
        model_pos, model_neg, dynamic_all[predict_idx])

    # extract predictions using LSTM on dynamic
    # (model_pos / model_neg are rebound to the LSTM models from here on)
    print " Extracting predictions on dynamic data with LSTM..."
    lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs,
                            lstmbatchsize)
    model_pos, model_neg = lstmcl.train(dynamic_all[train_idx],
                                        labels_all[train_idx])
    # NOTE(review): the call below continues past the end of this fragment.
    mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg,