def modelopt(nhmmstates, nhmmiter): nhmmstates = nhmmstates[0] nhmmiter = nhmmiter[0] * 10 nfolds = 5 hmmcovtype = 'full' print nhmmstates, nhmmiter # Load the dataset #static_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/train_data.npy') dynamic_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy') #static_test = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy') #dynamic_test = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy') labels_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy') #labels_test = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy') nsamples = dynamic_train.shape[0] # k-fold cross validation to obtain accuracy val_idx_list = np.array_split(range(nsamples), nfolds) scores = [] for fid, val_idx in enumerate(val_idx_list): train_idx = list(set(range(nsamples)) - set(val_idx)) print "Current fold is %d / %d" % (fid + 1, nfolds) # extract predictions using HMM on dynamic hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_train[train_idx], labels_train[train_idx]) scores.append(hmmcl.test(model_pos, model_neg, dynamic_train[val_idx], labels_train[val_idx])) print 'Result: %.4f' % np.mean(scores) return -np.mean(scores)
def bpic(nhmmstates, nhmmiter): nhmmstates = nhmmstates[0] nhmmiter = nhmmiter[0] * 10 nfolds = 5 hmmcovtype = 'full' print nhmmstates, nhmmiter # Load the dataset static_train = np.load( '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_static.npy' ) dynamic_train = np.load( '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_dynamic.npy' ) static_val = np.load( '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_static.npy' ) dynamic_val = np.load( '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_dynamic.npy' ) labels_train = np.load( '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_labels.npy' ) labels_val = np.load( '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy' ) # Merge train and test static_all = np.concatenate((static_train, static_val), axis=0) dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0) labels_all = np.concatenate((labels_train, labels_val), axis=0) nsamples = static_all.shape[0] # k-fold cross validation to obtain accuracy val_idx_list = np.array_split(range(nsamples), nfolds) scores = [] for fid, val_idx in enumerate(val_idx_list): train_idx = list(set(range(nsamples)) - set(val_idx)) # extract predictions using HMM on dynamic hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) scores.append( hmmcl.test(model_pos, model_neg, dynamic_all[val_idx], labels_all[val_idx])) print 'Result: %.4f' % np.mean(scores) return -np.mean(scores)
def modelopt(nhmmstates, nhmmiter): nhmmstates = nhmmstates[0] nhmmiter = nhmmiter[0] * 10 nfolds = 5 hmmcovtype = 'full' print nhmmstates, nhmmiter # Load the dataset #static_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/train_data.npy') dynamic_train = np.load( '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy') #static_test = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy') #dynamic_test = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy') labels_train = np.load( '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy') #labels_test = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy') nsamples = dynamic_train.shape[0] # k-fold cross validation to obtain accuracy val_idx_list = np.array_split(range(nsamples), nfolds) scores = [] for fid, val_idx in enumerate(val_idx_list): train_idx = list(set(range(nsamples)) - set(val_idx)) print "Current fold is %d / %d" % (fid + 1, nfolds) # extract predictions using HMM on dynamic hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_train[train_idx], labels_train[train_idx]) scores.append( hmmcl.test(model_pos, model_neg, dynamic_train[val_idx], labels_train[val_idx])) print 'Result: %.4f' % np.mean(scores) return -np.mean(scores)
def bpic(nhmmstates, nhmmiter): nhmmstates = nhmmstates[0] nhmmiter = nhmmiter[0] * 10 nfolds = 5 hmmcovtype = 'full' print nhmmstates, nhmmiter # Load the dataset static_train = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_static.npy') dynamic_train = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_dynamic.npy') static_val = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_static.npy') dynamic_val = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_dynamic.npy') labels_train = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_labels.npy') labels_val = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy') # Merge train and test static_all = np.concatenate((static_train, static_val), axis=0) dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0) labels_all = np.concatenate((labels_train, labels_val), axis=0) nsamples = static_all.shape[0] # k-fold cross validation to obtain accuracy val_idx_list = np.array_split(range(nsamples), nfolds) scores = [] for fid, val_idx in enumerate(val_idx_list): train_idx = list(set(range(nsamples)) - set(val_idx)) # extract predictions using HMM on dynamic hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) scores.append(hmmcl.test(model_pos, model_neg, dynamic_all[val_idx], labels_all[val_idx])) print 'Result: %.4f' % np.mean(scores) return -np.mean(scores)
'/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy') # sclare overly large values #dynamic_train[:, 1, :] = dynamic_train[:, 1, :] / 1000000000.0 #dynamic_val[:, 1, :] = dynamic_val[:, 1, :] / 1000000000.0 # static data with RF rf = RandomForestClassifier(n_estimators=rf_estimators, n_jobs=-1) rf.fit(static_train, labels_train) print "Random Forest with static features on validation set: %.4f" % rf.score( static_val, labels_val) # dynamic data with HMM hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(3, 10, dynamic_train, labels_train) print "HMM with dynamic features on validation set: %.4f" % hmmcl.test( model_pos, model_neg, dynamic_val, labels_val) # dynamic data with RF print "Training RF on the dynamic dataset..." dynamic_as_static_train = dynamic_train.reshape( (dynamic_train.shape[0], dynamic_train.shape[1] * dynamic_train.shape[2])) dynamic_as_static_val = dynamic_val.reshape( (dynamic_val.shape[0], dynamic_val.shape[1] * dynamic_val.shape[2])) rf = RandomForestClassifier(n_estimators=rf_estimators, n_jobs=-1) rf.fit(dynamic_as_static_train, labels_train) print "RF with dynamic features on validation set: %.4f" % rf.score( dynamic_as_static_val, labels_val) # dynamic data with LSTM lstmcl = LSTMClassifier(2000, 0.5, 'adagrad', lstm_nepochs) model_pos, model_neg = lstmcl.train(dynamic_train, labels_train)
# parameters nfolds = 5 nstates = 6 niter = 50 # load data static_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/train_data.npy') dynamic_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy') static_val = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy') dynamic_val = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy') labels_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy') labels_val = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy') nsamples = dynamic_train.shape[0] # split indices into folds val_idx_list = np.array_split(range(nsamples), nfolds) # run CV scores = [] for fid, val_idx in enumerate(val_idx_list): print "Current fold is %d/%d" % (fid + 1, nfolds) train_idx = list(set(range(nsamples)) - set(val_idx)) # train the model and report performance hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nstates, niter, 'full', dynamic_train[train_idx], labels_train[train_idx]) scores.append(hmmcl.test(model_pos, model_neg, dynamic_train[val_idx], labels_train[val_idx])) print "===> (7) HMM with dynamic features on CV: %.4f (+/- %.4f) %s" % (np.mean(scores), np.std(scores), scores)
""" Generate HMM-based classifier on syn_lstm_wins synthetic dataset """ import numpy as np from HMM.hmm_classifier import HMMClassifier print 'Loading the dataset..' train_data = np.load( "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_dynamic.npy") train_labels = np.load( "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_labels.npy") test_data = np.load( "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_dynamic.npy") test_labels = np.load( "/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_labels.npy") print "Training HMM classifier..." hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(2, 2, 'full', train_data, train_labels) print hmmcl.test(model_pos, model_neg, test_data, test_labels)
dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0) labels_all = np.concatenate((labels_train, labels_val), axis=0) nsamples = static_all.shape[0] # split indices into folds val_idx_list = np.array_split(range(nsamples), nfolds) # run CV scores_acc = [] scores_auc = [] for fid, val_idx in enumerate(val_idx_list): print "Current fold is %d" % fid train_idx = list(set(range(nsamples)) - set(val_idx)) # HMM on dynamic features (7) hmmcl = HMMClassifier() dnm_train = dynamic_all[train_idx] lbls_train = labels_all[train_idx] dnm_val = dynamic_all[train_idx] lbls_val = labels_all[train_idx] model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dnm_train, lbls_train) acc, auc = hmmcl.test(model_pos, model_neg, dnm_val, lbls_val) scores_acc.append(acc) scores_auc.append(auc) print "===> (7) accuracy of HMM on dynamic features: %.4f (+/- %.4f) %s" % (np.mean(scores_acc), np.std(scores_acc), scores_acc) print "===> (7) auc of HMM on dynamic features: %.4f (+/- %.4f) %s" % (np.mean(scores_auc), np.std(scores_auc), scores_auc)
scores[3].append(rf.score(enriched_by_hmm[val_idx], labels_all[val_idx])) # RF on static features (5) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(static_all[train_idx], labels_all[train_idx]) scores[5].append(rf.score(static_all[val_idx], labels_all[val_idx])) # RF on dynamic features (6) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(dynamic_as_static[train_idx], labels_all[train_idx]) scores[6].append(rf.score(dynamic_as_static[val_idx], labels_all[val_idx])) # HMM on dynamic features (7) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) acc, auc = hmmcl.test(model_pos, model_neg, dynamic_all[val_idx], labels_all[val_idx]) scores[7].append(acc) # HMM on dynamic and static (turned into fake sequences) (9) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_and_static_as_dynamic[train_idx], labels_all[train_idx]) scores[9].append(hmmcl.test(model_pos, model_neg, dynamic_and_static_as_dynamic[val_idx], labels_all[val_idx])) # RF on static and dynamic (spatialized) features (11) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(static_and_dynamic_as_static[train_idx], labels_all[train_idx]) scores[11].append(rf.score(static_and_dynamic_as_static[val_idx], labels_all[val_idx])) print "===> (1) Ensemble (RF) on predictions by RF and HMM: %.4f (+/- %.4f) %s" % (np.mean(scores[1]), np.std(scores[1]), scores[1]) print "===> (3) Hybrid (RF) on features enriched by HMM: %.4f (+/- %.4f) %s" % (np.mean(scores[3]), np.std(scores[3]), scores[3]) print "===> (5) RF on static features: %.4f (+/- %.4f) %s" % (np.mean(scores[5]), np.std(scores[5]), scores[5])
""" Generate HMM-based classifier on syn_lstm_wins synthetic dataset """ import numpy as np from HMM.hmm_classifier import HMMClassifier print 'Loading the dataset..' train_data = np.load("/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_dynamic.npy") train_labels = np.load("/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_labels.npy") test_data = np.load("/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_dynamic.npy") test_labels = np.load("/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_labels.npy") print "Training HMM classifier..." hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(2, 2, 'full', train_data, train_labels) print hmmcl.test(model_pos, model_neg, test_data, test_labels)
# # Sanity Checks # print "Error ratio: %.2f, expected performance of a lonely model is %.2f, of the joint model %.2f" % (error_ratio, 1 - error_ratio, 1 - error_ratio + error_ratio / 2.0) # a) static data classification rf = RandomForestClassifier(n_estimators=100) rf.fit(static_train_data, static_train_labels) print "Random Forest with static features on validation set: %.4f" % rf.score(static_val_data, static_val_labels) # b) dynamic data classification hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(3, 10, dynamic_train_data, dynamic_train_labels) print "HMM with dynamic features on validation set: %.4f" % hmmcl.test(model_pos, model_neg, dynamic_val_data, dynamic_val_labels) # # Evaluating Joint Model # print "" print "Evaluating joint model:" print "Splitting data in two halves..." fh_idx = np.random.choice(range(0, dynamic_train_data.shape[0]), size=np.round(dynamic_train_data.shape[0] * 0.5, 0), replace=False) sh_idx = list(set(range(0, dynamic_train_data.shape[0])) - set(fh_idx)) fh_data = dynamic_train_data[fh_idx, :, :] fh_labels = dynamic_train_labels[fh_idx] sh_data = dynamic_train_data[sh_idx, :, :] sh_labels = dynamic_train_labels[sh_idx]
scores[3].append(rf.score(enriched_by_hmm[val_idx], labels_all[val_idx])) # RF on static features (5) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(static_all[train_idx], labels_all[train_idx]) scores[5].append(rf.score(static_all[val_idx], labels_all[val_idx])) # RF on dynamic features (6) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(dynamic_as_static[train_idx], labels_all[train_idx]) scores[6].append(rf.score(dynamic_as_static[val_idx], labels_all[val_idx])) # HMM on dynamic features (7) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) acc, auc = hmmcl.test(model_pos, model_neg, dynamic_all[val_idx], labels_all[val_idx]) scores[7].append(acc) # HMM on dynamic and static (turned into fake sequences) (9) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_and_static_as_dynamic[train_idx], labels_all[train_idx]) scores[9].append(hmmcl.test(model_pos, model_neg, dynamic_and_static_as_dynamic[val_idx], labels_all[val_idx])) # RF on static and dynamic (spatialized) features (11) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(static_and_dynamic_as_static[train_idx], labels_all[train_idx]) scores[11].append(rf.score(static_and_dynamic_as_static[val_idx], labels_all[val_idx])) print "===> (1) Ensemble (RF) on predictions by RF and HMM: %.4f (+/- %.4f) %s" % (np.mean(scores[1]), np.std(scores[1]), scores[1]) print "===> (3) Hybrid (RF) on features enriched by HMM: %.4f (+/- %.4f) %s" % (np.mean(scores[3]), np.std(scores[3]), scores[3]) print "===> (5) RF on static features: %.4f (+/- %.4f) %s" % (np.mean(scores[5]), np.std(scores[5]), scores[5])
scores[4].append(rf.score(test_enriched_by_lstm, test_labels)) # RF on static features (5) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(trainB_static, trainB_labels) scores[5].append(rf.score(test_static, test_labels)) # RF on dynamic features (6) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(trainB_dynamic_as_static, trainB_labels) scores[6].append(rf.score(test_dynamic_as_static, test_labels)) # HMM on dynamic features (7) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, trainB_dynamic, trainB_labels) acc, auc = hmmcl.test(model_pos, model_neg, test_dynamic, test_labels) scores[7].append(acc) # LSTM on dynamic features (8) lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(trainB_dynamic, trainB_labels) scores[8].append(lstmcl.test(model_pos, model_neg, test_dynamic, test_labels)) # HMM on dynamic and static (turned into fake sequences) (9) #hmmcl = HMMClassifier() #model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, trainB_dynamic_and_static_as_dynamic, trainB_labels_all) #acc, auc = hmmcl.test(model_pos, model_neg, test_dynamic_and_static_as_dynamic, test_labels) #scores[9].append(acc) # LSTM on dynamic and static (turned into fake sequences) (10) #lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize)
# NOTE(review): this excerpt starts mid-script — static_train/dynamic_train,
# nfolds, nstates and niter are defined earlier in the file, outside this view.
static_val = np.load(
    '/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy')
dynamic_val = np.load(
    '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy')
labels_train = np.load(
    '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy')
labels_val = np.load(
    '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy')

# the CV below runs over the training split only
nsamples = dynamic_train.shape[0]

# split indices into folds
val_idx_list = np.array_split(range(nsamples), nfolds)

# run CV
scores = []
for fid, val_idx in enumerate(val_idx_list):
    print "Current fold is %d/%d" % (fid + 1, nfolds)
    # training indices = everything outside the current validation fold
    train_idx = list(set(range(nsamples)) - set(val_idx))

    # train the model and report performance
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(nstates, niter, 'full',
                                       dynamic_train[train_idx],
                                       labels_train[train_idx])
    scores.append(
        hmmcl.test(model_pos, model_neg, dynamic_train[val_idx],
                   labels_train[val_idx]))

print "===> (7) HMM with dynamic features on CV: %.4f (+/- %.4f) %s" % (
    np.mean(scores), np.std(scores), scores)
rf.fit(static_all[train_idx], labels_all[train_idx]) print "===> (5) RF on static features: %.4f" % rf.score( static_all[test_idx], labels_all[test_idx]) # RF on dynamic features (6) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(dynamic_as_static[train_idx], labels_all[train_idx]) print "===> (6) RF on dynamic (spatialized) features: %.4f" % rf.score( dynamic_as_static[test_idx], labels_all[test_idx]) # HMM on dynamic features (7) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) print "===> (7) HMM on dynamic features: %.4f" % hmmcl.test( model_pos, model_neg, dynamic_all[test_idx], labels_all[test_idx]) # LSTM on dynamic features (8) lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx]) print "===> (8) LSTM on dynamic features: %.4f" % lstmcl.test( model_pos, model_neg, dynamic_all[test_idx], labels_all[test_idx]) # HMM on dynamic and static (turned into fake sequences) (9) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_and_static_as_dynamic[train_idx], labels_all[train_idx]) print "===> (9) HMM on dynamic and static features: %.4f" % hmmcl.test(
# NOTE(review): excerpt from a single train/test evaluation — rf,
# enriched_by_lstm, the *_all feature arrays, train_idx/test_idx and the
# hyperparameters are defined earlier in the file, outside this view.
print "===> (4) Hybrid (RF) on features enriched by LSTM: %.4f" % rf.score(enriched_by_lstm[test_idx], labels_all[test_idx])

# RF on static features (5)
rf = RandomForestClassifier(n_estimators=nestimators)
rf.fit(static_all[train_idx], labels_all[train_idx])
print "===> (5) RF on static features: %.4f" % rf.score(static_all[test_idx], labels_all[test_idx])

# RF on dynamic features (6)
rf = RandomForestClassifier(n_estimators=nestimators)
rf.fit(dynamic_as_static[train_idx], labels_all[train_idx])
print "===> (6) RF on dynamic (spatialized) features: %.4f" % rf.score(dynamic_as_static[test_idx], labels_all[test_idx])

# HMM on dynamic features (7)
# NOTE(review): hmmcl.test() is formatted with a single %.4f here, i.e. it
# is assumed to return a scalar; other variants in this codebase return an
# (accuracy, auc) pair — confirm which HMMClassifier version this file uses.
hmmcl = HMMClassifier()
model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx])
print "===> (7) HMM on dynamic features: %.4f" % hmmcl.test(model_pos, model_neg, dynamic_all[test_idx], labels_all[test_idx])

# LSTM on dynamic features (8)
# NOTE(review): 4-argument LSTMClassifier (no batch size), unlike the
# 5-argument form used elsewhere — confirm against the LSTMClassifier API.
lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs)
model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx])
print "===> (8) LSTM on dynamic features: %.4f" % lstmcl.test(model_pos, model_neg, dynamic_all[test_idx], labels_all[test_idx])

# HMM on dynamic and static (turned into fake sequences) (9)
hmmcl = HMMClassifier()
model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_and_static_as_dynamic[train_idx], labels_all[train_idx])
print "===> (9) HMM on dynamic and static features: %.4f" % hmmcl.test(model_pos, model_neg, dynamic_and_static_as_dynamic[test_idx], labels_all[test_idx])

# LSTM on dynamic and static (turned into fake sequences) (10)
# NOTE(review): the label below says "LSTM on dynamic features" although
# this case uses the combined dynamic+static sequences — likely a
# copy-paste slip in the message.
lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs)
model_pos, model_neg = lstmcl.train(dynamic_and_static_as_dynamic[train_idx], labels_all[train_idx])
print "===> (10) LSTM on dynamic features: %.4f" % lstmcl.test(model_pos, model_neg, dynamic_and_static_as_dynamic[test_idx], labels_all[test_idx])
# RF on static features (5) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(trainB_static, trainB_labels) scores[5].append(rf.score(test_static, test_labels)) # RF on dynamic features (6) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(trainB_dynamic_as_static, trainB_labels) scores[6].append(rf.score(test_dynamic_as_static, test_labels)) # HMM on dynamic features (7) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, trainB_dynamic, trainB_labels) acc, auc = hmmcl.test(model_pos, model_neg, test_dynamic, test_labels) scores[7].append(acc) # LSTM on dynamic features (8) lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(trainB_dynamic, trainB_labels) scores[8].append(lstmcl.test(model_pos, model_neg, test_dynamic, test_labels)) # HMM on dynamic and static (turned into fake sequences) (9) #hmmcl = HMMClassifier() #model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, trainB_dynamic_and_static_as_dynamic, trainB_labels_all) #acc, auc = hmmcl.test(model_pos, model_neg, test_dynamic_and_static_as_dynamic, test_labels) #scores[9].append(acc) # LSTM on dynamic and static (turned into fake sequences) (10)
# # Prepare combined datasets for the future experiments # # dataset to check how generative models perform if provided with static features along with dynamic static_as_dynamic = np.zeros((static_all.shape[0], static_all.shape[1], dynamic_all.shape[2])) for i in range(static_all.shape[0]): static_as_dynamic[i, :, :] = np.tile(static_all[i, :], (dynamic_all.shape[2], 1)).T dynamic_and_static_as_dynamic = np.concatenate((dynamic_all, static_as_dynamic + np.random.uniform(-0.0001, 0.0001, static_as_dynamic.shape)), axis=1) # # k-fold CV for performance estimation # val_idx_list = np.array_split(range(nsamples), nfolds) scores = {1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: []} for fid, val_idx in enumerate(val_idx_list): print "Current fold is %d / %d" % (fid + 1, nfolds) train_idx = list(set(range(nsamples)) - set(val_idx)) # HMM on dynamic and static (turned into fake sequences) (9) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_and_static_as_dynamic[train_idx], labels_all[train_idx]) acc, auc = hmmcl.test(model_pos, model_neg, dynamic_and_static_as_dynamic[val_idx], labels_all[val_idx]) scores[9].append(acc) print "===> (9) HMM on dynamic and static features: %.4f (+/- %.4f) %s" % (np.mean(scores[9]), np.std(scores[9]), scores[9])
scores[4] = rf.score(test_enriched_by_lstm, test_labels) # RF on static features (5) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(trainB_static, trainB_labels) scores[5] = rf.score(test_static, test_labels) # RF on dynamic features (6) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(trainB_dynamic_as_static, trainB_labels) scores[6] = rf.score(test_dynamic_as_static, test_labels) # HMM on dynamic features (7) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, trainB_dynamic, trainB_labels) acc, auc = hmmcl.test(model_pos, model_neg, test_dynamic, test_labels) scores[7] = acc # LSTM on dynamic features (8) lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(trainB_dynamic, trainB_labels) scores[8] = lstmcl.test(model_pos, model_neg, test_dynamic, test_labels) # HMM on dynamic and static (turned into fake sequences) (9) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, trainB_dynamic_and_static_as_dynamic, trainB_labels) acc, auc = hmmcl.test(model_pos, model_neg, test_dynamic_and_static_as_dynamic, test_labels) scores[9] = acc # LSTM on dynamic and static (turned into fake sequences) (10) lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize)