def evaluate_subject_models(data, labels, modelpath, subject):
    """
    Trains and evaluates EEGNet for a given subject in the P300 Speller
    database using repeated stratified K-fold cross-validation.
    """
    n_sub = data.shape[0]
    n_ex_sub = data.shape[1]
    n_samples = data.shape[2]
    n_channels = data.shape[3]

    aucs = np.zeros(5 * 10)

    print("Training for subject {0}: ".format(subject))
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=10, random_state=123)
    for k, (t, v) in enumerate(cv.split(data[subject], labels[subject])):
        X_train, y_train = data[subject, t, :, :], labels[subject, t]
        X_test, y_test = data[subject, v, :, :], labels[subject, v]
        X_train, X_valid, y_train, y_valid = train_test_split(
            X_train, y_train, test_size=0.2, shuffle=True, random_state=456)
        print('Partition {0}: X_train = {1}, X_valid = {2}, X_test = {3}'.format(
            k, X_train.shape, X_valid.shape, X_test.shape))

        # channel-wise feature standardization
        sc = EEGChannelScaler(n_channels=n_channels)
        X_train = np.swapaxes(sc.fit_transform(X_train)[:, np.newaxis, :], 2, 3)
        X_valid = np.swapaxes(sc.transform(X_valid)[:, np.newaxis, :], 2, 3)
        X_test = np.swapaxes(sc.transform(X_test)[:, np.newaxis, :], 2, 3)

        model = EEGNet(2, Chans=n_channels, Samples=n_samples)
        print(model.summary())
        model.compile(optimizer='adam', loss='categorical_crossentropy')

        # Early stopping setting also follows EEGNet (Lawhern et al., 2018)
        es = EarlyStopping(monitor='val_loss', mode='min', patience=50,
                           restore_best_weights=True)
        history = model.fit(X_train,
                            to_categorical(y_train),
                            batch_size=256,
                            epochs=200,
                            validation_data=(X_valid, to_categorical(y_valid)),
                            callbacks=[es])

        proba_test = model.predict(X_test)
        aucs[k] = roc_auc_score(y_test, proba_test[:, 1])
        print('S{0}, P{1} -- AUC: {2}'.format(subject, k, aucs[k]))
        K.clear_session()

    # note: np.savetxt writes plain text even though the file is named .npy
    np.savetxt(modelpath + '/s' + str(subject) + '_aucs.npy', aucs)
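# EEGChannelScaler is used above but not defined in this snippet. Below is a
# hypothetical minimal sketch of such a scaler: per-channel z-scoring with a
# sklearn-style fit/transform interface, fitted on the training set only. The
# real implementation may differ.
import numpy as np


class EEGChannelScaler:
    def __init__(self, n_channels):
        self.n_channels = n_channels
        self.means = None
        self.stds = None

    def fit_transform(self, X):
        # X has shape (n_trials, n_samples, n_channels); compute one mean and
        # one standard deviation per channel across all trials and samples
        self.means = X.mean(axis=(0, 1), keepdims=True)
        self.stds = X.std(axis=(0, 1), keepdims=True)
        return self.transform(X)

    def transform(self, X):
        # standardize each channel with statistics fitted on the training set
        return (X - self.means) / self.stds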
def evaluate_subject_model(X_train, y_train, X_valid, y_valid, X_test, y_test,
                           timepath):
    print('X_train = {0}, X_valid = {1}, X_test = {2}'.format(
        X_train.shape, X_valid.shape, X_test.shape))
    n_samples = X_train.shape[1]
    n_channels = X_train.shape[2]

    # channel-wise feature standardization
    sc = EEGChannelScaler(n_channels=n_channels)
    X_train = np.swapaxes(sc.fit_transform(X_train)[:, np.newaxis, :], 2, 3)
    X_valid = np.swapaxes(sc.transform(X_valid)[:, np.newaxis, :], 2, 3)
    X_test = np.swapaxes(sc.transform(X_test)[:, np.newaxis, :], 2, 3)

    model = EEGNet(2, Chans=n_channels, Samples=n_samples)
    model.compile(optimizer='adam', loss='categorical_crossentropy')

    # record per-epoch training times
    tt = TrainTime()
    history = model.fit(X_train,
                        to_categorical(y_train),
                        batch_size=256,
                        epochs=10,
                        validation_data=(X_valid, to_categorical(y_valid)),
                        callbacks=[tt])

    # time inference over the whole test set
    start_test = time.time()
    proba_test = model.predict(X_test)
    test_time = time.time() - start_test

    train_size = X_train.shape[0]
    valid_size = X_valid.shape[0]
    test_size = X_test.shape[0]
    times = [[np.mean(tt.times), np.sum(tt.times), 10, train_size, valid_size,
              test_time, test_size, test_time / test_size]]
    df = pd.DataFrame(times,
                      columns=['Mean Epoch Time', 'Total Train Time', 'Epochs',
                               'Train Size', 'Valid Size', 'Test Time',
                               'Test Size', 'Test Time per Example'])
    df.to_csv(timepath + 'EEGNet_times.csv', encoding='utf-8')
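# The TrainTime callback used above is not defined in this snippet. A minimal
# sketch of what it might look like: a Keras Callback that records the
# wall-clock duration of each epoch in a `times` list, matching how
# `tt.times` is consumed above. The real implementation may differ.
import time

from tensorflow.keras.callbacks import Callback


class TrainTime(Callback):
    def on_train_begin(self, logs=None):
        self.times = []

    def on_epoch_begin(self, epoch, logs=None):
        self._start = time.time()

    def on_epoch_end(self, epoch, logs=None):
        # append the elapsed wall-clock time for this epoch
        self.times.append(time.time() - self._start)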
###############################################################################
# The syntax is {class_1: weight_1, class_2: weight_2, ...}. Here we just set
# all the weights to 1.
###############################################################################
class_weights = {0: 1, 1: 1}

################################################################################
# Fit the model. Due to very small sample sizes this can get pretty noisy
# run-to-run, but most runs should be comparable to xDAWN + Riemannian
# geometry classification (below).
################################################################################
hist = model.fit(X_train, Y_train, batch_size=16, epochs=100, verbose=2,
                 validation_split=0.33, shuffle=True,
                 callbacks=[checkpointer], class_weight=class_weights)

# load optimal weights
model.load_weights('/content/gdrive/MyDrive/checkpoint.h5')

###############################################################################
# Alternatively, you can use the weights provided in the repo, which should
# get you 93% accuracy. Change the WEIGHTS_PATH variable to wherever the file
# is on your system.
###############################################################################
# WEIGHTS_PATH = /path/to/EEGNet-8-2-weights.h5
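# The `checkpointer` callback used in the fit above is not defined in this
# snippet. Presumably it is a ModelCheckpoint writing best weights to the same
# Google Drive path that is loaded afterwards; a sketch under that assumption:
from tensorflow.keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(filepath='/content/gdrive/MyDrive/checkpoint.h5',
                               verbose=1, save_best_only=True)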
# count the number of parameters in the model
numParams = model.count_params()

# set a valid path for your system to record model checkpoints
checkpointer = ModelCheckpoint(filepath='/tmp/checkpoint.h5', verbose=1,
                               save_best_only=True)

# weighted loss: weight each class by the inverse of its training count
weight_0 = 1 / len([y for y in y_train_valid if y == 0])
weight_1 = 1 / len([y for y in y_train_valid if y == 1])
class_weights = {0: weight_0, 1: weight_1}

# fit the model
fittedModel = model.fit(X_train, y_train, batch_size=34, epochs=100,
                        verbose=2, validation_data=(X_valid, y_valid),
                        callbacks=[checkpointer], class_weight=class_weights)

# load optimal weights
model.load_weights('/tmp/checkpoint.h5')

# evaluate
y_probs = model.predict(X_test)
y_pred = y_probs.argmax(axis=-1)

# save the predicted labels to the submission file
csv = pd.read_csv('./data/benchmark.csv')
csv['Prediction'] = y_pred
csv.to_csv('./submission/submissionEEGNET.csv', index=False)
print('-------------------- Submission file has been generated. --------------------')
def evaluate_cross_subject_model(data, labels, modelpath):
    """
    Trains and evaluates EEGNet across subjects in the P300 Speller database
    using leave-one-subject-out cross-validation, with one randomly chosen
    training subject held out as the validation group.
    """
    n_sub = data.shape[0]
    n_ex_sub = data.shape[1]
    n_samples = data.shape[2]
    n_channels = data.shape[3]

    aucs = np.zeros(n_sub)

    # flatten subjects into a single example axis, tracking subject ids
    data = data.reshape((n_sub * n_ex_sub, n_samples, n_channels))
    labels = labels.reshape((n_sub * n_ex_sub))
    groups = np.array([i for i in range(n_sub) for j in range(n_ex_sub)])

    cv = LeaveOneGroupOut()
    for k, (t, v) in enumerate(cv.split(data, labels, groups)):
        X_test, y_test = data[v], labels[v]

        # hold out one random training subject as the validation group
        rg = np.random.choice(t, 1)
        sv = groups[t] == groups[rg]
        st = np.logical_not(sv)
        X_train, y_train = data[t][st], labels[t][st]
        X_valid, y_valid = data[t][sv], labels[t][sv]

        print("Partition {0}: train = {1}, valid = {2}, test = {3}".format(
            k, X_train.shape, X_valid.shape, X_test.shape))
        print("Groups train = {0}, valid = {1}, test = {2}".format(
            np.unique(groups[t][st]), np.unique(groups[t][sv]),
            np.unique(groups[v])))

        # channel-wise feature standardization
        sc = EEGChannelScaler(n_channels=n_channels)
        X_train = np.swapaxes(sc.fit_transform(X_train)[:, np.newaxis, :], 2, 3)
        X_valid = np.swapaxes(sc.transform(X_valid)[:, np.newaxis, :], 2, 3)
        X_test = np.swapaxes(sc.transform(X_test)[:, np.newaxis, :], 2, 3)

        model = EEGNet(2, dropoutRate=0.25, Chans=n_channels,
                       Samples=n_samples)
        print(model.summary())
        model.compile(optimizer='adam', loss='categorical_crossentropy')

        es = EarlyStopping(monitor='val_loss', mode='min', patience=50,
                           restore_best_weights=True)
        model.fit(X_train,
                  to_categorical(y_train),
                  batch_size=256,
                  epochs=200,
                  validation_data=(X_valid, to_categorical(y_valid)),
                  callbacks=[es])

        proba_test = model.predict(X_test)
        aucs[k] = roc_auc_score(y_test, proba_test[:, 1])
        print('P{0} -- AUC: {1}'.format(k, aucs[k]))
        K.clear_session()

    np.savetxt(modelpath + '/aucs.npy', aucs)
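# Hypothetical usage of the function above, assuming epochs are stored as a
# single NumPy array of shape (subjects, examples, samples, channels) with
# matching binary labels; the file names and shapes below are illustrative
# assumptions, not part of the original code.
import numpy as np

data = np.load('p300_epochs.npy')    # e.g. (n_sub, n_ex_sub, n_samples, n_channels)
labels = np.load('p300_labels.npy')  # (n_sub, n_ex_sub), values in {0, 1}
evaluate_cross_subject_model(data, labels, modelpath='results')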
# The syntax is {class_1: weight_1, class_2: weight_2, ...}. Here the two rare
# ERP classes are upweighted against the frequent standard class.
# class_weights = {0: 1, 1: 1, 2: 1, 3: 1}
# 0: Target, 1: Distractor, 2: Standard
class_weights = {0: 8, 1: 8, 2: 1}

################################################################################
# Fit the model. Due to very small sample sizes this can get pretty noisy
# run-to-run, but most runs should be comparable to xDAWN + Riemannian
# geometry classification (below).
################################################################################
fittedModel = model.fit(X_train, Y_train, batch_size=16, epochs=300,
                        verbose=2, validation_data=(X_validate, Y_validate),
                        callbacks=[checkpointer], class_weight=class_weights)

# load optimal weights
model.load_weights('/tmp/checkpoint.h5')

###############################################################################
# Alternatively, you can use the weights provided in the repo, which should
# get you 93% accuracy. Change the WEIGHTS_PATH variable to wherever the file
# is on your system.
###############################################################################
# WEIGHTS_PATH = /path/to/EEGNet-8-2-weights.h5
# model.load_weights(WEIGHTS_PATH)
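# Instead of hand-tuning the 8:8:1 weights above, balanced weights can be
# derived from the label frequencies. A sketch using scikit-learn, assuming
# integer class labels are available in `y_train` before one-hot encoding:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced', classes=classes,
                               y=y_train)
# map each class label to its weight, as expected by Keras' class_weight
class_weights = dict(zip(classes, weights))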
# class versus the others) you can assign a weight to each class during
# optimization to balance it out. This data is approximately balanced, so we
# don't need to do this, but it is shown here for illustration/completeness.
###############################################################################
# The syntax is {class_1: weight_1, class_2: weight_2, ...}. Here we just set
# all the weights to 1.
class_weights = {0: 1, 1: 1}

################################################################################
# Fit the model. Due to very small sample sizes this can get pretty noisy
# run-to-run, but most runs should be comparable to xDAWN + Riemannian
# geometry classification (below).
################################################################################
hist = model.fit(X_train, Y_train, batch_size=16, epochs=300, verbose=2,
                 validation_split=0.33, class_weight=class_weights)

# load optimal weights
###############################################################################
# Alternatively, you can use the weights provided in the repo, which should
# get you 93% accuracy. Change the WEIGHTS_PATH variable to wherever the file
# is on your system.
###############################################################################
# WEIGHTS_PATH = /path/to/EEGNet-8-2-weights.h5
# model.load_weights(WEIGHTS_PATH)
###############################################################################
# X_train = X_train[:, 4:9, :, 50:150]
# X_test = X_test[:, 4:9, :, 50:150]

# format to match EEGNet
# X_train = EEGnetFormat(X_train)
# X_test = EEGnetFormat(X_test)

model = EEGNet(nb_classes=1, Chans=35, Samples=100)
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])
print(model.summary())

# train the model; note that the test split doubles as the validation set here
csv_logger = CSVLogger(out + '.log')
filepath = out + ".hdf5"
tensorboard = TensorBoard(log_dir="../logs/{}_{}".format(out, time()))
checkpointer = ModelCheckpoint(monitor='val_loss', filepath=filepath,
                               verbose=1, save_best_only=True)
early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10)
model.fit(x=X_train,
          y=y_train,
          batch_size=128,
          epochs=epochs,
          validation_data=(X_test, y_test),
          callbacks=[checkpointer, csv_logger, tensorboard, early_stop])
# Define and train the model
# model = DeepConvNet(nb_classes=2, Chans=32, Samples=trial_n, dropoutRate=0.5)
model = EEGNet(nb_classes=2, Chans=56, Samples=trial_n, regRate=0.001,
               dropoutRate=0.25, kernels=[(2, 32), (8, 4)])
# model = EEGNet(nb_classes=2, Chans=32, Samples=trial_n, dropoutRate=0.4,
#                kernels=[(2, 64), (8, 32)])
# model = EEGNet(nb_classes=2, Chans=32, Samples=260,
#                dropoutRate=dropout_trial, kernels=kernel_trial)
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(train_data, train_labels, batch_size=batchsize, epochs=500,
          verbose=2)

auc_values = []
# dead code below: retrain in 10-epoch increments and plot test AUC vs. epoch
"""
for n in range(0, 500, 10):
    model.fit(train_data, train_labels, batch_size=batchsize, epochs=10,
              verbose=2)
    test_predict = model.predict(test_data, batch_size=batchsize, verbose=0)
    fpr, tpr, thresholds = roc_curve(flatten_test_labels, test_predict[:, 1],
                                     pos_label=1)
    test_auc_value = auc(fpr, tpr)
    auc_values.append(test_auc_value)

plt.plot(range(0, 500, 10), auc_values)
plt.title('Test AUC for filtered EEG')
plt.ylabel('AUC')
plt.xlabel('epoch')
plt.show()
"""
if False:
    # load a previously trained model
    model = load_model(input('Model name:') + '.h5')
else:
    # define and train the model
    model = EEGNet(nb_classes=2, Chans=channels_n, Samples=trial_n,
                   regRate=0.001, dropoutRate=0.25, kernels=[(2, 32), (8, 4)])
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    history = model.fit(train_data, train_labels, batch_size=batchsize,
                        epochs=300, verbose=2)
    # model.save('***.h5')

# generate prediction probabilities
train_predict = model.predict(train_data, batch_size=batchsize, verbose=0)
validation_predict = model.predict(validation_data, batch_size=batchsize,
                                   verbose=0)
test_predict = model.predict(test_data, batch_size=batchsize, verbose=0)

validation_accuracy = model.evaluate(validation_data, validation_labels,
                                     batch_size=batchsize)
print(validation_accuracy)
test_accuracy = model.evaluate(test_data, test_labels,
                               batch_size=batchsize)
# optimization to balance it out. This data is approximately balanced, so we
# don't need to do this, but it is shown here for illustration/completeness.
###############################################################################
# The syntax is {class_1: weight_1, class_2: weight_2, ...}. Here we just set
# all the weights to 1.
class_weights = {0: 1, 1: 1, 2: 1, 3: 1}

################################################################################
# Fit the model. Due to very small sample sizes this can get pretty noisy
# run-to-run, but most runs should be comparable to xDAWN + Riemannian
# geometry classification (below).
################################################################################
hist = model.fit(X_train, Y_train, batch_size=16, epochs=300, verbose=2,
                 validation_data=(X_validate, Y_validate),
                 class_weight=class_weights)

# load optimal weights
# model.load_weights('C:/Users/PC/PycharmProjects/Artigence/PJS/EEGNet_test/tmp/variables/checkpoints.h2')

###############################################################################
# Alternatively, you can use the weights provided in the repo, which should
# get you 93% accuracy. Change the WEIGHTS_PATH variable to wherever the file
# is on your system.
###############################################################################
# WEIGHTS_PATH = /path/to/EEGNet-8-2-weights.h5
# model.load_weights(WEIGHTS_PATH)
def trainAndPredict(
        self,
        epochs=300,
        batchSize=1000,
        class_weights=None,
        F1=8,
        D=2,
        kernLength=None,
        dropoutRate=0.5,
        learningRate=0.001,
):
    if class_weights is None:
        class_weights = getClassWeights(self.y_train)
    if kernLength is None:
        kernLength = int(self.samples / 2)
    # class_weights = {1: 1, 0: 1}
    # class_weights = {0: 22, 1: 1}

    # configure the EEGNet-8,2,16 model with a kernel length of half the
    # sample count by default (other model configurations may do better,
    # but this is a good starting point)
    F2 = F1 * D
    print('F1 (temporal filters)', F1)
    print('D (spatial filters)', D)
    print('F2 (pointwise filters)', F2)
    print('kernLength', kernLength)
    print('learningRate', learningRate)
    print('class_weights', class_weights)
    print('epochs', epochs)
    print('batchSize', batchSize)

    model = EEGNet(nb_classes=getNumClasses(), Chans=self.chans,
                   Samples=self.samples, dropoutRate=dropoutRate,
                   kernLength=kernLength, F1=F1, D=D, F2=F2,
                   dropoutType='Dropout')
    # model = DeepConvNet(nb_classes=getNumClasses(), Chans=self.chans,
    #                     Samples=self.samples, dropoutRate=dropoutRate)
    # model = EEGNet_old(nb_classes=getNumClasses(), Chans=self.chans,
    #                    Samples=self.samples, dropoutRate=dropoutRate)

    optimizer = Adam(learning_rate=learningRate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])

    # set a valid path for your system to record model checkpoints
    checkpointer = ModelCheckpoint(filepath='/tmp/checkpoint.h5', verbose=1,
                                   save_best_only=True)

    class OnEpochEndCallback(Callback):
        # note: relies on Keras exposing validation_data on callbacks, which
        # only older Keras versions do
        def on_epoch_end(self, epoch, logs=None):
            x_test = self.validation_data[0]
            y_test = self.validation_data[1]
            # x_test, y_test = self.validation_data
            predictions = self.model.predict(x_test)
            y_test = np.argmax(y_test, axis=-1)
            predictions = np.argmax(predictions, axis=-1)
            c = confusion_matrix(y_test, predictions)
            roc_auc = roc_auc_score(y_test, predictions)
            print('Confusion matrix:\n', c)
            print('sensitivity', c[0, 0] / (c[0, 1] + c[0, 0]))
            print('specificity', c[1, 1] / (c[1, 1] + c[1, 0]))
            print('roc_auc_score', roc_auc)

    model.fit(self.X_train, self.Y_train, batch_size=batchSize, epochs=epochs,
              verbose=2, validation_data=(self.X_validate, self.Y_validate),
              callbacks=[checkpointer, OnEpochEndCallback()],
              class_weight=class_weights)

    probs = model.predict(self.X_test)
    preds = probs.argmax(axis=-1)
    acc = np.mean(preds == self.Y_test.argmax(axis=-1))
    print("Classification accuracy: %f" % acc)

    if getNumClasses() == 2:
        roc_auc = roc_auc_score(self.y_test, preds)
        print('roc_auc_score', roc_auc)

        # pick the probability threshold that maximizes the geometric mean of
        # sensitivity and specificity
        probsConverted = probs[:, 1]
        fpr, tpr, thresholds = roc_curve(self.y_test, probsConverted)
        gmeans = np.sqrt(tpr * (1 - fpr))
        # locate the index of the largest g-mean
        ix = np.argmax(gmeans)
        print('Best Threshold=%f, G-Mean=%.3f' % (thresholds[ix], gmeans[ix]))

        roc_auc = auc(fpr, tpr)
        plt.title('Receiver Operating Characteristic')
        plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
        plt.scatter(fpr[ix], tpr[ix], marker='o', color='black', label='Best')
        plt.legend(loc='lower right')
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        plt.savefig('roc')

    print('confusion_matrix')
    print(confusion_matrix(self.y_test, preds))
    log(epochs, batchSize, self.samples, kernLength, dropoutRate,
        learningRate, roc_auc, acc, F1, D)
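# The helpers getClassWeights and getNumClasses used above are not shown in
# this snippet. A minimal sketch of what they might look like, assuming
# integer labels 0..k are all present; the real implementations may differ.
import numpy as np


def getNumClasses():
    # assumed: a fixed binary task
    return 2


def getClassWeights(y):
    # inverse-frequency weights, scaled so the most frequent class gets 1
    counts = np.bincount(np.asarray(y).astype(int))
    return {c: counts.max() / n for c, n in enumerate(counts)}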