def trainWithLSTM(params):
    """Train an encoder-decoder LSTM on EEG data described by *params*.

    params is a dict carrying the sequence lengths, network layout, dataset
    handles and training hyper-parameters (see the key extractions below).
    Returns None; on success the trained model is saved under
    params['modelOutputDir'] with prefix params['savedModelFilePrefix'].
    """
    inSeqLen = params['inSeqLen']
    outSeqLen = params['outSeqLen']
    numFeatures = params['numFeatures']
    lstm_layers = params['lstm_layers']
    datasetObj = params['datasetObj']
    allRecords = params['allRecords']
    csvRecordInfo = params['csvRecordInfo']
    epochSeconds = params['epochSeconds']
    slidingWindowSeconds = params['slidingWindowSeconds']
    priorSeconds = params['priorSeconds']
    postSeconds = params['postSeconds']
    trainingEpochs = params['trainingEpochs']
    trainingBatchSize = params['trainingBatchSize']
    validation_split = params['validation_split']
    modelOutputDir = params['modelOutputDir']
    savedModelFilePrefix = params['savedModelFilePrefix']
    # BUG FIX: the original read 'dataFormat' below without ever assigning it
    # (a NameError unless an unseen module-level global happened to exist).
    # Extract it from params like every other setting, defaulting to "CSV" --
    # the only format implemented in this version.
    dataFormat = params.get('dataFormat', "CSV")

    lstmObj = eegLSTM("encoder_decoder_sequence")
    # lstmObj = eegLSTM("stacked_LSTM")
    lstmObj.createModel(inSeqLen, outSeqLen, numFeatures, lstm_layers)
    if (dataFormat == "CSV"):
        # Epoch and sliding-window lengths are passed in milliseconds.
        numSamples = lstmObj.prepareDataSubset_fromCSV(
            datasetObj, allRecords, csvRecordInfo,
            (epochSeconds*1000), (slidingWindowSeconds*1000),
            priorSeconds, postSeconds)
        # If the number of samples is too low, there is no point in training with this dataset
        if (numSamples <= 10):
            print ("numSamples ({}) is too low! Returning without creating a saved model!".format(numSamples))
            return
    else:
        print ("LSTM model for EDF format is not yet implemented in this version")
        exit(-1)
    lstmObj.fit(trainingEpochs, trainingBatchSize, validation_split)
    lstmObj.saveModel(modelOutputDir, savedModelFilePrefix)
    return
def testWithLSTM(modelFile, weightsFile, numFeaturesInTestFiles, allFiles):
    """Load a previously saved LSTM model and evaluate it on each test file.

    allFiles maps record identifiers to test-file paths; only the paths are
    used here.  A feature-count mismatch between the test files and the
    loaded model is reported but does not abort the run.
    """
    net = eegLSTM("encoder_decoder_sequence")
    net.loadModel(modelFile, weightsFile)
    print("Loaded LSTM model from disk")

    # Warn (but continue) when the data and the model disagree on width.
    if numFeaturesInTestFiles != net.numFeatures:
        print ("number of features in testfiles ", numFeaturesInTestFiles, "!= number of feature in loaded model ", net.numFeatures)

    for path in allFiles.values():
        print ("testFilePath = ", path)
        net.prepareDataset_fullfile(path)
        net.evaluate()
# Decide which time window around seizures to train on, verify channel
# consistency across all records, then train and save an LSTM model.
if (dataSubset == "fulldata"):
    print("Will be training on full data")
    # -1 is the sentinel for "no prior/post restriction" downstream.
    priorSeconds = postSeconds = -1
else:
    # BUG FIX: the original tested the pattern with re.search() and then
    # re-ran it with re.match(), which anchors at the start of the string;
    # a pattern found mid-string by search() would make match() return None
    # and crash on m.group(1).  Run the pattern once and reuse the match.
    m = re.search(r"seizure\-(\d+), seizure\+(\d+)", dataSubset)
    if m is None:
        # ROBUSTNESS: originally an unrecognized value fell through silently,
        # leaving priorSeconds/postSeconds undefined and failing later with a
        # confusing NameError.  Fail fast with a clear message instead.
        print("Unrecognized dataSubset value:", dataSubset)
        exit(-1)
    priorSeconds = int(m.group(1))
    postSeconds = int(m.group(2))
    print("data subset = [seizure-" + str(priorSeconds), ", seizure+" + str(postSeconds) + "]")

# Verify that all the records have same features
features = tuhd.recordInfo[allRecords[0]]['channelLabels']
featuresSet = set(features)
for recordID in allRecords:
    tmpSet = set(tuhd.recordInfo[recordID]['channelLabels'])
    xorSet = featuresSet.symmetric_difference(tmpSet)
    if (len(xorSet) > 0):
        print("features are not common between", allRecords[0], "and", recordID)
        exit(-1)
print("features are common between all the records!")
numFeatures = len(featuresSet)

lstmObj = eegLSTM("encoder_decoder_sequence")
# lstmObj = eegLSTM("stacked_LSTM")
lstmObj.createModel(inSeqLen, outSeqLen, numFeatures, lstmLayers)
lstmObj.prepareDataset_fromTUHedf(tuhd, allRecords, priorSeconds, postSeconds)
lstmObj.fit(epochs, batchsize)
# NOTE(review): recordID here is whatever the feature-check loop above left
# behind (the last record in allRecords); confirm the saved-model prefix is
# really meant to be the last record's ID rather than a dataset-level name.
lstmObj.saveModel(modelOutputDir, recordID + "LSTM")
def testWithHybridModel(lstmModelFile, lstmWeightsFile, dnnModelFile, dnnWeightsFile,
                        numFeaturesInTestFiles, allFiles, timeStepsToPredict):
    """Evaluate a hybrid pipeline on each test file: an LSTM rolls the EEG
    feature sequence forward, and a DNN classifies each predicted time step
    as a seizure value; predictions are then compared against the CSV's
    label column via calculateMetrics().

    lstmModelFile/lstmWeightsFile, dnnModelFile/dnnWeightsFile -- saved
        model architectures and weights to load.
    numFeaturesInTestFiles -- expected feature count in the CSVs; a mismatch
        with either loaded model is reported but does not abort.
    allFiles -- dict whose values are test CSV paths.
    timeStepsToPredict -- how many rows past the seed window to roll forward
        per outer-loop pass.
    """
    lstmObj = eegLSTM("encoder_decoder_sequence")
    lstmObj.loadModel(lstmModelFile, lstmWeightsFile)
    print("Loaded LSTM model from disk")
    dnnObj = eegDNN("Classifier_3layers")
    dnnObj.loadModel(dnnModelFile, dnnWeightsFile)
    print("Loaded DNN model from disk")
    # Feature-count mismatches are warnings only; evaluation proceeds anyway.
    if (numFeaturesInTestFiles != lstmObj.numFeatures):
        print ("number of features in testfiles ", numFeaturesInTestFiles, "!= number of feature in loaded model ", lstmObj.numFeatures)
    if (numFeaturesInTestFiles != dnnObj.numFeatures):
        print ("number of features in testfiles ", numFeaturesInTestFiles, "!= number of feature in loaded model ", dnnObj.numFeatures)
    for testFilePath in allFiles.values():
        print ("testFilePath = ", testFilePath)
        dataset = pd.read_csv(testFilePath)
        dataset = dataset.values # Convert to a numpy array from pandas dataframe
        numFeatures = lstmObj.numFeatures
        inSeqLen = lstmObj.inSeqLen
        outSeqLen = lstmObj.outSeqLen
        # Minimum rows needed: one seed window plus either one LSTM output
        # window or the full prediction horizon, whichever is larger.
        numRowsNeededForTest = max((inSeqLen + outSeqLen), (inSeqLen+timeStepsToPredict))
        numRows = dataset.shape[0]
        print ("inSeqLen={}, outSeqLen={}, numFeatures={}, numRows={}, numRowsNeededForTest={}".format(
            inSeqLen, outSeqLen, numFeatures, numRows, numRowsNeededForTest
        ))
        # lstmObj.prepareDataset_fullfile(testFilePath)
        # Outer loop: evaluate one prediction window, then trim the consumed
        # rows off the front of the dataset and repeat until too few remain.
        while (numRows > numRowsNeededForTest):
            numRemainingRows = min (numRows, (inSeqLen+timeStepsToPredict))
            # print ("numRows={}, numFeatures={}, numRemainingRows={}"
            #     .format(numRows, numFeatures, numRemainingRows))
            # NOTE(review): np.empty() does not zero its contents -- any slot
            # of predictedDataset/predictedSeizureValues not written by the
            # inner loop below holds garbage, and the tail of
            # predictedSeizureValues is still passed to calculateMetrics().
            # Confirm the inner loop is expected to cover the whole horizon.
            predictedDataset = np.empty((1, (inSeqLen+timeStepsToPredict), numFeatures))
            predictedSeizureValues = np.empty((inSeqLen+timeStepsToPredict))
            # Sliding window indices: [inputRowStart, inputRowEnd) feeds the
            # LSTM, [outputRowStart, outputRowEnd) receives its prediction.
            inputRowStart = 0
            inputRowEnd = inputRowStart + inSeqLen
            outputRowStart = inputRowEnd
            outputRowEnd = outputRowStart + outSeqLen
            # print ("inputRowStart={}, inputRowEnd={}, outputRowStart={}, outputRowEnd={}"
            #     .format(inputRowStart, inputRowEnd, outputRowStart, outputRowEnd))
            # Seed the prediction buffer with real data: the first inSeqLen
            # rows of features (column numFeatures is the label column).
            predictedDataset[0, inputRowStart:inputRowEnd,:numFeatures] = dataset[inputRowStart:inputRowEnd, :numFeatures]
            predictedSeizureValues[inputRowStart:inputRowEnd] = dataset[inputRowStart:inputRowEnd, numFeatures]
            # Inner loop: autoregressive roll-forward -- each LSTM prediction
            # becomes part of the next input window.
            # NOTE(review): when timeStepsToPredict >= outSeqLen,
            # numRowsNeededForTest == inSeqLen+timeStepsToPredict, which is
            # exactly numRemainingRows' starting value -- so this loop body
            # executes only once and predicts a single outSeqLen chunk,
            # leaving the rest of the horizon unwritten (see np.empty note
            # above).  Verify the intended termination condition.
            while (numRemainingRows >= numRowsNeededForTest):
                predictedDataset[:, outputRowStart:outputRowEnd, :] = \
                    lstmObj.getModel().predict(predictedDataset[:, inputRowStart:inputRowEnd, :])
                # Classify each freshly predicted time step with the DNN.
                for i in range(outSeqLen):
                    predictedSeizureValues[outputRowStart+i] = dnnObj.getModel().predict(predictedDataset[:, outputRowStart+i, :])
                # Advance the window by one output chunk.
                inputRowStart += outSeqLen
                inputRowEnd = inputRowStart + inSeqLen
                outputRowStart = inputRowEnd
                outputRowEnd = outputRowStart + outSeqLen
                numRemainingRows -= outSeqLen
                # print ("inputRowStart={}, inputRowEnd={}, outputRowStart={}, outputRowEnd={}"
                #     .format(inputRowStart, inputRowEnd, outputRowStart, outputRowEnd))
            # print ("predictedDataset = ", predictedDataset[0, inSeqLen:min (numRows, (inSeqLen+timeStepsToPredict)), :numFeatures])
            # print ("actual dataset = ", dataset[inSeqLen:min (numRows, (inSeqLen+timeStepsToPredict)), :numFeatures])
            # Score predicted vs. actual labels for the rows past the seed window.
            calculateMetrics(predictedSeizureValues[inSeqLen:], dataset[inSeqLen:,numFeatures])
            # Drop the consumed rows and go around again on the remainder.
            dataset = np.delete(dataset, list(range(timeStepsToPredict)), axis=0)
            numRows = dataset.shape[0]
    return