def save_pickle_file(filename, data):
    start = time.get_seconds()
    filename = filename + ".pickle"
    print "Dumping to %s" % filename,
    with open(filename, "w") as f:
        pickle.dump(data, f)
    print "%ds" % (time.get_seconds() - start)
def EU_matlab_run(self, load_Core):
    start_time = time.get_seconds()
    # load_Core.matlab_engin.loading_EU_main(nargout=0)
    # X = load_Core.matlab_engin.workspace['X']
    # y = load_Core.matlab_engin.workspace['y']
    matFile = load_EU_features(self.task_core.data_dir, self.task_core.target, load_Core)
    matlab_load_core = Matlab_Load_Core(matFile)
    matlab_load_core.target = self.task_core.target
    matlab_load_core.y_train = class_relabel(matlab_load_core.y_train)
    if (matlab_load_core.y_test is not None):
        matlab_load_core.y_test = class_relabel(matlab_load_core.y_test)
    matlab_load_core.structural_inf = load_side_adj(
        self.task_core.sidinfo_dir, self.task_core.target, self.task_core.adj_calc_mode, load_Core)
    print(' X training', matlab_load_core.X_train.shape, 'y training', matlab_load_core.y_train.shape)
    print(' X testing', matlab_load_core.X_test.shape, 'y testing', matlab_load_core.y_test.shape)
    matlab_load_core.settings_TrainNumFiles, matlab_load_core.settings_TestNumFiles = load_EU_settings(
        self.task_core.settings_dir, self.task_core.target, load_Core)
    print(' time elapsed: ', time.get_seconds() - start_time)
    return matlab_load_core.X_train.shape[1], matlab_load_core.X_train.shape[2:], matlab_load_core
def load_data(self):
    global significant_channels
    global subj
    subj = self.task_core.target  # to be used in auc record
    start = time.get_seconds()
    filename = 'data-cache/significant_channels_%s' % self.task_core.target
    significant_channels = SignificantChannels(self.task_core).load_data()
    save_hkl_file(filename, significant_channels)
    print significant_channels
    print 'ACS time is %d s' % (time.get_seconds() - start)
    start = time.get_seconds()
    data = TrainingDataTask(self.task_core).run()
    y_classes = data.y_classes
    del data
    point = time.get_seconds()
    time_prepare = (point - start)
    print 'Time to prepare data for %s is %f seconds.' % (self.task_core.target, time_prepare)
    classifier_data = TrainClassifierTask(self.task_core).run()
    point2 = time.get_seconds()
    time_training = (point2 - point)
    print 'Time to train data for %s is %f seconds.' % (self.task_core.target, time_training)
    test_data = LoadTestDataTask(self.task_core).run()
    X_test = flatten(test_data.X)
    return make_predictions(self.task_core.target, X_test, y_classes, classifier_data)
def train(classifier, training_data, quiet=False):
    X_train = training_data.X_train
    y_train = training_data.y_train
    if not quiet:
        print 'Training ...',
    start = time.get_seconds()
    classifier.fit(X_train, y_train)
    if not quiet:
        print '%ds' % (time.get_seconds() - start)
def MITCHB_run(self, load_Core):
    start_time = time.get_seconds()
    out_data = load_edf_data(self.task_core.data_dir, self.task_core.target, load_Core)
    num_clips = []
    if (load_Core.concat):
        X = None
        y = None
    else:
        X = []
        y = []
    for data, file_name, seizure_start_time_offsets, seizure_lengths in out_data:
        inner_x, inner_y, num_nodes, dim, conv_sizes = windowing_data(
            data, seizure_start_time_offsets, seizure_lengths, load_Core)
        # print('inner_x: ', np.array(inner_x).shape)
        # num_clips.append(np.array(inner_x).shape[0])
        if (load_Core.concat):
            if (X is None):
                X = np.array(inner_x)
                y = np.array(inner_y)
            else:
                X = np.concatenate((X, inner_x), axis=0)
                y = np.concatenate((y, inner_y), axis=0)
        else:
            X.append(inner_x)
            y.append(inner_y)
    X = np.array(X)
    y = np.array(y)
    print(' X', X.shape, 'y', y.shape)
    print(' time elapsed: ', time.get_seconds() - start_time)
    return X, y, num_nodes, dim, conv_sizes  # , num_clips
def train_calibrator(y, y_estimate, plot2file):
    print("Training calibrator...")
    start = time.get_seconds()
    preictal_predictions = []
    p_y_cv = [0.0 if x == 0.0 else 1.0 for x in y]
    for i in range(len(y_estimate)):
        p = y_estimate[i]
        preictal = translate_prediction(p)
        preictal_predictions.append(preictal)
    fpr, tpr, thresholds = roc_curve(p_y_cv, preictal_predictions)
    p_roc_auc = auc(fpr, tpr)
    y_av = np.average(p_y_cv)
    y_std = np.std(p_y_cv)
    ye_av = np.average(preictal_predictions)
    ye_std = np.std(preictal_predictions)
    pl.clf()
    pl.hist(preictal_predictions, bins=50)
    pl.xlabel('preictal estimate')
    pl.ylabel('counts')
    pl.title('CV histogram (mean_cv= %0.3f, mean_es=%0.3f, std_es=%0.3f)' % (y_av, ye_av, ye_std))
    # pl.show()
    plot2file.savefig()
    calibrate_matrix = np.array([ye_av, ye_std])
    elapsedSecs = time.get_seconds() - start
    print("t=%ds score=%f" % (int(elapsedSecs), p_roc_auc))
    return calibrate_matrix
def train(classifier, X_train, y_train, X_cv, y_cv, y_classes):
    print "Training ..."
    print 'Dim', 'X', np.shape(X_train), 'y', np.shape(y_train), 'X_cv', np.shape(X_cv), 'y_cv', np.shape(y_cv)
    start = time.get_seconds()
    total = y_train.shape[0]
    ictalnum = sum(y_train)
    interictalnum = total - ictalnum
    print ictalnum
    print interictalnum
    weight = np.concatenate((interictalnum / ictalnum * np.ones(ictalnum), np.ones(interictalnum)))
    print weight
    # classifier.fit(X_train, y_train, sample_weight=weight)
    classifier.fit(X_train, y_train)
    print "Scoring..."
    S = score_classifier_auc(classifier, X_cv, y_cv, y_classes)
    score = S
    elapsedSecs = time.get_seconds() - start
    print "t=%ds score=%f" % (int(elapsedSecs), score)
    return score, S
def load_hkl_file(filename):
    hkl_filename = filename + '.hkl'
    if os.path.isfile(hkl_filename):
        start = time.get_seconds()
        data = hkl.load(hkl_filename)
        print 'Loaded %s in %ds' % (hkl_filename, time.get_seconds() - start)
        return data
    return None
def train_all_data(classifier, X_train, y_train, X_cv, y_cv):
    print "Training ..."
    X = np.concatenate((X_train, X_cv), axis=0)
    y = np.concatenate((y_train, y_cv), axis=0)
    print 'Dim', np.shape(X), np.shape(y)
    start = time.get_seconds()
    classifier.fit(X, y)
    elapsedSecs = time.get_seconds() - start
    print "t=%ds" % int(elapsedSecs)
def load_pickle_file(filename):
    filename = filename + '.pickle'
    if os.path.isfile(filename):
        print 'Loading %s ...' % filename,
        with open(filename) as f:
            start = time.get_seconds()
            data = pickle.load(f)
            print '%ds' % (time.get_seconds() - start)
            return data
    return None
def train(classifier, X_train, y_train, X_cv, y_cv, y_classes):
    print("Training ...")
    print('Dim', 'X', np.shape(X_train), 'y', np.shape(y_train), 'X_cv', np.shape(X_cv), 'y_cv', np.shape(y_cv))
    start = time.get_seconds()
    classifier.fit(X_train, y_train)
    print("Scoring...")
    score = score_classifier_auc(classifier, X_cv, y_cv, y_classes)
    elapsedSecs = time.get_seconds() - start
    print("t=%ds score=%f" % (int(elapsedSecs), score))
    return score
def train_all_data(classifier, plot2file, X_train, y_train, X_cv, y_cv):
    print("Training ...")
    X = np.concatenate((X_train, X_cv), axis=0)
    y = np.concatenate((y_train, y_cv), axis=0)
    print('Dim', np.shape(X), np.shape(y))
    start = time.get_seconds()
    classifier_cv = deepcopy(classifier)
    classifier.fit(X, y)
    classifier_cv.fit(X_train, y_train)
    score_classifier_auc(classifier_cv, plot2file, X_cv, y_cv, y_cv)
    y_estimate = classifier_cv.predict_proba(X_cv)
    elapsedSecs = time.get_seconds() - start
    print("t=%ds" % int(elapsedSecs))
    return y_estimate
def process_raw_data(mat_data, ispreictal):
    start = time.get_seconds()
    X = []
    y = []
    latencies = []
    prev_data = None
    prev_latency = None
    for segment in mat_data:
        for key in segment.keys():
            if (key.find('segment') > 0):
                keyname = key
        data = segment[keyname]
        # TODO:[email protected]
        data = data[0, 0]
        datas = data['data']
        sz = datas.shape
        for i in range(drate):
            data = datas[:, i * (sz[1] // drate):(i + 1) * (sz[1] // drate)]
            transformed_data = pipeline.apply(data)
            if ispreictal:
                # this is preictal
                y.append(1)
            else:
                # this is interictal
                y.append(0)
            X.append(transformed_data)
            prev_data = data
    print '(%ds)' % (time.get_seconds() - start)
    X = np.array(X)
    y = np.array(y)
    latencies = np.array(latencies)
    if ictal:
        print 'X', X.shape, 'y', y.shape
        return X, y
    elif interictal:
        print 'X', X.shape, 'y', y.shape
        return X, y
    else:
        print 'X', X.shape
        return X
def load_data(self):
    global significant_channels
    global subj
    subj = self.task_core.target  # to be used in auc record
    start = time.get_seconds()
    significant_channels = SignificantChannels(self.task_core).load_data()
    print significant_channels
    print 'ACS time is %d s' % (time.get_seconds() - start)
    # print aa
    data = TrainingDataTask(self.task_core).run()
    classifier_data = train_classifier(self.task_core.classifier, data, normalize=self.task_core.normalize)
    del classifier_data['classifier']  # save disk space
    return classifier_data
def prepare_training_data(ictal_data, interictal_data, cv_ratio):
    ictal_X, ictal_y = flatten(ictal_data.X), ictal_data.y
    interictal_X, interictal_y = flatten(interictal_data.X), interictal_data.y
    sz = ictal_X.shape
    num = sz[0]
    num2 = (interictal_X.shape)[0]
    sub = random.sample(range(0, interictal_X.shape[0]), min(num * 3, num2))
    # sub = random.sample(range(0,interictal_X.shape[0]),num)
    interictal_X = interictal_X[sub, :]
    interictal_y = interictal_y[sub]
    # chop data
    print "chop"
    print interictal_X.shape
    print ictal_X.shape
    # split up data into training set and cross-validation set for both seizure and early sets
    ictal_X_train, ictal_y_train, ictal_X_cv, ictal_y_cv = split_train_random(ictal_X, ictal_y, cv_ratio)
    interictal_X_train, interictal_y_train, interictal_X_cv, interictal_y_cv = split_train_random(interictal_X, interictal_y, cv_ratio)
    print interictal_X_train.shape

    def concat(a, b):
        return np.concatenate((a, b), axis=0)

    X_train = concat(ictal_X_train, interictal_X_train)
    y_train = concat(ictal_y_train, interictal_y_train)
    print X_train.shape
    X_cv = concat(ictal_X_cv, interictal_X_cv)
    y_cv = concat(ictal_y_cv, interictal_y_cv)
    y_classes = np.unique(concat(y_train, y_cv))
    start = time.get_seconds()
    elapsedSecs = time.get_seconds() - start
    print "%ds" % int(elapsedSecs)
    print 'X_train:', np.shape(X_train)
    print 'y_train:', np.shape(y_train)
    print 'X_cv:', np.shape(X_cv)
    print 'y_cv:', np.shape(y_cv)
    print 'y_classes:', y_classes
    return {
        'X_train': X_train,
        'y_train': y_train,
        'X_cv': X_cv,
        'y_cv': y_cv,
        'y_classes': y_classes
    }
def process_raw_data(mat_data, with_latency):
    start = time.get_seconds()
    print 'Loading data',
    X = []
    y = []
    latencies = []
    for segment in mat_data:
        data = segment['data']
        if (significant_channels is not None):
            data = data[significant_channels]
        if data.shape[-1] > 400:
            data = resample(data, 400, axis=data.ndim - 1)
        if with_latency:
            # this is ictal
            latency = segment['latency'][0]
            if latency <= 15:
                y_value = 0  # ictal <= 15
            else:
                y_value = 1  # ictal > 15
            y.append(y_value)
            latencies.append(latency)
            prev_latency = latency
        elif y is not None:
            y.append(2)
        transformed_data = pipeline.apply(data)
        X.append(transformed_data)
    print '(%ds)' % (time.get_seconds() - start)
    X = np.array(X)
    y = np.array(y)
    latencies = np.array(latencies)
    if ictal:
        print 'X', X.shape, 'y', y.shape, 'latencies', latencies.shape
        return X, y, latencies
    elif interictal:
        print 'X', X.shape, 'y', y.shape
        return X, y
    else:
        print 'X', X.shape
        return X
def train_all_data(classifier, X_train, y_train, X_cv, y_cv):
    print "Training ..."
    X = np.concatenate((X_train, X_cv), axis=0)
    y = np.concatenate((y_train, y_cv), axis=0)
    print 'Dim', np.shape(X), np.shape(y)
    start = time.get_seconds()
    total = (y.shape)[0]
    ictalnum = sum(y)
    interictalnum = total - ictalnum
    print ictalnum
    print interictalnum
    weight = np.concatenate((interictalnum / ictalnum * np.ones(ictalnum), np.ones(interictalnum)))
    print weight
    classifier.fit(X, y, sample_weight=weight)
    # np.set_printoptions(threshold=np.nan)
    elapsedSecs = time.get_seconds() - start
    print "t=%ds" % int(elapsedSecs)
def process_raw_data(mat_data):
    start = time.get_seconds()
    print 'Loading data',
    X = []
    y = []
    prev_data = None
    for segment in mat_data:
        data = segment['data']
        yvalue = 1 if preictal else 0
        transformed_data = pipeline.apply(data)
        if gen_preictal and prev_data is not None:
            axis = prev_data.ndim - 1

            def split(d):
                return np.split(d, 2, axis=axis)

            new_data = np.concatenate((split(prev_data)[1], split(data)[0]), axis=axis)
            transformed_new_data = pipeline.apply(new_data)
            X.append(transformed_new_data)
            y.append(yvalue)
        X.append(transformed_data)
        y.append(yvalue)
        prev_data = data
    print '(%ds)' % (time.get_seconds() - start)
    X = np.array(X)
    y = np.array(y)
    if preictal:
        print 'X', X.shape, 'y', y.shape
        return X, y
    elif interictal:
        print 'X', X.shape, 'y', y.shape
        return X, y
    else:
        print 'X', X.shape
        return X
def train_classifier(classifier, data, normalize=False):
    X_train = data.X_train
    y_train = data.y_train
    X_cv = data.X_cv
    y_cv = data.y_cv
    if normalize:
        X_train, X_cv = normalize_data(X_train, X_cv)
    print("Training ...")
    print('Dim', 'X', np.shape(X_train), 'y', np.shape(y_train), 'X_cv', np.shape(X_cv), 'y_cv', np.shape(y_cv))
    start = time.get_seconds()
    classifier.fit(X_train, y_train)
    print("Scoring...")
    S, E = score_classifier_auc(classifier, X_cv, y_cv, data.y_classes)
    score = 0.5 * (S + E)
    elapsedSecs = time.get_seconds() - start
    print("t=%ds score=%f" % (int(elapsedSecs), score))
    return {'classifier': classifier, 'score': score, 'S_auc': S, 'E_auc': E}
def process_raw_data(mat_data, splitsize):
    start = time.get_seconds()
    print 'Loading data',
    # print mat_data
    SamplePerFile = []
    X = []
    y = []
    cc = 0
    for segment in mat_data:
        cc += 1
        print cc
        for skey in segment.keys():
            if "data" in skey.lower():
                mykey = skey
        data = segment[mykey][0][0][0]
        if np.all(data == 0):
            print 'All of data zero, filling random numbers'
            for s in range(int(240000 / splitsize)):
                transformed_data = np.random.randn(transformed_data_length)
                X.append(transformed_data)
            SamplePerFile.append(int(240000 / splitsize))
            continue
        data_tmp = data[np.invert(np.all(data == 0, axis=1))]
        sampleSizeinSecond = data_tmp.shape[0] / 400
        data = data_tmp.transpose()
        axis = data.ndim - 1
        print sampleSizeinSecond
        '''DataSampleSize: split the 10 minutes data into several clips:
        For one second data clip, patient1 and patient2 were finished in 3 hours.
        Dog1 clashed after 7+ hours for out of memory
        try ten second data clip
        '''
        DataSampleSize = splitsize  # data.shape[1] / (totalSample * 1.0)  # try to split data into equal size
        splitIdx = np.arange(DataSampleSize, data.shape[1], DataSampleSize)
        splitIdx = np.int32(np.ceil(splitIdx))
        splitData = np.hsplit(data, splitIdx)
        SPF = 0
        # pre_sample_size = 0
        # channel = 16
        # if target == '2':
        #     channel = 14
        for s in splitData:
            transformed_data = pipeline.apply(s)
            X.append(transformed_data)
            SPF += 1
        SamplePerFile.append(SPF)
        print 'done'
        transformed_data_length = transformed_data.shape[0]
    X = np.array(X)
    print 'X', X.shape
    return X, SamplePerFile
def prepare_training_data(ictal_data, interictal_data, cv_ratio, withlatency=False):
    print 'Preparing training data ...',
    ictal_X, ictal_y = flatten(ictal_data.X), ictal_data.y
    interictal_X, interictal_y = flatten(interictal_data.X), interictal_data.y
    # split up data into training set and cross-validation set for both seizure and early sets
    if withlatency:
        ictal_X_train, ictal_y_train, ictal_X_cv, ictal_y_cv = split_train_ictal(ictal_X, ictal_y, ictal_data.latencies, cv_ratio)
    else:
        ictal_X_train, ictal_y_train, ictal_X_cv, ictal_y_cv = split_train_random(ictal_X, ictal_y, cv_ratio)
    interictal_X_train, interictal_y_train, interictal_X_cv, interictal_y_cv = split_train_random(interictal_X, interictal_y, cv_ratio)

    def concat(a, b):
        return np.concatenate((a, b), axis=0)

    X_train = concat(ictal_X_train, interictal_X_train)
    y_train = concat(ictal_y_train, interictal_y_train)
    X_cv = concat(ictal_X_cv, interictal_X_cv)
    y_cv = concat(ictal_y_cv, interictal_y_cv)
    y_classes = np.unique(concat(y_train, y_cv))
    start = time.get_seconds()
    elapsedSecs = time.get_seconds() - start
    print "%ds" % int(elapsedSecs)
    print 'X_train:', np.shape(X_train)
    print 'y_train:', np.shape(y_train)
    print 'X_cv:', np.shape(X_cv)
    print 'y_cv:', np.shape(y_cv)
    print 'y_classes:', y_classes
    return {
        'X_train': X_train,
        'y_train': y_train,
        'X_cv': X_cv,
        'y_cv': y_cv,
        'y_classes': y_classes
    }
def prepare_training_data(ictal_data, interictal_data, cv_ratio):
    print 'Preparing training data ...',
    ictal_X, ictal_y = flatten(ictal_data.X), ictal_data.y
    interictal_X, interictal_y = flatten(interictal_data.X), interictal_data.y
    # split up data into training set and cross-validation set for both seizure and early sets
    ictal_X_train, ictal_y_train, ictal_X_cv, ictal_y_cv = split_train_ictal(
        ictal_X, ictal_y, ictal_data.latencies, cv_ratio)
    interictal_X_train, interictal_y_train, interictal_X_cv, interictal_y_cv = split_train_random(
        interictal_X, interictal_y, cv_ratio)

    def concat(a, b):
        return np.concatenate((a, b), axis=0)

    X_train = concat(ictal_X_train, interictal_X_train)
    y_train = concat(ictal_y_train, interictal_y_train)
    X_cv = concat(ictal_X_cv, interictal_X_cv)
    y_cv = concat(ictal_y_cv, interictal_y_cv)
    y_classes = np.unique(concat(y_train, y_cv))
    start = time.get_seconds()
    elapsedSecs = time.get_seconds() - start
    print "%ds" % int(elapsedSecs)
    print 'X_train:', np.shape(X_train)
    print 'y_train:', np.shape(y_train)
    print 'X_cv:', np.shape(X_cv)
    print 'y_cv:', np.shape(y_cv)
    print 'y_classes:', y_classes
    return {
        'X_train': X_train,
        'y_train': y_train,
        'X_cv': X_cv,
        'y_cv': y_cv,
        'y_classes': y_classes
    }
def parse_input_data(filename, ref='None'):
    def read_mat_data(filename):
        if os.path.exists(filename):
            mat_data = scipy.io.loadmat(filename)
        else:
            raise Exception("file %s not found" % filename)
        return mat_data

    def report_time(start):
        print '(%ds)' % (time.get_seconds() - start)
        new_start = time.get_seconds()
        return new_start

    # for each data point in ictal, interictal and test,
    # generate (X, <y>, <latency>) per channel
    def get_data(mat_data, data_type='data', problem_channels=[]):
        print 'Loading data',
        if 'data_behavior' in mat_data:
            dataKey = 'data_behavior'
        elif 'data_3sFIR' in mat_data:
            dataKey = 'data_3sFIR'
        else:
            dataKey = 'data'
        print "mat:", mat_data[dataKey].shape
        data = mat_data[dataKey][0:TOTAL_CH_NUM, :]
        if len(problem_channels) != 0:
            for each_channel in problem_channels:
                data = np.delete(data, each_channel - 1, axis=0)
        if data_type == 'data':
            print 'Data:', data.shape, data
            return data
        elif data_type == 'latencies':
            if mat_data[dataKey].shape[0] > TOTAL_CH_NUM:
                latencies = mat_data[dataKey][TOTAL_CH_NUM, :]
            else:
                latencies = np.zeros(len(data[0]))
            print 'Latencies:', latencies
            return latencies

    def plot_EEG(data):
        """ Plot out the original EEG signals. """
        print 'Plotting out the original EEG signals... ',
        channels_fig = plt.figure()
        x1 = np.arange(START_TIME, END_TIME, 1.0 / SAMPLE_FREQUENCY)
        for i in range(0, CH_NUM):
            plt.subplot(CH_NUM, 1, i + 1)
            plt.plot(x1, data[i, START_TIME * SAMPLE_FREQUENCY:END_TIME * SAMPLE_FREQUENCY])
        plt.show()

    def plot_data(data, plot_name='None', s=START_TIME, e=END_TIME, period=1.0 / SAMPLE_FREQUENCY):
        """ Plot out abitrary data. """
        print 'Plotting out figure:', plot_name
        plt.figure()
        plt.title(plot_name)
        x1 = np.arange(s, e, period)
        col = data.shape[0]
        for i in range(0, col):
            if i == 1:
                plt.title(plot_name)
            plt.subplot(col, 1, i + 1)
            plt.plot(x1, data[i])
            plt.ylim(-0.001, 0.001)
        plt.show()

    def calculate_eigenvalue_ref(data, data_type='None'):
        """ Using sliding window to calculate the change of eigenvalue with time. """
        print 'Calculating reference change of eigenvalue in ', data_type, ' domain'
        # the change of eigenvalue with time in frequency/time domain
        eigen_ref = []
        for i in range(int(REF_TIME_START * SAMPLE_FREQUENCY), int(REF_TIME_END * SAMPLE_FREQUENCY)):
            if data_type == 'Time':
                data_correlation = transforms.TimeCorrelation_whole(50, 'usf').apply(data[:, i:i + WINDOW_RANGE])
            elif data_type == 'Frequency':
                data_correlation = transforms.FreqCorrelation_whole(1, 50, 'usf').apply(data[:, i:i + WINDOW_RANGE])
            w = transforms.Eigenvalues().apply(data_correlation)
            eigen_ref.append(w)
        eigen_ref = np.array(eigen_ref)
        eigen_ref = np.swapaxes(eigen_ref, 0, 1)
        print data_type, ' eigen ref:', eigen_ref.shape
        print eigen_ref
        return eigen_ref

    def calculate_eigen_change(data, ref_mean, ref_std, data_type='none'):
        eigen_change = []
        for i in range(int(START_TIME * SAMPLE_FREQUENCY), int(END_TIME * SAMPLE_FREQUENCY), SAMPLE_FREQUENCY / 4):
            if (data_type == 'Time'):
                data_correlation = transforms.TimeCorrelation_whole(50, 'usf').apply(data[:, i:i + WINDOW_RANGE])
            elif (data_type == 'Frequency'):
                data_correlation = transforms.FreqCorrelation_whole(1, 50, 'usf').apply(data[:, i:i + WINDOW_RANGE])
            w = transforms.Eigenvalues().apply(data_correlation)
            eigen_change.append(w)
        eigen_change = np.array(eigen_change)
        eigen_change = np.swapaxes(eigen_change, 0, 1)
        print data_type, ' change:', eigen_change
        for i in range(0, eigen_change.shape[1]):
            for j in range(0, CH_NUM):
                if i < 2:
                    print i, j
                    print eigen_change[j][i],
                    print t_eigen_ref_mean[j],
                    print t_eigen_ref_std[j][0]
                eigen_change[j][i] = (eigen_change[j][i] - ref_mean[j]) / ref_std[j][0]
        print data_type, 'eigen change normalized:', eigen_change.shape
        print eigen_change
        return eigen_change

    def calculate_slope_ref(data):
        """ Calculate the standard deviation and normalized slope to define seizures. """
        print 'Calculating the reference slope and change of slope ... '
        # reference slope
        slope_stats = []
        for i in range(int(REF_TIME_START * SAMPLE_FREQUENCY), int(REF_TIME_END * SAMPLE_FREQUENCY)):
            slopes = []
            for j in range(0, CH_NUM):
                slope = (data[j, i + 1] - data[j, i]) * SAMPLE_FREQUENCY
                slopes.append(slope)
            slope_stats.append(slopes)
        slope_stats = np.array(slope_stats)
        slope_stats = np.swapaxes(slope_stats, 0, 1)
        slope_stats = transforms.Stats().apply(slope_stats)
        print "Slope stats:", slope_stats.shape
        print slope_stats
        return slope_stats

    def calculate_slope_change(data, slope_stats, data_type='change'):
        # change of slope
        # note: smoothed by SMOOTHING_PERIOD s average, calculated for each sec
        slope_change = []
        seizure_num_by_slope = []
        for i in range(int(START_TIME * SAMPLE_FREQUENCY), int(END_TIME * SAMPLE_FREQUENCY), SAMPLE_FREQUENCY):
            seizure_channels_by_slope = 0
            slopes = []
            for j in range(0, CH_NUM):
                average_slope = 0.0
                for k in range(0, int(SMOOTHING_PERIOD * SAMPLE_FREQUENCY)):
                    slope = (data[j, i + 1 + k] - data[j, i + k]) * SAMPLE_FREQUENCY
                    average_slope += abs(slope)
                average_slope /= SMOOTHING_PERIOD * SAMPLE_FREQUENCY
                slope_normalized = abs(average_slope / slope_stats[j][0])
                # slope_normalized = abs(slope / slope_stats[j][0])
                if (slope_normalized > SLOPE_THRESHOLD):
                    seizure_channels_by_slope += 1
                slopes.append(slope_normalized)
            slope_change.append(slopes)
            seizure_num_by_slope.append(seizure_channels_by_slope)
        if data_type == 'change':
            slope_change = np.array(slope_change)
            print 'slope change of each channel', slope_change.shape
            print slope_change
            return slope_change
        elif data_type == 'num':
            seizure_num_by_slope = np.array(seizure_num_by_slope)
            print 'seizure_num_by_slope', seizure_num_by_slope
            return seizure_num_by_slope

    def plot_figures(latencies=[], seizure_num_by_slope=[], slope_change=[], t_eigen_change=[], f_eigen_change=[]):
        # Plot out the seizure period and correlation structure.
        print 'Plotting out the other figures.. ',
        # seizure onset by observation
        fig = plt.figure()
        """
        plt.subplot(ROW_NUM, COL_NUM, 3)
        plt.title('Seizure Time by Behavior')
        x2 = np.arange(START_TIME, END_TIME, 1.0/SAMPLE_FREQUENCY)
        plt.plot(x2, latencies[START_TIME*SAMPLE_FREQUENCY:END_TIME*SAMPLE_FREQUENCY])
        plt.axis([START_TIME, END_TIME, 0, 5])
        plt.xlabel('time(s)')
        plt.ylabel('seizure status')
        """
        # seizure onset by slope_normalized > 2.5
        plt.subplot(ROW_NUM, COL_NUM, 2)
        plt.title('Seizure Time by (Normalized Slope > 2.5) num ')
        # x3 = np.arange(START_TIME, END_TIME, 1.0/SAMPLE_FREQUENCY)
        x3 = np.arange(START_TIME, END_TIME, 1)
        # plt.plot(x3, slope_change)
        plt.plot(x3, seizure_num_by_slope)
        plt.axis([START_TIME, END_TIME, 0, 8])
        plt.ylabel('# of (sn > 2.5)')
        if len(slope_change) != 0:
            # slope change of each channel
            slope_change = np.array(slope_change)
            slope_change = np.swapaxes(slope_change, 0, 1)
            plt.subplot(ROW_NUM, COL_NUM, 1)
            plt.title('Slope change of each channel(moving average by 5 sec)')
            im = plt.imshow(slope_change, origin='lower', aspect='auto',
                            extent=[START_TIME, END_TIME, 1, CH_NUM])  # , interpolation = 'none')
            plt.ylabel('channel')
            fig.subplots_adjust(right=0.93)
            plt.clim(COLOR_MIN, COLOR_MAX)
            cbax = fig.add_axes([0.94, 0.82, 0.01, 0.12])
            fig.colorbar(im, cax=cbax)
        else:
            # time correlation
            plt.subplot(ROW_NUM, COL_NUM, 1)
            plt.title('Time Domain Correlation Analysis (Normalized)')
            im = plt.imshow(t_eigen_change, origin='lower', aspect='auto',
                            extent=[START_TIME, END_TIME, 0, CH_NUM])  # , interpolation = 'none')
            plt.ylabel('eigenvalues')
            plt.clim(COLOR_MIN, COLOR_MAX)
        """
        # phase correlation
        #f_eigen_change = np.array(f_eigen_change)
        #f_eigen_change = np.swapaxes(f_eigen_change, 0, 1)
        print "f eigen change", f_eigen_change.shape
        plt.subplot(ROW_NUM, COL_NUM, 2)
        plt.title('Frequency Domain Correlation Analysis (Normalized)')
        im = plt.imshow(f_eigen_change, origin = 'lower', aspect = 'auto', extent = [START_TIME,END_TIME,0,7])#, interpolation = 'none')
        #plt.colorbar()
        """
        plt.tight_layout()  # adjust the space between plots
        fig.subplots_adjust(right=0.93)
        plt.clim(COLOR_MIN, COLOR_MAX)
        cbax = fig.add_axes([0.94, 0.82, 0.01, 0.12])
        fig.colorbar(im, cax=cbax)
        plt.show()

    start = time.get_seconds()
    initial_start = time.get_seconds()
    mat_data = read_mat_data(filename)
    # data = get_data(mat_data)
    data = get_data(mat_data, problem_channels=PROBLEM_CH)
    start = report_time(start)
    plot_data(data[:, START_TIME * SAMPLE_FREQUENCY:END_TIME * SAMPLE_FREQUENCY], plot_name='EEG')
    # plot_EEG(data)
    start = report_time(start)
    if ref != 'None':
        print "Reference Data:", ref
        ref_mat_data = read_mat_data(ref)
        ref_data = get_data(ref_mat_data, problem_channels=PROBLEM_CH)
        plot_data(ref_data[:, REF_TIME_START * SAMPLE_FREQUENCY:REF_TIME_END * SAMPLE_FREQUENCY],
                  plot_name='Reference EEG', s=REF_TIME_START, e=REF_TIME_END)
    else:
        print "Reference Data:", filename
        ref_data = data
    slope_ref = calculate_slope_ref(ref_data)
    # slope_change = calculate_slope_change(data, slope_ref, 'change')
    slope_num = calculate_slope_change(data, slope_ref, 'num')
    start = report_time(start)
    t_eigen_ref = calculate_eigenvalue_ref(ref_data, data_type="Time")
    start = report_time(start)
    t_eigen_ref_std = transforms.Stats().apply(t_eigen_ref)
    print 'ref std:'
    print t_eigen_ref_std
    start = report_time(start)
    t_eigen_ref_mean = np.average(t_eigen_ref, axis=1)
    print 'ref avg:'
    print t_eigen_ref_mean
    start = report_time(start)
    t_eigen_change = calculate_eigen_change(data, t_eigen_ref_mean, t_eigen_ref_std, data_type='Time')
    start = report_time(start)
    f_eigen_ref = calculate_eigenvalue_ref(ref_data, data_type='Frequency')
    f_eigen_ref_std = transforms.Stats().apply(f_eigen_ref)
    f_eigen_ref_mean = np.average(f_eigen_ref, axis=1)
    f_eigen_change = calculate_eigen_change(data, f_eigen_ref_mean, f_eigen_ref_std, data_type='Frequency')
    start = report_time(start)
    plot_figures(
        latencies=get_data(mat_data, 'latencies'),
        seizure_num_by_slope=slope_num,
        # slope_change=slope_change,
        t_eigen_change=t_eigen_change,
        f_eigen_change=f_eigen_change)
    """
    plot_figures(latencies = get_data(mat_data, 'latencies'),
                 seizure_num_by_slope = slope_num,
                 slope_change = slope_change)
    """
    print '======================'
    print 'Total time:',
    start = report_time(initial_start)
    print
def load_training_data(settings, target, pipeline, check_only, strategy=None, cv_fold_number=None, quiet=False):
    cv = cv_fold_number is not None
    if check_only:
        return load_pipeline_data(settings, target, 'preictal', pipeline, check_only=True, quiet=quiet) or \
            load_pipeline_data(settings, target, 'interictal', pipeline, check_only=True, quiet=quiet)
    preictal, preictal_meta = load_pipeline_data(settings, target, 'preictal', pipeline, check_only=False, quiet=quiet)
    interictal, interictal_meta = load_pipeline_data(settings, target, 'interictal', pipeline, check_only=False, quiet=quiet)
    total_segments = preictal_meta.num_segments + interictal_meta.num_segments
    # print 'total_segments', total_segments
    if not quiet:
        print 'Preparing data ...',
    start = time.get_seconds()

    def make_fold(preictal_X_train, preictal_X_cv, interictal_X_train, interictal_X_cv):
        num_train_segments = preictal_X_train.shape[0] + interictal_X_train.shape[0]
        num_cv_segments = preictal_X_cv.shape[0] + interictal_X_cv.shape[0]
        assert (num_train_segments + num_cv_segments) == total_segments
        flattened_preictal_X_train = flatten(preictal_X_train)
        flattened_interictal_X_train = flatten(interictal_X_train)
        flattened_preictal_X_cv = flatten(preictal_X_cv) if cv else np.empty((0,))
        flattened_interictal_X_cv = flatten(interictal_X_cv) if cv else np.empty((0,))
        X_train = np.concatenate((flattened_preictal_X_train, flattened_interictal_X_train), axis=0)
        X_cv = np.concatenate((flattened_preictal_X_cv, flattened_interictal_X_cv), axis=0)
        preictal_y_train = np.ones((flattened_preictal_X_train.shape[0],))
        preictal_y_cv = np.ones((preictal_X_cv.shape[0],))
        interictal_y_train = np.zeros((flattened_interictal_X_train.shape[0],))
        interictal_y_cv = np.zeros((interictal_X_cv.shape[0],))
        y_train = np.concatenate((preictal_y_train, interictal_y_train), axis=0)
        y_cv = np.concatenate((preictal_y_cv, interictal_y_cv), axis=0)
        X_train, y_train = sklearn.utils.shuffle(X_train, y_train, random_state=0)
        return jsdict({
            'X_train': X_train,
            'y_train': y_train,
            'X_cv': X_cv,
            'y_cv': y_cv,
            'num_train_segments': num_train_segments,
            'num_cv_segments': num_cv_segments
        })

    if cv:
        preictal_X_train, preictal_X_cv = strategy.split_train_cv(preictal, preictal_meta, cv_fold_number)
        interictal_X_train, interictal_X_cv = strategy.split_train_cv(interictal, interictal_meta, cv_fold_number, interictal=True)
        data = make_fold(preictal_X_train, preictal_X_cv, interictal_X_train, interictal_X_cv)
    else:
        preictal_X_train = preictal
        preictal_X_cv = np.empty((0,))
        interictal_X_train = interictal
        interictal_X_cv = np.empty((0,))
        data = make_fold(preictal_X_train, preictal_X_cv, interictal_X_train, interictal_X_cv)
    if not quiet:
        print '%ds' % (time.get_seconds() - start)
    if not quiet:
        print 'X_train', data.X_train.shape, 'y_train', data.y_train.shape, 'X_cv', data.X_cv.shape, 'y_cv', data.y_cv.shape
    return data
def process_raw_data(mat_data, splitsize):
    start = time.get_seconds()
    print 'Loading data',
    X = []
    y = []
    h_num = []
    cc = 0
    hour_num = 0
    pre_sequence_num = 0
    for segment in mat_data:
        cc += 1
        print cc
        for skey in segment.keys():
            if "data" in skey.lower():
                mykey = skey
        try:
            sequence_num = segment[mykey][0][0][4][0][0]
        except:
            sequence_num = random.randint(1, 6)
        print 'seq: %d' % (sequence_num)
        if sequence_num == pre_sequence_num + 1:
            hour_num = hour_num
        else:
            hour_num += 1
        print "hour_num: %d" % (hour_num)
        pre_sequence_num = sequence_num
        if preictal:
            try:
                preictual_sequence = segment[mykey][0][0][4][0][0]
            except:
                preictual_sequence = 1
            else:
                pass
            y_value = preictual_sequence  # temporarily set to sequence number
        elif interictal:
            y_value = 0
        data = segment[mykey][0][0][0]
        # if target == '2':
        #     data = np.delete(data, [3, 9], 1)
        data_tmp = data[np.invert(np.all(data == 0, axis=1))]
        if data_tmp.shape[0] <= 2000:
            print 'too much zeros, skipping'
            continue
        sampleSizeinSecond = data_tmp.shape[0] / 400
        data = data_tmp.transpose()
        axis = data_tmp.ndim - 1
        # tic=time.get_seconds()
        print sampleSizeinSecond
        '''DataSampleSize: split the 10 minutes data into several clips:
        For one second data clip, patient1 and patient2 were finished in 3 hours.
        Dog1 clashed after 7+ hours for out of memory
        try ten second data clip
        '''
        DataSampleSize = splitsize  # data.shape[1]/(totalSample *1.0)  # try to split data into equal size
        splitIdx = np.arange(DataSampleSize, data.shape[1], DataSampleSize)
        splitIdx = np.int32(np.ceil(splitIdx))
        splitData = np.hsplit(data, splitIdx)
        SPF = 0
        for s in splitData:
            if s.shape[1] < 5000:  # is not so sparse
                continue
            else:
                transformed_data = pipeline.apply(s)
                X.append(transformed_data)
                y.append(y_value)
                h_num.append(hour_num)
                SPF += 1
                if np.any(np.isnan(transformed_data)) or np.any(np.isinf(transformed_data)):
                    print 'bug'
        print 'done'
    print '(%ds)' % (time.get_seconds() - start)
    X = np.array(X)
    y = np.array(y)
    h_num = np.array(h_num)
    print 'X', X.shape, 'y', y.shape
    return X, y, h_num
def test(self, X, Y, show_plots=True, bias_name=0, training_samples='training', soz_ch_ids=None, sel_win_num=None, clip_sizes=None):
    print('testing ..')
    start_time = time.get_seconds()
    bias_name = 0
    # self.test_num_to_load = X.shape[0]* X.shape[1]
    # self._test_graphL()
    # self._test_loss()
    # config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
    # config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION
    # config.allow_soft_placement = True
    # self.sess = tf.Session()  # config=config
    # self.sess.run(tf.global_variables_initializer())
    # for counter in np.arange(X.shape[0]):
    #     feed_dict = {self.placeholders['X']: np.squeeze(X[counter,:,:,:]),
    #                  self.placeholders['Y']: np.squeeze(Y[counter,:])}
    #     outs = self.sess.run([self.test_graphL_W, self.test_loss, self.test_pred_classes], feed_dict=feed_dict)
    #     if(show_plots):
    #         plotting_weights('MIT_plots', str(counter+bias_name), outs[0], intervals_seizures=np.squeeze(Y[counter,:]), estimated_states=outs[2])
    #     print(' loss: ', outs[1])
    # Y_flat = X  # np.reshape(Y,(Y.shape[0] * Y.shape[1],))
    # X_flat = Y  # np.reshape(X,(X.shape[0] * X.shape[1], X.shape[2], X.shape[3]))
    # feed_dict = {self.placeholders['X']: X_flat, self.placeholders['Y']: Y_flat}
    classif_minibatch = miniBatchIterator(self.graphL_minibatch, self.classif_core.batch_size,
                                          self.placeholders, X, Y, clip_sizes=clip_sizes)
    y_hat = None
    prob_hat = None
    counter = 0
    while (not classif_minibatch.end()):
        feed_dict = classif_minibatch.next()
        # outs = self.sess.run([self.test_graphL_W, self.test_loss, self.test_pred_classes, self.test_pred_probas], feed_dict=feed_dict)
        outs = self.sess.run([self.graphL_W, self.loss, self.pred_classes, self.pred_probas], feed_dict=feed_dict)
        inn_y = outs[2]
        inn_prob = outs[3][:, 1]
        start_idx, end_idx = classif_minibatch.current_idx()
        true_y = Y[start_idx:end_idx]
        inn_soz_ch_ids = soz_ch_ids[start_idx:end_idx]
        inn_sel_win_num = sel_win_num[start_idx:end_idx]
        if (classif_minibatch.end()):
            sel_idxx = range(inn_y.size - (Y.size - y_hat.size), inn_y.size)
            inn_y = inn_y[sel_idxx]
            inn_prob = inn_prob[sel_idxx]
            true_y = true_y[sel_idxx]
            inn_soz_ch_ids = inn_soz_ch_ids[sel_idxx]
            inn_sel_win_num = inn_sel_win_num[sel_idxx]
        y_hat = inn_y if y_hat is None else np.concatenate((y_hat, inn_y))
        prob_hat = inn_prob if prob_hat is None else np.concatenate((prob_hat, inn_prob), axis=0)
        # print('prob hat shape: ', prob_hat.shape)
        # print('batch num: ', classif_minibatch.batch_num)
        change_arg = np.squeeze(np.argwhere(true_y != 0))
        if (show_plots and change_arg.size != 0):
            feats = outs[0]
            change_arg = list(np.arange(np.max((0, change_arg[0] - 15)), change_arg[0], 1)) + list(change_arg)
            # + list(np.arange(change_arg[-1], np.min((true_y.size,change_arg[-1]+3)),1))
            # change_arg = list(np.arange(9))
            print('change_arg: ', change_arg)
            feats = [feats[int(i)] for i in change_arg]
            inn_y = inn_y[change_arg]
            true_y = true_y[change_arg]
            inn_soz_ch_ids = inn_soz_ch_ids[change_arg]
            inn_sel_win_num = inn_sel_win_num[change_arg]
            plotting_weights('EU_plots/', training_samples + str(counter + bias_name), feats,
                             intervals_seizures=true_y, estimated_states=inn_y,
                             soz_ch_ids=inn_soz_ch_ids, sel_win_num=inn_sel_win_num)
        counter += 1
        print(' loss: ', outs[1])
    # if(self.load_Core.num_classes==2):
    eval_performance(Y, y_hat, prob_hat, training_samples)
    print(' time elapsed: ', time.get_seconds() - start_time)
def process_raw_data(mat_data, with_latency):
    start = time.get_seconds()
    print 'Loading data',
    X = []
    y = []
    latencies = []
    prev_data = None
    prev_sequence = None
    prev_latency = None
    for segment in mat_data:
        if task_predict:
            for key in segment.keys():
                if not key.startswith('_'):
                    break
            data = segment[key]['data'][0, 0]
            if key.startswith('preictal') or key.startswith('interictal'):
                sequence = segment[key]['sequence'][0, 0][0, 0]
            else:
                sequence = None
        else:
            data = segment['data']
            sequence = None
        if pipeline is not None:
            transformed_data = pipeline.apply(data)
        else:
            transformed_data = data
        if with_latency:
            # this is ictal
            latency = segment['latency'][0]
            if latency <= 15:
                y_value = 0  # ictal <= 15
            else:
                y_value = 1  # ictal > 15
            # generate extra ictal training data by taking 2nd half of previous
            # 1-second segment and first half of current segment
            # 0.5-1.5, 1.5-2.5, ..., 13.5-14.5, ..., 15.5-16.5
            # cannot take half of 15 and half of 16 because it cannot be strictly labelled as early or late
            if gen_ictal and prev_data is not None and prev_latency + 1 == latency and prev_latency != 15:
                # gen new data :)
                axis = prev_data.ndim - 1

                def split(d):
                    return np.split(d, 2, axis=axis)

                new_data = np.concatenate((split(prev_data)[1], split(data)[0]), axis=axis)
                if pipeline is not None:
                    X.append(pipeline.apply(new_data))
                else:
                    X.append(new_data.copy())
                y.append(y_value)
                latencies.append(latency - 0.5)
            y.append(y_value)
            latencies.append(latency)
            prev_latency = latency
        elif y is not None:
            # this is interictal
            label = 0 if key.startswith('preictal') else 2
            if key.startswith('preictal') or key.startswith('interictal'):
                # generate extra training data by taking overlaps with previous segment
                # negative gen_ictal indicates we want to correct for DC jump between segments
                # non integer value indicates we want to generate overlaps also for negative examples
                ng = abs(int(gen_ictal))  # number of overlapping windows
                if (gen_ictal and (key.startswith('preictal') or gen_ictal != int(gen_ictal))
                        and prev_data is not None and prev_sequence + 1 == sequence):
                    if isinstance(gen_ictal, bool) or gen_ictal > 0:
                        new_data = np.concatenate((prev_data, data), axis=-1)
                    else:
                        # see 140922-signal-crosscorelation
                        # it looks like each segment was scaled to have DC=0
                        # however different segments will be scaled differently
                        # as result you can't concatenate sequential segments
                        # without undoing the relative offset
                        # import scipy.signal
                        # # we want to filter the samples so as to not be sensitive to change in the signal itself
                        # # over the distance of one sample (1/Fs). Taking 100 samples sounds safe enough.
                        # normal_cutoff = 2./100.  # 1/100*Fs in Hz
                        # order = 6
                        # b, a = scipy.signal.butter(order, normal_cutoff, btype='low', analog=False)
                        # # use filtfilt to get zero phase http://wiki.scipy.org/Cookbook/FiltFilt
                        # W1 = 5000
                        # x1 = scipy.signal.filtfilt(b, a, prev_data[:,-W1:])
                        # # we want the first sample of data after fitering so we will run it backward through
                        # # the filter
                        # x2 = scipy.signal.filtfilt(b, a, data[:,W1-1::-1])
                        # # the first sample of data should be about the same as the last sample of prev_data
                        # data_offset = x2[:,-1] - x1[:,-1]
                        if data.shape[1] > 5 * 60 * 5000:  # only Patients need offset correction
                            data_offset = data[:, :1].mean(axis=-1) - prev_data[:, -1:].mean(axis=-1)
                            data -= data_offset.reshape(-1, 1)
                        new_data = np.concatenate((prev_data, data), axis=-1)
                    # jump = np.mean(np.abs(prev_data[:,-1]-data[:,0])*2./(np.std(prev_data[:,-4000:],axis=-1)+np.std(data[:,:4000],axis=-1)))
                    # if jump < 0.7:
                    #     if ng==1:
                    #         # gen new data :)
                    #         axis = prev_data.ndim - 1
                    #         def split(d):
                    #             return np.split(d, 2, axis=axis)
                    #         new_data = np.concatenate((split(prev_data)[1], split(data)[0]), axis=axis)
                    #         X.append(pipeline.apply(new_data))
                    #         y.append(0)  # seizure
                    #         latencies.append(sequence-0.5)
                    # else:
                    n = data.shape[1]
                    s = n / (ng + 1.)
                    # new_data = np.concatenate((prev_data, data), axis=-1)
                    for i in range(1, ng + 1):
                        start = int(s * i)
                        if pipeline is not None:
                            X.append(pipeline.apply(new_data[:, start:(start + n)]))
                        else:
                            X.append(new_data[:, start:(start + n)].copy())
                        y.append(label)  # seizure
                        latencies.append(sequence - 1. + i / (ng + 1.))
                y.append(label)  # seizure
                latencies.append(float(sequence))
            else:
                y.append(label)  # no seizure
        X.append(transformed_data)
        prev_data = data
        prev_sequence = sequence
    print '(%ds)' % (time.get_seconds() - start)
    X = np.array(X)
    y = np.array(y)
    latencies = np.array(latencies)
    if ictal or preictal or interictal:
        print 'X', X.shape, 'y', y.shape, 'latencies', latencies.shape
        return X, y, latencies
    # elif interictal:
    #     print 'X', X.shape, 'y', y.shape
    #     return X, y
    else:
        print 'X', X.shape
        return X
def main():
    current_path = os.path.dirname(os.path.abspath(__file__))
    print 'Current path: ', current_path
    file_path = input_filename(current_path)
    ref = input_filename(current_path, 'Ref')
    print 'Test file: ', file_path
    print 'Reference file: ', ref
    print
    start_time = input_variable('start_time to calculate')
    end_time = input_variable('end_time to calculate')
    start = time.get_seconds()
    initial_start = time.get_seconds()
    mat_data = read_mat_data(file_path)
    data = get_data(mat_data, problem_channels=PROBLEM_CH)
    start = report_time(start)
    plot_data(data[:, start_time * SAMPLE_FREQUENCY:end_time * SAMPLE_FREQUENCY],
              start_time=start_time, end_time=end_time, plot_name='Exp EEG')
    # plot_data(data[STFT_CH-1:STFT_CH,self.s*SAMPLE_FREQUENCY:self.e*SAMPLE_FREQUENCY], plot_name = 'Exp EEG')
    start = report_time(start)
    latencies = get_data(mat_data, 'latencies')
    ref_mat_data = read_mat_data(ref)
    ref_data = get_data(ref_mat_data, problem_channels=PROBLEM_CH)
    plot_data(ref_data[:, REF_TIME_START * SAMPLE_FREQUENCY:REF_TIME_END * SAMPLE_FREQUENCY],
              plot_name='Reference EEG', start_time=REF_TIME_START, end_time=REF_TIME_END)
    start = report_time(start)
    do_slope = do_eigen = do_stft = do_corr = False
    do_slope = input_yes_or_no('If do slope?')
    do_eigen = input_yes_or_no('If do correlation structure(eigenvalues)?')
    do_stft = input_yes_or_no('If do STFT?')
    do_corr = input_yes_or_no('If do correlation sum?')
    if (do_slope == False and do_eigen == False and do_stft == False and do_corr == False):
        print 'Nothing to calculate.'
        return restart()
    if (do_corr == True):
        print '============================================================================'
        print '== =='
        print '== Note: =='
        print '== \'Correlation sum\' is invented by the author of this program, =='
        print '== no reference paper, =='
        print '== not sure if there is a similar method by others, =='
        print '== not sure if it works well. =='
        print '== Please check the reliability and inform the author for further usage. =='
        print '== =='
        print '============================================================================'
        if not (input_yes_or_no('Read and agree the above?')):
            print 'Not agree.'
            return restart()
    c = do_calculation(start_time, end_time)
    if (do_slope):
        print
        print '=============='
        print '== Do slope =='
        print '=============='
        slope_ref = c.calculate_slope_ref(ref_data)
        # slope_change = calculate_slope_change(data, slope_ref, 'change')
        slope_num = c.calculate_slope_change(data, slope_ref, 'num')
        start = report_time(start)
        print 'ref_data', ref_data.shape
        # stft_change_ref = calculate_stft(ref_data[:, REF_TIME_START*SAMPLE_FREQUENCY:REF_TIME_END*SAMPLE_FREQUENCY], 1, 50)
    if (do_eigen):
        print
        print '=============='
        print '== Do eigen =='
        print '=============='
        t_eigen_ref = c.calculate_eigenvalue_ref(ref_data, data_type="Time")
        start = report_time(start)
        t_eigen_ref_std = transforms.Stats().apply(t_eigen_ref)
        print 'ref std:', t_eigen_ref_std.shape
        start = report_time(start)
        t_eigen_ref_mean = np.average(t_eigen_ref, axis=1)
        print 'ref avg:', t_eigen_ref_mean.shape
        start = report_time(start)
        t_eigen_change = c.calculate_eigen_change(data, t_eigen_ref_mean, t_eigen_ref_std, data_type='Time')
        start = report_time(start)
        f_eigen_ref = c.calculate_eigenvalue_ref(ref_data, data_type='Frequency')
        f_eigen_ref_std = transforms.Stats().apply(f_eigen_ref)
        f_eigen_ref_mean = np.average(f_eigen_ref, axis=1)
        f_eigen_change = c.calculate_eigen_change(data, f_eigen_ref_mean, f_eigen_ref_std, data_type='Frequency')
    if (do_stft):
        print
        print '============='
        print '== Do STFT =='
        print '============='
        stft_change = c.calculate_stft(data, ref=ref_data[:, REF_TIME_START * SAMPLE_FREQUENCY:REF_TIME_END * SAMPLE_FREQUENCY])
        start = report_time(start)
        stft_change = np.swapaxes(stft_change, 0, 1)
        print 'stft change swap', stft_change.shape
    if (do_corr):
        print
        print '========================'
        print '== Do correlation sum =='
        print '========================'
        corr_change_ref = c.calculate_corr_ref(ref_data, data_type='Time')
        corr_change_ref_std = transforms.Stats().apply(corr_change_ref)
        corr_change_ref_mean = np.average(corr_change_ref, axis=1)
        start = report_time(start)
        corr_change = c.calculate_corr_change(data, corr_change_ref_mean, corr_change_ref_std, data_type='Time')
    print
    if input_yes_or_no('If plot out the results?'):
        def check_plot_options():
            latencies_t = slope_num_t = slope_change_t = t_eigen_change_t = f_eigen_change_t = stft_change_t = corr_change_t = []
            stft_ch_t = -1
            num_of_figures = 0
            if input_yes_or_no('If show behavior on the plot?'):
                latencies_t = latencies
                num_of_figures += 1
            if do_slope:
                if input_yes_or_no('If show slope?'):
                    slope_num_t = slope_num
                    num_of_figures += 1
            if do_eigen:
                if input_yes_or_no('If show time domain correlation structure?'):
                    t_eigen_change_t = t_eigen_change
                    num_of_figures += 1
                if input_yes_or_no('If show frequency domain correlation strucure?'):
                    f_eigen_change_t = f_eigen_change
                    num_of_figures += 1
            if do_stft:
                if input_yes_or_no('If show Short time Fourier transform(STFT)?\n Note: cannot print STFT with correlatioin structure.'):
                    stft_ch_t = input_variable('STFT channel to show:')
                    stft_change_t = stft_change[stft_ch_t - 1]
                    num_of_figures += 2
            if do_corr:
                if input_yes_or_no('If show correlation sum?'):
                    corr_change_t = corr_change
                    num_of_figures += 1
            p = c.plot_figures(latencies=latencies_t,
                               seizure_num_by_slope=slope_num_t,
                               slope_change=slope_change_t,
                               t_eigen_change=t_eigen_change_t,
                               f_eigen_change=f_eigen_change_t,
                               stft_change=stft_change_t,
                               stft_ch=stft_ch_t,
                               corr_change=corr_change_t,
                               number_of_figures=num_of_figures)
            print
            if not p:
                print 'Plotted nothing'
            print
            if input_yes_or_no('Plot again?'):
                return check_plot_options()
            else:
                return False
        check_plot_options()
    if do_slope:
        if (input_yes_or_no('Save slope num data?')):
            filename = os.path.basename(file_path)
            savefilename = os.path.join(current_path, '%s_slope_%ds_%ds' % (filename, start_time, end_time))
            scipy.io.savemat(savefilename, {'slope_num': slope_num, 'start_time': start_time, 'end_time': end_time})
            print 'Saved file:%s.mat' % savefilename
            print
    if do_eigen:
        if (input_yes_or_no('Save correlation structure data?')):
            filename = os.path.basename(file_path)
            savefilename = os.path.join(current_path, '%s_correlation_structure_%ds_%ds' % (filename, start_time, end_time))
            scipy.io.savemat(savefilename, {'time_corr_struct': t_eigen_change, 'freq_corr_struct': f_eigen_change,
                                            'start_time': start_time, 'end_time': end_time})
            print 'Saved file:%s.mat' % savefilename
            print
    if do_stft:
        if (input_yes_or_no('Save STFT data?')):
            filename = os.path.basename(file_path)
            savefilename = os.path.join(current_path, '%s_stft_%ds_%ds' % (filename, start_time, end_time))
            scipy.io.savemat(savefilename, {'stft': stft_change, 'start_time': start_time, 'end_time': end_time})
            print 'Saved file:%s.mat' % savefilename
            print
    if do_corr:
        if (input_yes_or_no('Save correlation sum data?')):
            filename = os.path.basename(file_path)
            savefilename = os.path.join(current_path, '%s_corr_%ds_%ds' % (filename, start_time, end_time))
            scipy.io.savemat(savefilename, {'corr': corr_change, 'start_time': start_time, 'end_time': end_time})
            print 'Saved file:%s.mat' % savefilename
            print
    print
    print '======================'
    print 'Total time:',
    start = report_time(initial_start)
    print
    return restart()
def report_time(start):
    print '(Used %dsec)' % (time.get_seconds() - start)
    new_start = time.get_seconds()
    return new_start
def process_raw_data(mat_data):
    start = time.get_seconds()
    print('Loading data', end=' ')
    # print mat_data
    X = []
    y = []
    previous_transformed_data = []  # used in two window model
    previous_sequence = 0
    for segment in mat_data:
        for skey in list(segment.keys()):
            if "_segment_" in skey.lower():
                mykey = skey
        if preictal:
            preictual_sequence = segment[mykey][0][0][4][0][0]
            y_value = preictual_sequence  # temporarily set to sequence number
            if preictual_sequence != previous_sequence + 1:
                previous_transformed_data = []  # if data is not in sequence
            previous_sequence = preictual_sequence
        elif interictal:
            y_value = 0
            previous_transformed_data = []  # interictal data is not in sequence between files
        else:
            previous_transformed_data = []  # test data is not in sequence between files
        data = segment[mykey][0][0][0]
        sampleFrequency = segment[mykey][0][0][2][0][0]
        axis = data.ndim - 1
        if sampleFrequency > targetFrequency:  # resample to target frequency
            data = resample(data, targetFrequency * sampleSizeinSecond, axis=axis)
        '''DataSampleSize: split the 10 minutes data into several clips:
        For one second data clip, patient1 and patient2 were finished in 3 hours.
        Dog1 clashed after 7+ hours for out of memory
        try ten second data clip
        '''
        DataSampleSize = data.shape[1] / (totalSample * 1.0)  # try to split data into equal size
        splitIdx = np.arange(DataSampleSize, data.shape[1], DataSampleSize)
        splitIdx = np.int32(np.ceil(splitIdx))
        splitData = np.hsplit(data, splitIdx)
        # for i in range(totalSample):
        #     s = splitData[i]
        #     s2 = splitData[i+totalSample]
        for s in splitData:
            if s.size > 0:  # is not empty
                # s = 1.0 * s  # convert int to float
                # s_scale = preprocessing.scale(s, axis=0, with_std = True)
                # transformed_data = pipeline.apply([subjectID, s])
                transformed_data = pipeline.apply(s)
                # previous_transformed_data.append(transformed_data)
                # transformed_data2 = pipeline.apply([subjectID, s1])
                # if len(previous_transformed_data) > totalSample/2:
                #     combined_transformed_data = np.concatenate((transformed_data, previous_transformed_data.pop(0)), axis=transformed_data.ndim-1)
                #     X.append(combined_transformed_data)
                X.append(transformed_data)
                if preictal or interictal:
                    y.append(y_value)
    print('(%ds)' % (time.get_seconds() - start))
    X = np.array(X)
    if preictal or interictal:
        y = np.array(y)
        print('X', X.shape, 'y', y.shape)
        return X, y
    else:
        print('X', X.shape)
        return X
def predict_all(make_predictions): for pipeline in pipelines: for (classifier, classifier_name) in classifiers: print('Using pipeline %s with classifier %s' % (pipeline.get_name(), classifier_name)) lines = ['clip,preictal'] subjectID = 0 X_train = y_train = X_test = test_size = [] for target in targets: task_core = TaskCore( cached_data_loader=cached_data_loader, data_dir=data_dir, target=target, pipeline=pipeline, classifier_name=classifier_name, classifier=classifier, normalize=should_normalize(classifier), gen_preictal=pipeline.gen_preictal, cv_ratio=cv_ratio) data = GetCrossSubjectDataTask(task_core).run() # a = np.shape(data.X_test)[0] test_size.append(np.shape(data.X_test)[0]) if subjectID > 0: X_train = np.concatenate((X_train, data.X_train), axis=0) y_train = np.concatenate((y_train, data.y_train), axis=0) X_test = np.concatenate((X_test, data.X_test), axis=0) else: X_train = data.X_train y_train = data.y_train X_test = data.X_test subjectID += 1 #Training task_core = TaskCore(cached_data_loader=cached_data_loader, data_dir=data_dir, target=[], pipeline=pipeline, classifier_name=classifier_name, classifier=classifier, normalize=should_normalize(classifier), gen_preictal=pipeline.gen_preictal, cv_ratio=cv_ratio) y_train = np.ceil(0.1 * y_train) y_train.astype('int_') if should_normalize(classifier): X_train, temp = normalize_data(X_train, X_train) print("Training ...") print('Dim', np.shape(X_train), np.shape(y_train)) start = time.get_seconds() classifier.fit(X_train, y_train) elapsedSecs = time.get_seconds() - start print("t=%ds" % int(elapsedSecs)) y_estimate = classifier.predict_proba(X_train) lr = LogisticRegression(random_state=0) lr.fit(y_estimate, y_train) predictions_proba = classifier.predict_proba(X_test) predictions_calibrated = lr.predict_proba(predictions_proba) #output m = 0 totalSample = 12 startIdx = 0 for target in targets: for i in range(test_size[m] / totalSample): j = i + 1 if j < 10: nstr = '000%d' % j elif j < 100: nstr = '00%d' % j elif j < 1000: nstr = '0%d' % j else: nstr = '%d' % j preictalOverAllSample = 0 for k in range(totalSample): p = predictions_calibrated[i * totalSample + k + startIdx] preictal = translate_prediction(p) preictalOverAllSample += preictal / totalSample newline = '%s_test_segment_%s.mat,%.15f' % ( target, nstr, preictalOverAllSample) lines.append(newline) print(newline) startIdx = startIdx + test_size[m] m += 1 filename = 'submission%d-%s_%s.csv' % (ts, classifier_name, pipeline.get_name()) filename = os.path.join(submission_dir, filename) with open(filename, 'w') as f: print('\n'.join(lines), file=f) print('wrote', filename)
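# predict_all() above calibrates the base classifier by fitting a
# LogisticRegression on the classifier's own predicted probabilities and then
# applying it to the test probabilities. A compact sketch of just that step on
# synthetic data (all names below are illustrative). Note that fitting the
# calibrator on the same data used to train the base model can make the
# calibrated probabilities overconfident; a held-out fold would be the safer
# choice.
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

X_train = np.random.randn(200, 10)
y_train = np.random.randint(0, 2, 200)
X_test = np.random.randn(50, 10)

base = RandomForestClassifier(n_estimators=50, random_state=0).fit(X_train, y_train)
calibrator = LogisticRegression(random_state=0).fit(base.predict_proba(X_train), y_train)

# calibrated preictal probability for each test clip
calibrated = calibrator.predict_proba(base.predict_proba(X_test))[:, 1]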
def load_training_data(settings, target, pipeline, check_only, strategy=None, cv_fold_number=None, quiet=False): cv = cv_fold_number is not None if check_only: return load_pipeline_data(settings, target, 'preictal', pipeline, check_only=True, quiet=quiet) or \ load_pipeline_data(settings, target, 'interictal', pipeline, check_only=True, quiet=quiet) preictal, preictal_meta = load_pipeline_data(settings, target, 'preictal', pipeline, check_only=False, quiet=quiet) interictal, interictal_meta = load_pipeline_data(settings, target, 'interictal', pipeline, check_only=False, quiet=quiet) total_segments = preictal_meta.num_segments + interictal_meta.num_segments # print 'total_segments', total_segments if not quiet: print 'Preparing data ...', start = time.get_seconds() def make_fold(preictal_X_train, preictal_X_cv, interictal_X_train, interictal_X_cv): num_train_segments = preictal_X_train.shape[ 0] + interictal_X_train.shape[0] num_cv_segments = preictal_X_cv.shape[0] + interictal_X_cv.shape[0] assert (num_train_segments + num_cv_segments) == total_segments flattened_preictal_X_train = flatten(preictal_X_train) flattened_interictal_X_train = flatten(interictal_X_train) flattened_preictal_X_cv = flatten(preictal_X_cv) if cv else np.empty( (0, )) flattened_interictal_X_cv = flatten( interictal_X_cv) if cv else np.empty((0, )) X_train = np.concatenate( (flattened_preictal_X_train, flattened_interictal_X_train), axis=0) X_cv = np.concatenate( (flattened_preictal_X_cv, flattened_interictal_X_cv), axis=0) preictal_y_train = np.ones((flattened_preictal_X_train.shape[0], )) preictal_y_cv = np.ones((preictal_X_cv.shape[0], )) interictal_y_train = np.zeros( (flattened_interictal_X_train.shape[0], )) interictal_y_cv = np.zeros((interictal_X_cv.shape[0], )) y_train = np.concatenate((preictal_y_train, interictal_y_train), axis=0) y_cv = np.concatenate((preictal_y_cv, interictal_y_cv), axis=0) X_train, y_train = sklearn.utils.shuffle(X_train, y_train, random_state=0) return jsdict({ 'X_train': X_train, 'y_train': y_train, 'X_cv': X_cv, 'y_cv': y_cv, 'num_train_segments': num_train_segments, 'num_cv_segments': num_cv_segments }) if cv: preictal_X_train, preictal_X_cv = strategy.split_train_cv( preictal, preictal_meta, cv_fold_number) interictal_X_train, interictal_X_cv = strategy.split_train_cv( interictal, interictal_meta, cv_fold_number, interictal=True) data = make_fold(preictal_X_train, preictal_X_cv, interictal_X_train, interictal_X_cv) else: preictal_X_train = preictal preictal_X_cv = np.empty((0, )) interictal_X_train = interictal interictal_X_cv = np.empty((0, )) data = make_fold(preictal_X_train, preictal_X_cv, interictal_X_train, interictal_X_cv) if not quiet: print '%ds' % (time.get_seconds() - start) if not quiet: print 'X_train', data.X_train.shape, 'y_train', data.y_train.shape, 'X_cv', data.X_cv.shape, 'y_cv', data.y_cv.shape return data
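# make_fold() above assembles one training fold by flattening the preictal and
# interictal feature blocks, labelling them 1 and 0, concatenating and
# shuffling. A minimal stand-alone version of that assembly (flatten() in the
# original reshapes each segment to 2-D; plain reshape is used here, and
# assemble_fold is an illustrative name):
import numpy as np
import sklearn.utils

def assemble_fold(preictal, interictal):
    """Stack preictal (label 1) and interictal (label 0) segments and shuffle."""
    X = np.concatenate((preictal.reshape(preictal.shape[0], -1),
                        interictal.reshape(interictal.shape[0], -1)), axis=0)
    y = np.concatenate((np.ones(preictal.shape[0]),
                        np.zeros(interictal.shape[0])), axis=0)
    return sklearn.utils.shuffle(X, y, random_state=0)

# e.g. 30 preictal and 120 interictal segments of 16 channels x 24 features
X_train, y_train = assemble_fold(np.random.randn(30, 16, 24),
                                 np.random.randn(120, 16, 24))
# -> X_train (150, 384), y_train (150,)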
def process_raw_data(mat_data, with_latency): start = time.get_seconds() print 'Loading data', X = [] y = [] latencies = [] prev_data = None prev_latency = None for segment in mat_data: data = segment['data'] transformed_data = pipeline.apply(data) if with_latency: # this is ictal latency = segment['latency'][0] if latency <= 15: y_value = 0 # ictal <= 15 else: y_value = 1 # ictal > 15 # generate extra ictal training data by taking 2nd half of previous # 1-second segment and first half of current segment # 0.5-1.5, 1.5-2.5, ..., 13.5-14.5, ..., 15.5-16.5 # cannot take half of 15 and half of 16 because it cannot be strictly labelled as early or late if gen_ictal and prev_data is not None and prev_latency + 1 == latency and prev_latency != 15: # gen new data :) axis = prev_data.ndim - 1 def split(d): return np.split(d, 2, axis=axis) new_data = np.concatenate((split(prev_data)[1], split(data)[0]), axis=axis) X.append(pipeline.apply(new_data)) y.append(y_value) latencies.append(latency - 0.5) y.append(y_value) latencies.append(latency) prev_latency = latency elif y is not None: # this is interictal y.append(2) X.append(transformed_data) prev_data = data print '(%ds)' % (time.get_seconds() - start) X = np.array(X) y = np.array(y) latencies = np.array(latencies) if ictal: print 'X', X.shape, 'y', y.shape, 'latencies', latencies.shape return X, y, latencies elif interictal: print 'X', X.shape, 'y', y.shape return X, y else: print 'X', X.shape return X
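# The gen_ictal branch above augments the ictal class by stitching the second
# half of the previous 1-second segment to the first half of the current one,
# yielding an extra clip labelled at latency - 0.5. A small sketch of that
# overlap trick (make_overlap_clip is an illustrative name):
import numpy as np

def make_overlap_clip(prev_segment, cur_segment):
    """Return a clip spanning the boundary between two consecutive segments."""
    axis = prev_segment.ndim - 1
    prev_half = np.split(prev_segment, 2, axis=axis)[1]   # 2nd half of previous segment
    cur_half = np.split(cur_segment, 2, axis=axis)[0]     # 1st half of current segment
    return np.concatenate((prev_half, cur_half), axis=axis)

# e.g. two consecutive 1-second segments at 400 Hz -> one boundary clip (16, 400)
overlap = make_overlap_clip(np.random.randn(16, 400), np.random.randn(16, 400))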
def train(self, X, Y): print('training ..') start_time = time.get_seconds() self.classif_minibatch = miniBatchIterator(self.graphL_minibatch, self.classif_core.batch_size, self.placeholders, X, Y) #config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement) #config.gpu_options.allow_growth = True #config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION #config.allow_soft_placement = True self.sess = tf.Session() #config=config self.sess.run(tf.global_variables_initializer()) total_steps = 0 avg_time = 0.0 losses = [] num_epochs = self.classif_core.epochs outs_before = [0,0,0,[0,0]] for epoch in range(num_epochs): self.classif_minibatch.shuffle() iter = 0 while(not self.classif_minibatch.end()): feed_dict = self.classif_minibatch.next() # self.project_GD() self.sess.run([self.opt_op], feed_dict=feed_dict) outs = self.sess.run([self.loss, self.graphL_W, self.adj_mat, self.variables, self.loss_class, self.loss_graphL, self.Z, self.Theta], feed_dict=feed_dict) losses.append(outs[0]) # print('') # print(' loss; %f, loss-class: %f, loss-graphL: %f' %(outs[0], outs[4], outs[5])) # print(' A after projection: \n', (outs[2]+1)/2) if(self.graphL_core.coordinate_gradient): self.adj_mat_coordinate_descent() elif(self.graphL_core.projected_gradient): self.project_GD() outs_after = self.sess.run([self.adj_mat, self.variables], feed_dict=feed_dict) # print(' A diff-inner: ', np.sum(np.abs(outs_after[0]-outs[2]))) # print(' variables0 diff-inner: ', np.sum(np.abs(outs_after[1][0]-outs[3][0]))) # print(' variables1 diff-inner: ', np.sum(np.abs(outs_after[1][1]-outs[3][1]))) # print('') # A_diff = np.sum(np.abs(outs_after[0]-outs_before[2])) # print(' A diff: ', A_diff) # (1+outs_after[0])/2 # print(' variables0 diff: ', np.sum(np.abs(outs_after[1][0]-outs_before[3][0]))) # print(' variables1 diff: ', np.sum(np.abs(outs_after[1][1]-outs_before[3][1]))) # if(A_diff<self.classif_core.A_proj_th and A_diff>0 and iter>10): # self.project_GD() outs_before = outs.copy() # print('Sample Z: ', outs[6][3][0]) # print(' : ', outs[6][3][3]) # print('Sample W: ', np.reshape(outs[1][3], (self.graphL_core.num_nodes, self.graphL_core.num_nodes))) # np.savetxt('Sample_Z.txt', outs[6][3]) # np.savetxt('Sample_W.txt', outs[1][3]) iter += 1 total_steps += 1 if total_steps > self.classif_core.max_total_steps: break print(' epoch: ', epoch) if total_steps > self.classif_core.max_total_steps: break print(' Final A: ', (outs_after[0]+1)/2) print(' Final Theta: \n', outs[7]) # plotting_figure(np.array(losses), 'loss') print(' time elapsed: ', time.get_seconds()-start_time)
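# train() above alternates a TensorFlow gradient step with project_GD() or
# adj_mat_coordinate_descent() to keep the learned adjacency matrix in a
# feasible set; the printouts map the matrix through (A + 1) / 2, which
# suggests its entries are kept in [-1, 1]. The exact projection lives
# elsewhere in the code, so the NumPy sketch below is only a common choice
# under that assumption: symmetrise, then clip the entries.
import numpy as np

def project_adjacency(A, lo=-1.0, hi=1.0):
    """Project a square matrix onto symmetric matrices with entries in [lo, hi]."""
    A = 0.5 * (A + A.T)        # nearest symmetric matrix in Frobenius norm
    return np.clip(A, lo, hi)  # enforce the box constraint entrywise

A = project_adjacency(np.random.randn(8, 8) * 2.0)   # symmetric, entries in [-1, 1]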
def parse_input_data(filename, ref = 'None'): def read_mat_data(filename): if os.path.exists(filename): mat_data = scipy.io.loadmat(filename) else: raise Exception("file %s not found" % filename) return mat_data def report_time(start): print '(%ds)' % (time.get_seconds() - start) new_start = time.get_seconds() return new_start # for each data point in ictal, interictal and test, # generate (X, <y>, <latency>) per channel def get_data(mat_data, data_type = 'data', problem_channels = []): print 'Loading data', if 'data_behavior' in mat_data: dataKey = 'data_behavior' elif 'data_3sFIR' in mat_data: dataKey = 'data_3sFIR' else: dataKey = 'data' print "mat:", mat_data[dataKey].shape data = mat_data[dataKey][0:TOTAL_CH_NUM,:] if len(problem_channels)!=0: for each_channel in problem_channels: data = np.delete(data, each_channel-1, axis = 0) if data_type == 'data': print 'Data:', data.shape, data return data elif data_type == 'latencies': if mat_data[dataKey].shape[0] > TOTAL_CH_NUM: latencies = mat_data[dataKey][TOTAL_CH_NUM, :] else: latencies = np.zeros(len(data[0])) print 'Latencies:', latencies return latencies def plot_EEG(data): """ Plot out the original EEG signals. """ print 'Plotting out the original EEG signals... ', channels_fig = plt.figure() x1 = np.arange(START_TIME, END_TIME, 1.0/SAMPLE_FREQUENCY) for i in range(0,CH_NUM): plt.subplot(CH_NUM, 1, i+1) plt.plot(x1, data[i,START_TIME*SAMPLE_FREQUENCY:END_TIME*SAMPLE_FREQUENCY]) plt.show() def plot_data(data, plot_name = 'None', s = START_TIME, e = END_TIME, period = 1.0/SAMPLE_FREQUENCY): """ Plot out abitrary data. """ print 'Plotting out figure:', plot_name plt.figure() plt.title(plot_name) x1 = np.arange(s, e, period) col = data.shape[0] for i in range(0, col): if i==1: plt.title(plot_name) plt.subplot(col, 1, i+1) plt.plot(x1, data[i]) plt.ylim(-0.0015, 0.0015) #plt.show() def calculate_eigenvalue_ref(data, data_type = 'None'): """ Using sliding window to calculate the change of eigenvalue with time. 
""" print 'Calculating reference change of eigenvalue in ', data_type, ' domain' #the change of eigenvalue with time in frequency/time domain eigen_ref = [] for i in range(int(REF_TIME_START*SAMPLE_FREQUENCY), int(REF_TIME_END*SAMPLE_FREQUENCY)): if data_type == 'Time': data_correlation = transforms.TimeCorrelation_whole(50, 'usf').apply(data[:, i:i+WINDOW_RANGE]) elif data_type == 'Frequency': data_correlation = transforms.FreqCorrelation_whole(1, 50, 'usf').apply(data[:, i:i+WINDOW_RANGE]) w = transforms.Eigenvalues().apply(data_correlation) eigen_ref.append(w) eigen_ref = np.array(eigen_ref) eigen_ref = np.swapaxes(eigen_ref, 0, 1) print data_type, ' eigen ref:', eigen_ref.shape print eigen_ref return eigen_ref def calculate_eigen_change(data, ref_mean, ref_std, data_type = 'none'): eigen_change = [] for i in range(int(START_TIME*SAMPLE_FREQUENCY), int(END_TIME*SAMPLE_FREQUENCY), SAMPLE_FREQUENCY/4): if (data_type == 'Time'): data_correlation = transforms.TimeCorrelation_whole(50, 'usf').apply(data[:, i:i+WINDOW_RANGE]) elif (data_type == 'Frequency'): data_correlation = transforms.FreqCorrelation_whole(1, 50, 'usf').apply(data[:, i:i+WINDOW_RANGE]) w = transforms.Eigenvalues().apply(data_correlation) eigen_change.append(w) eigen_change = np.array(eigen_change) eigen_change = np.swapaxes(eigen_change, 0, 1) print data_type,' change:', eigen_change for i in range (0, eigen_change.shape[1]): for j in range(0, CH_NUM): if i < 2: print i, j print eigen_change[j][i], print t_eigen_ref_mean[j], print t_eigen_ref_std[j][0] eigen_change[j][i] = (eigen_change[j][i] - ref_mean[j]) / ref_std[j][0] print data_type, 'eigen change normalized:', eigen_change.shape print eigen_change return eigen_change def calculate_stft(data, start, end, ref = []): stft_change_ref = [] stft_change_ref_mean = [] stft_change_ref_std = [] #stft_change_ref = transforms.STFT(start, end).apply(ref) for i in range(0, CH_NUM): ch_stft_change = [] ch_stft_change_mean = [] ch_stft_change_std = [] for j in range(0, ref.shape[1], STFT_PERIOD): ref_stft = transforms.STFT(start, end).apply(ref[i, j:j+WINDOW_RANGE]) ch_stft_change.append(ref_stft) stft_change_ref.append(ch_stft_change) ch_stft_change = np.array(ch_stft_change[0:120][0:120]) print 'ch change', ch_stft_change.shape #print ch_stft_change ch_stft_change_mean = np.average(ch_stft_change, axis = 0) ch_stft_change = np.swapaxes(ch_stft_change,0,1) ch_stft_change_std = transforms.Stats().apply(ch_stft_change) stft_change_ref_mean.append(ch_stft_change_mean) stft_change_ref_std.append(ch_stft_change_std) stft_change_ref = np.array(stft_change_ref) print 'stft_change_ref', stft_change_ref.shape #print stft_change_ref #stft_change_ref = np.swapaxes(stft_change_ref, 0, 1) #stft_change_ref_mean = np.average(stft_change_ref, axis = 1) stft_change_ref_mean = np.array(stft_change_ref_mean) print 'stft_change_ref_mean', stft_change_ref_mean.shape stft_change_ref_std = np.array(stft_change_ref_std) print 'stft_change_ref_std', stft_change_ref_std.shape stft_change = [] for i in range(0, CH_NUM): ch_stft_change = [] for j in range(int(START_TIME*SAMPLE_FREQUENCY), int(END_TIME*SAMPLE_FREQUENCY), STFT_PERIOD): data_stft = transforms.STFT(start, end).apply(data[i, j:j+WINDOW_RANGE]) for k in range(data_stft.shape[0]): data_stft[k] = (data_stft[k] - stft_change_ref_mean[i][k])/stft_change_ref_std[i][k][0] ch_stft_change.append(data_stft) stft_change.append(ch_stft_change) """ stft_change = [] for i in range(int(START_TIME*SAMPLE_FREQUENCY), int(END_TIME*SAMPLE_FREQUENCY), STFT_PERIOD): 
data_stft = transforms.STFT(start, end).apply(data[:, i:i+WINDOW_RANGE]) for j in range(data_stft.shape[0]): for k in range(data_stft.shape[1]): if j < 2 and k < 10 and i <100: print j, k print data_stft[j][k] print stft_change_ref_mean[j] print stft_change_ref_std[j][0] data_stft[j][k] = (data_stft[j][k] - stft_change_ref_mean[j])/stft_change_ref_std[j][0] stft_change.append(data_stft) """ stft_change = np.array(stft_change) stft_change = np.swapaxes(stft_change, 0, 1) print 'stft change:', stft_change.shape print stft_change return stft_change def plot_stft(data): fig = plt.figure() """ ax = fig.gca(projection = '3d') X = [] Y = [] Z = [] print 'stft data', data.shape for i in range(0, data.shape[0]): x = [] y = [] for j in range(1, data.shape[1]+1): x.append(i*(float(END_TIME-START_TIME)/data.shape[0])+START_TIME) y.append(j) #Z.append(data[i][j-1]) X.append(x) Y.append(y) X = np.array(X) Y = np.array(Y) Z = np.array(Z) # x, y = np.meshgrid(X, Y) #y, z = np.meshgrid(Y, Z) print 'X:', X.shape #print X print 'Y:', Y.shape #print Y print 'Z:', Z.shape # print Z #ax1 = fig.add_subplot(121) surf = ax.plot_surface(X, Y, data, rstride = 16, cstride = 2, cmap = cm.coolwarm, alpha = 0.3) ax.set_xlabel('Time(s)') ax.set_ylabel('Frequency(Hz)') ax.set_zlabel('Magnitude') fig.colorbar(surf, shrink = 0.5) """ data = np.swapaxes(data, 0 ,1) im = plt.imshow(data, origin = 'lower', aspect = 'auto', extent = [START_TIME,END_TIME,0,50], interpolation = 'none') fig.colorbar(im, shrink = 0.5) plt.title('Normalized STFT') plt.xlabel('Time(s)') plt.ylabel('Frequency(Hz)') plt.tight_layout() #adjust the space between plots plt.show() def calculate_slope_ref(data): """ Calculate the standard deviation and normalized slope to define seizures. """ print 'Calculating the reference slope and change of slope ... 
' #reference slope slope_stats = [] for i in range(int(REF_TIME_START*SAMPLE_FREQUENCY), int(REF_TIME_END*SAMPLE_FREQUENCY)): slopes = [] for j in range(0, CH_NUM): slope = (data[j, i+1] - data[j, i] ) * SAMPLE_FREQUENCY slopes.append(slope) slope_stats.append(slopes) slope_stats = np.array(slope_stats) slope_stats = np.swapaxes(slope_stats, 0 ,1) slope_stats = transforms.Stats().apply(slope_stats) print "Slope stats:", slope_stats.shape print slope_stats return slope_stats def calculate_slope_change(data, slope_stats, data_type = 'change'): #change of slope #note: smoothed by SMOOTHING_PERIOD s average, calculated for each sec slope_change = [] seizure_num_by_slope = [] for i in range(int(START_TIME*SAMPLE_FREQUENCY), int(END_TIME*SAMPLE_FREQUENCY), SAMPLE_FREQUENCY): seizure_channels_by_slope = 0 slopes = [] for j in range(0, CH_NUM): average_slope = 0.0 for k in range(0, int(SMOOTHING_PERIOD*SAMPLE_FREQUENCY)): slope = (data[j, i+1+k] - data[j, i+k] ) * SAMPLE_FREQUENCY average_slope += abs(slope) average_slope /= SMOOTHING_PERIOD*SAMPLE_FREQUENCY slope_normalized = abs(average_slope / slope_stats[j][0]) #slope_normalized = abs(slope / slope_stats[j][0]) if (slope_normalized > SLOPE_THRESHOLD): seizure_channels_by_slope += 1 slopes.append(slope_normalized) slope_change.append(slopes) seizure_num_by_slope.append(seizure_channels_by_slope) if data_type == 'change': slope_change = np.array(slope_change) print 'slope change of each channel', slope_change.shape print slope_change return slope_change elif data_type == 'num': seizure_num_by_slope = np.array(seizure_num_by_slope) print 'seizure_num_by_slope', seizure_num_by_slope return seizure_num_by_slope def plot_figures(latencies = [], seizure_num_by_slope = [], slope_change = [], t_eigen_change = [], f_eigen_change = [], stft_change = []): #Plot out the seizure period and correlation structure. print 'Plotting out the other figures.. 
', #seizure onset by observation fig = plt.figure() plt.subplot(ROW_NUM, COL_NUM, 3) plt.title('Seizure Time by Behavior') x2 = np.arange(START_TIME+BEHAVIOR_SHIFT, END_TIME+BEHAVIOR_SHIFT, 1.0/SAMPLE_FREQUENCY) plt.plot(x2, latencies[START_TIME*SAMPLE_FREQUENCY:END_TIME*SAMPLE_FREQUENCY]) plt.axis([START_TIME, END_TIME, 0, 7]) plt.xlabel('time(s)') plt.ylabel('seizure status') #seizure onset by slope_normalized > 2.5 plt.subplot(ROW_NUM, COL_NUM, 4) plt.title('Seizure Time by (Normalized Slope > 2.5) num ') #x3 = np.arange(START_TIME, END_TIME, 1.0/SAMPLE_FREQUENCY) x3 = np.arange(START_TIME, END_TIME, 1) #plt.plot(x3, slope_change) plt.plot(x3, seizure_num_by_slope) plt.axis([START_TIME, END_TIME, 0, CH_NUM]) plt.ylabel('# of (sn > 2.5)') if len(slope_change) != 0: #slope change of each channel slope_change = np.array(slope_change) slope_change = np.swapaxes(slope_change, 0, 1) plt.subplot(ROW_NUM, COL_NUM, 1) plt.title('Slope change of each channel(moving average by 5 sec)') im = plt.imshow(slope_change, origin = 'lower', aspect = 'auto', extent = [START_TIME,END_TIME,1,CH_NUM], interpolation = 'none') plt.ylabel('channel') fig.subplots_adjust(right = 0.93) plt.clim(COLOR_MIN, COLOR_MAX) cbax = fig.add_axes([0.94, 0.82, 0.01,0.12]) fig.colorbar(im, cax = cbax) elif (len(stft_change)!=0): plt.subplot2grid((ROW_NUM, COL_NUM), (0,0), rowspan = 2) stft_change = np.swapaxes(stft_change, 0 ,1) im = plt.imshow(stft_change, origin = 'lower', aspect = 'auto', extent = [START_TIME,END_TIME,0,50], interpolation = 'none') plt.title('Normalized STFT') plt.xlabel('Time(s)') plt.ylabel('Frequency(Hz)') plt.tight_layout() #adjust the space between plots fig.subplots_adjust(right = 0.93) plt.clim(COLOR_MIN, COLOR_MAX) cbax = fig.add_axes([0.94, 0.82, 0.01,0.12]) fig.colorbar(im, cax = cbax) else: #time correlation plt.subplot(ROW_NUM, COL_NUM, 1) plt.title('Time Domain Correlation Analysis (Normalized)') plt.imshow(t_eigen_change, origin = 'lower', aspect = 'auto', extent = [START_TIME,END_TIME,0,7], interpolation = 'none') plt.ylabel('eigenvalues') plt.clim(COLOR_MIN, COLOR_MAX) #phase correlation #f_eigen_change = np.array(f_eigen_change) #f_eigen_change = np.swapaxes(f_eigen_change, 0, 1) print "f eigen change", f_eigen_change.shape plt.subplot(ROW_NUM, COL_NUM, 2) plt.title('Frequency Domain Correlation Analysis (Normalized)') im = plt.imshow(f_eigen_change, origin = 'lower', aspect = 'auto', extent = [START_TIME,END_TIME,0,7], interpolation = 'none') #plt.colorbar() plt.tight_layout() #adjust the space between plots fig.subplots_adjust(right = 0.93) plt.clim(-5,8) cbax = fig.add_axes([0.94, 0.82, 0.01,0.12]) fig.colorbar(im, cax = cbax) plt.show() start = time.get_seconds() initial_start = time.get_seconds() mat_data = read_mat_data(filename) #data = get_data(mat_data) data = get_data(mat_data, problem_channels = PROBLEM_CH) start = report_time(start) plot_data(data[STFT_CH:STFT_CH+1,START_TIME*SAMPLE_FREQUENCY:END_TIME*SAMPLE_FREQUENCY], plot_name = 'EEG') #plot_EEG(data) start = report_time(start) if ref!='None': print "Reference Data:", ref ref_mat_data = read_mat_data(ref) ref_data = get_data(ref_mat_data, problem_channels = PROBLEM_CH) #plot_data(ref_data[:,REF_TIME_START*SAMPLE_FREQUENCY:REF_TIME_END*SAMPLE_FREQUENCY], plot_name = 'Reference EEG', s = REF_TIME_START, e = REF_TIME_END) else: print "Reference Data:", filename ref_data = data slope_ref = calculate_slope_ref(ref_data) #slope_change = calculate_slope_change(data, slope_ref, 'change') slope_num = calculate_slope_change(data, 
slope_ref, 'num') start = report_time(start) print 'ref_data', ref_data.shape #stft_change_ref = calculate_stft(ref_data[:, REF_TIME_START*SAMPLE_FREQUENCY:REF_TIME_END*SAMPLE_FREQUENCY], 1, 50) stft_change = calculate_stft(data, 1, 50, ref = ref_data[:, REF_TIME_START*SAMPLE_FREQUENCY:REF_TIME_END*SAMPLE_FREQUENCY]) start = report_time(start) stft_change = np.swapaxes(stft_change, 0 , 1) print 'stft change swap', stft_change.shape #plot_stft(stft_change[STFT_CH]) plot_figures(latencies = get_data(mat_data, 'latencies'), seizure_num_by_slope = slope_num, # slope_change = slope_change, # t_eigen_change = t_eigen_change, #f_eigen_change = f_eigen_change, stft_change = stft_change[STFT_CH] ) """ t_eigen_ref = calculate_eigenvalue_ref(ref_data, data_type = "Time") start = report_time(start) t_eigen_ref_std = transforms.Stats().apply(t_eigen_ref) print 'ref std:' print t_eigen_ref_std start = report_time(start) t_eigen_ref_mean = np.average(t_eigen_ref, axis = 1) print 'ref avg:' print t_eigen_ref_mean start = report_time(start) t_eigen_change = calculate_eigen_change(data, t_eigen_ref_mean, t_eigen_ref_std, data_type = 'Time') start = report_time(start) f_eigen_ref = calculate_eigenvalue_ref(ref_data, data_type = 'Frequency') f_eigen_ref_std = transforms.Stats().apply(f_eigen_ref) f_eigen_ref_mean = np.average(f_eigen_ref, axis = 1) f_eigen_change = calculate_eigen_change(data, f_eigen_ref_mean, f_eigen_ref_std, data_type = 'Frequency') start = report_time(start) plot_figures(latencies = get_data(mat_data, 'latencies'), seizure_num_by_slope = slope_num, # slope_change = slope_change, t_eigen_change = t_eigen_change, f_eigen_change = f_eigen_change ) """ """ plot_figures(latencies = get_data(mat_data, 'latencies'), seizure_num_by_slope = slope_num, slope_change = slope_change) """ print '======================' print 'Total time:', start = report_time(initial_start) print
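# calculate_slope_ref() / calculate_slope_change() above flag a channel as
# seizing when its smoothed absolute slope, normalised by the standard
# deviation of that channel's slope over a quiet reference window, exceeds
# SLOPE_THRESHOLD, and then count how many channels exceed it each second.
# A compact NumPy restatement of that rule (all names below are illustrative;
# the exact statistics come from transforms.Stats in the original):
import numpy as np

def normalized_slope(window, ref, fs, smooth_sec=5.0, threshold=2.5):
    """Per-channel normalised slope and the number of channels above threshold."""
    ref_slope_std = np.diff(ref, axis=1).std(axis=1) * fs      # std of reference slopes
    smooth_n = int(smooth_sec * fs)
    avg_abs_slope = np.abs(np.diff(window[:, :smooth_n + 1], axis=1) * fs).mean(axis=1)
    norm = avg_abs_slope / ref_slope_std
    return norm, int((norm > threshold).sum())

fs = 256
ref = np.random.randn(8, 60 * fs)       # 60 s quiet reference window
window = np.random.randn(8, 6 * fs)     # window starting at the second of interest
norm, n_seizing = normalized_slope(window, ref, fs)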
def predict_all(make_predictions): for pipeline in pipelines: for (classifier, classifier_name) in classifiers: print 'Using pipeline %s with classifier %s' % (pipeline.get_name(), classifier_name) lines = ['clip,preictal'] subjectID = 0 X_train = y_train = X_test = test_size = [] for target in targets: task_core = TaskCore(cached_data_loader=cached_data_loader, data_dir=data_dir, target=target, pipeline=pipeline, classifier_name=classifier_name, classifier=classifier, normalize=should_normalize(classifier), gen_preictal=pipeline.gen_preictal, cv_ratio=cv_ratio) data = GetCrossSubjectDataTask(task_core).run() # a = np.shape(data.X_test)[0] test_size.append(np.shape(data.X_test)[0]) if subjectID > 0: X_train = np.concatenate((X_train, data.X_train), axis=0) y_train = np.concatenate((y_train, data.y_train), axis=0) X_test = np.concatenate((X_test, data.X_test), axis=0) else: X_train = data.X_train y_train = data.y_train X_test = data.X_test subjectID += 1 #Training task_core = TaskCore(cached_data_loader=cached_data_loader, data_dir=data_dir, target=[], pipeline=pipeline, classifier_name=classifier_name, classifier=classifier, normalize=should_normalize(classifier), gen_preictal=pipeline.gen_preictal, cv_ratio=cv_ratio) y_train = np.ceil(0.1*y_train) y_train.astype('int_') if should_normalize(classifier): X_train, temp = normalize_data(X_train, X_train) print "Training ..." print 'Dim', np.shape(X_train), np.shape(y_train) start = time.get_seconds() classifier.fit(X_train, y_train) elapsedSecs = time.get_seconds() - start print "t=%ds" % int(elapsedSecs) y_estimate = classifier.predict_proba(X_train) lr = LogisticRegression(random_state = 0) lr.fit(y_estimate, y_train) predictions_proba = classifier.predict_proba(X_test) predictions_calibrated = lr.predict_proba(predictions_proba) #output m = 0 totalSample = 12 startIdx = 0 for target in targets: for i in range(test_size[m]/totalSample): j = i+1 if j < 10: nstr = '000%d' %j elif j < 100: nstr = '00%d' %j elif j < 1000: nstr = '0%d' %j else: nstr = '%d' %j preictalOverAllSample = 0 for k in range(totalSample): p = predictions_calibrated[i*totalSample+k+startIdx] preictal = translate_prediction(p) preictalOverAllSample += preictal/totalSample newline = '%s_test_segment_%s.mat,%.15f' % (target, nstr, preictalOverAllSample) lines.append(newline) print newline startIdx = startIdx + test_size[m] m += 1 filename = 'submission%d-%s_%s.csv' % (ts, classifier_name, pipeline.get_name()) filename = os.path.join(submission_dir, filename) with open(filename, 'w') as f: print >> f, '\n'.join(lines) print 'wrote', filename
def _load_data(self): """ .. todo:: WRITEME """ import common.time as time start = time.get_seconds() from seizure.tasks import load_mat_data, count_mat_data from seizure.transforms import UnitScaleFeat, UnitScale import seizure.tasks seizure.tasks.task_predict = True # data_type is one of ('preictal', 'interictal', 'test') # target is one of 'Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2' data_dir = self.path data_types = ['preictal', 'interictal'] if self.expect_labels else ['test'] N = 0 for data_type in data_types: for i in count_mat_data(data_dir, self.target, data_type): N += 1 print 'Number of segments', N Nf = None row = 0 count = 0 for data_type in data_types: mat_data = load_mat_data(data_dir, self.target, data_type) for segment in mat_data: for key in segment.keys(): if not key.startswith('_'): break data = segment[key]['data'][0,0] assert data.shape[-1] == self.Nsamples istartend = np.linspace(0.,self.Nsamples - self.window_size, self.nwindows) for i in range(self.nwindows): count += 1 if (count-1) % self.skip != 0: continue window = data[:,int(istartend[i]):int(istartend[i] + self.window_size)] if Nf is None: Nchannels = window.shape[0] print 'Number of channels', Nchannels N *= Nchannels * self.nwindows / self.skip print 'Number of examples', N Nf = window.shape[1] print 'Number of features', Nf X = np.empty((N, Nf)) y = np.empty(N) if self.scale_option == 'usf': window = UnitScaleFeat().apply(window) elif self.scale_option == 'us': window = UnitScale().apply(window) X[row:row+Nchannels, :] = window y[row:row+Nchannels] = (0 if data_type == 'interictal' else 1) row += Nchannels if self.expect_labels: if self.one_hot: # get unique labels and map them to one-hot positions labels = np.unique(y) labels = dict((x, i) for (i, x) in enumerate(labels)) one_hot = np.zeros((y.shape[0], len(labels)), dtype='float32') for i in xrange(y.shape[0]): label = y[i] label_position = labels[label] one_hot[i, label_position] = 1. y = one_hot print X.shape, y.shape, y.mean(axis=-1) print 'time %ds' % (time.get_seconds() - start) return X, y
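# _load_data() above carves each segment into self.nwindows windows whose
# start offsets are spread evenly with np.linspace, so windows may overlap
# when nwindows * window_size exceeds the segment length. A small sketch of
# that indexing with local, illustrative names:
import numpy as np

def window_starts(n_samples, window_size, n_windows):
    """Evenly spaced window start indices covering [0, n_samples - window_size]."""
    return np.linspace(0.0, n_samples - window_size, n_windows).astype(int)

segment = np.random.randn(16, 240000)                  # (channels, samples)
starts = window_starts(segment.shape[1], 3000, 120)
windows = [segment[:, s:s + 3000] for s in starts]     # 120 windows of shape (16, 3000)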
def process_raw_data(mat_data, with_latency): start = time.get_seconds() initial_start = time.get_seconds() print 'Loading data', if 'data_behavior' in mat_data: dataKey = 'data_behavior' elif 'data_3sFIR' in mat_data: dataKey = 'data_3sFIR' else: dataKey = 'data' print "mat:", mat_data[dataKey].shape data = mat_data[dataKey][0:CH_NUM, :] print data.shape, data if mat_data[dataKey].shape[0] > CH_NUM: latencies = mat_data[dataKey][CH_NUM, :] else: latencies = np.zeros(len(data[0])) print latencies """ Plot out the original EEG signals. """ print 'Plotting out the original EEG signals... ', channels_fig = plt.figure() x1 = np.arange(START_TIME, END_TIME, 1.0 / SAMPLE_FREQUENCY) for i in range(0, CH_NUM): plt.subplot(CH_NUM, 1, i + 1) plt.plot( x1, data[i, START_TIME * SAMPLE_FREQUENCY:END_TIME * SAMPLE_FREQUENCY]) print '(%ds)' % (time.get_seconds() - start) start = time.get_seconds() """ Using sliding window to calculate the change of eigenvalue with time. """ print 'Calculating change of eigenvalue in frequncy and time domain ... ', #the change of eigenvalue with time in frequency/time domain t_eigen_ref = [] for i in range(int(REF_TIME_START * SAMPLE_FREQUENCY), int(REF_TIME_END * SAMPLE_FREQUENCY)): data_tc = transforms.TimeCorrelation_whole(50, 'usf').apply( data[:, i:i + WINDOW_RANGE]) w = transforms.Eigenvalues().apply(data_tc) t_eigen_ref.append(w) t_eigen_ref = np.array(t_eigen_ref) t_eigen_ref = np.swapaxes(t_eigen_ref, 0, 1) t_eigen_ref_std = transforms.Stats().apply(t_eigen_ref) t_eigen_ref_mean = np.average(t_eigen_ref, axis=1) print 't eigen ref', t_eigen_ref print "t eigen ref std", t_eigen_ref_std print 't eigen ref mean', t_eigen_ref_mean f_eigen_change = [] t_eigen_change = [] for i in range(int(START_TIME * SAMPLE_FREQUENCY), int(END_TIME * SAMPLE_FREQUENCY)): data_tc = transforms.TimeCorrelation_whole(50, 'usf').apply( data[:, i:i + WINDOW_RANGE]) w = transforms.Eigenvalues().apply(data_tc) t_eigen_change.append(w) data_fc = transforms.FreqCorrelation_whole(1, 50, 'usf').apply( data[:, i:i + WINDOW_RANGE]) w = transforms.Eigenvalues().apply(data_fc) f_eigen_change.append(w) t_eigen_change = np.array(t_eigen_change) t_eigen_change = np.swapaxes(t_eigen_change, 0, 1) print 't eigen change', t_eigen_change for i in range(0, t_eigen_change.shape[1]): for j in range(0, CH_NUM): t_eigen_change[j][i] = ( t_eigen_change[j][i] - t_eigen_ref_mean[j]) / t_eigen_ref_std[j][0] if i < 2: print i, j print t_eigen_change[j][i] print t_eigen_ref_mean[j] print t_eigen_ref_std[j][0] print 't eigen change normalized', t_eigen_change """ for i in range(0, len(f_eigen_change)): f_avg = 0 t_avg = 0 for j in range(0, SMOOTHING_PERIOD): f_avg += f_eigen_change[] """ print '(%ds)' % (time.get_seconds() - start) start = time.get_seconds() """ Calculate the standard deviation and normalized slope to define seizures. """ print 'Calculating the reference slope and change of slope ... 
', #reference slope slope_stats = [] for i in range(int(REF_TIME_START * SAMPLE_FREQUENCY), int(REF_TIME_END * SAMPLE_FREQUENCY)): slopes = [] for j in range(0, CH_NUM): slope = (data[j, i + 1] - data[j, i]) * SAMPLE_FREQUENCY slopes.append(slope) slope_stats.append(slopes) slope_stats = np.array(slope_stats) slope_stats = transforms.Stats().apply(slope_stats) print "slope stats:", slope_stats.shape #change of slope #note: smoothed by SMOOTHING_PERIOD s average, calculated for each sec slope_change = [] seizure_num_by_slope = [] for i in range(int(START_TIME * SAMPLE_FREQUENCY), int(END_TIME * SAMPLE_FREQUENCY), SAMPLE_FREQUENCY): seizure_channels_by_slope = 0 slopes = [] for j in range(0, CH_NUM): average_slope = 0.0 for k in range(0, SMOOTHING_PERIOD * SAMPLE_FREQUENCY): slope = (data[j, i + 1 + k] - data[j, i + k]) * SAMPLE_FREQUENCY average_slope += slope average_slope /= SMOOTHING_PERIOD slope_normalized = abs(average_slope / slope_stats[j][0]) #slope_normalized = abs(slope / slope_stats[j][0]) if (slope_normalized > SLOPE_THRESHOLD): seizure_channels_by_slope += 1 slopes.append(slope_normalized) slope_change.append(slopes) seizure_num_by_slope.append(seizure_channels_by_slope) slope_change = np.array(slope_change) print 'slope change of each channel', slope_change.shape seizure_num_by_slope = np.array(seizure_num_by_slope) print 'seizure_num_by_slope', seizure_num_by_slope print '(%ds)' % (time.get_seconds() - start) start = time.get_seconds() #Plot out the seizure period and correlation structure. print 'Plotting out the other figures.. ', #seizure onset by observation fig = plt.figure() plt.subplot(ROW_NUM, COL_NUM, 3) plt.title('Seizure Time by Behavior') x2 = np.arange(START_TIME, END_TIME, 1.0 / SAMPLE_FREQUENCY) plt.plot( x2, latencies[START_TIME * SAMPLE_FREQUENCY:END_TIME * SAMPLE_FREQUENCY]) plt.axis([START_TIME, END_TIME, 0, 5]) plt.xlabel('time(s)') plt.ylabel('seizure status') #seizure onset by slope_normalized > 2.5 plt.subplot(ROW_NUM, COL_NUM, 4) plt.title('Seizure Time by (Normalized Slope > 2.5) num ') #x3 = np.arange(START_TIME, END_TIME, 1.0/SAMPLE_FREQUENCY) x3 = np.arange(START_TIME, END_TIME, 1) #plt.plot(x3, slope_change) plt.plot(x3, seizure_num_by_slope) plt.axis([START_TIME, END_TIME, 0, 8]) plt.ylabel('# of (sn > 2.5)') #slope change of each channel slope_change = np.array(slope_change) slope_change = np.swapaxes(slope_change, 0, 1) plt.subplot(ROW_NUM, COL_NUM, 1) plt.title('Slope change of each channel(moving average by 5 sec)') plt.imshow(slope_change, origin='lower', aspect='auto', extent=[START_TIME, END_TIME, 1, CH_NUM], interpolation='none') plt.ylabel('channel') #plt.colorbar() """ #time correlation plt.subplot(ROW_NUM, COL_NUM, 1) plt.title('Time Domain Correlation Analysis') plt.imshow(t_eigen_change, origin = 'lower', aspect = 'auto', extent = [START_TIME,END_TIME,0,7])#, interpolation = 'none') plt.ylabel('eigenvalues') plt.colorbar() #phase correlation f_eigen_change = np.array(f_eigen_change) f_eigen_change = np.swapaxes(f_eigen_change, 0, 1) print "f eigen change", f_eigen_change.shape plt.subplot(ROW_NUM, COL_NUM, 2) plt.title('Frequency Domain Correlation Analysis') plt.imshow(f_eigen_change, origin = 'lower', aspect = 'auto', extent = [START_TIME,END_TIME,0,7], interpolation = 'none') #plt.colorbar() print '(%ds)' % (time.get_seconds() - start) start = time.get_seconds() latencies = np.array(latencies) print latencies """ plt.tight_layout() #adjust the space between plots plt.show()
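# The time- and frequency-domain "eigen change" computed above tracks the
# eigenvalues of a sliding-window channel-correlation matrix and z-scores
# them against a reference window; transforms.TimeCorrelation_whole and
# transforms.Eigenvalues do the heavy lifting in the original. A plain NumPy
# sketch of the same idea (names here are illustrative):
import numpy as np

def correlation_eigenvalues(window):
    """Sorted eigenvalues of the channel-correlation matrix of a (channels, samples) window."""
    corr = np.corrcoef(window)                  # (channels, channels)
    return np.sort(np.linalg.eigvalsh(corr))    # ascending eigenvalues

def eigen_z_scores(window, ref_windows):
    """Z-score a window's correlation eigenvalues against a set of reference windows."""
    ref = np.array([correlation_eigenvalues(w) for w in ref_windows])
    mean, std = ref.mean(axis=0), ref.std(axis=0)
    return (correlation_eigenvalues(window) - mean) / std

fs = 256
ref_windows = [np.random.randn(8, fs) for _ in range(30)]     # reference 1 s windows
z = eigen_z_scores(np.random.randn(8, fs), ref_windows)       # one z-score per eigenvalue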