Example #1
def save_pickle_file(filename, data):
    start = time.get_seconds()
    filename = filename + ".pickle"
    print "Dumping to %s" % filename,
    with open(filename, "w") as f:
        pickle.dump(data, f)
        print "%ds" % (time.get_seconds() - start)
Example #2
    def EU_matlab_run(self, load_Core):
        start_time = time.get_seconds()

        #         load_Core.matlab_engin.loading_EU_main(nargout=0)
        #         X = load_Core.matlab_engin.workspace['X']
        #         y = load_Core.matlab_engin.workspace['y']

        matFile = load_EU_features(self.task_core.data_dir,
                                   self.task_core.target, load_Core)

        matlab_load_core = Matlab_Load_Core(matFile)
        matlab_load_core.target = self.task_core.target
        matlab_load_core.y_train = class_relabel(matlab_load_core.y_train)
        if (matlab_load_core.y_test is not None):
            matlab_load_core.y_test = class_relabel(matlab_load_core.y_test)

        matlab_load_core.structural_inf = load_side_adj(
            self.task_core.sidinfo_dir, self.task_core.target,
            self.task_core.adj_calc_mode, load_Core)

        print('    X training', matlab_load_core.X_train.shape, 'y training',
              matlab_load_core.y_train.shape)
        print('    X testing', matlab_load_core.X_test.shape, 'y testing',
              matlab_load_core.y_test.shape)

        matlab_load_core.settings_TrainNumFiles, matlab_load_core.settings_TestNumFiles = load_EU_settings(
            self.task_core.settings_dir, self.task_core.target, load_Core)

        print('    time elapsed: ', time.get_seconds() - start_time)
        return matlab_load_core.X_train.shape[
            1], matlab_load_core.X_train.shape[2:], matlab_load_core
Example #3
def save_pickle_file(filename, data):
    start = time.get_seconds()
    filename = filename + '.pickle'
    print 'Dumping to %s' % filename,
    with open(filename, 'w') as f:
        pickle.dump(data, f)
        print '%ds' % (time.get_seconds() - start)
Example #4
    def load_data(self):
        global significant_channels
        global subj
        subj = self.task_core.target  # to be used in auc record
        start = time.get_seconds()
        filename = 'data-cache/significant_channels_%s' % self.task_core.target
        significant_channels = SignificantChannels(self.task_core).load_data()
        save_hkl_file(filename, significant_channels)
        print significant_channels
        print 'ACS time is %d s' % (time.get_seconds() - start)

        start = time.get_seconds()
        data = TrainingDataTask(self.task_core).run()
        y_classes = data.y_classes
        del data

        point = time.get_seconds()
        time_prepare = (point - start)
        print 'Time to prepare data for %s is %f seconds.' % (
            self.task_core.target, time_prepare)

        classifier_data = TrainClassifierTask(self.task_core).run()

        point2 = time.get_seconds()
        time_training = (point2 - point)
        print 'Time to train data for %s is %f seconds.' % (
            self.task_core.target, time_training)

        test_data = LoadTestDataTask(self.task_core).run()
        X_test = flatten(test_data.X)

        return make_predictions(self.task_core.target, X_test, y_classes,
                                classifier_data)
Example #5
def train(classifier, training_data, quiet=False):
    X_train = training_data.X_train
    y_train = training_data.y_train
    if not quiet: print 'Training ...',
    start = time.get_seconds()
    classifier.fit(X_train, y_train)
    if not quiet: print '%ds' % (time.get_seconds() - start)
Example #6
    def MITCHB_run(self, load_Core):
        start_time = time.get_seconds()
        out_data = load_edf_data(self.task_core.data_dir,
                                 self.task_core.target, load_Core)
        num_clips = []
        if (load_Core.concat):
            X = None
            y = None
        else:
            X = []
            y = []

        for data, file_name, seizure_start_time_offsets, seizure_lengths in out_data:
            inner_x, inner_y, num_nodes, dim, conv_sizes = windowing_data(
                data, seizure_start_time_offsets, seizure_lengths, load_Core)
            #             print('inner_x: ', np.array(inner_x).shape)
            #             num_clips.append(np.array(inner_x).shape[0])
            if (load_Core.concat):
                if (X is None):
                    X = np.array(inner_x)
                    y = np.array(inner_y)
                else:
                    X = np.concatenate((X, inner_x), axis=0)
                    y = np.concatenate((y, inner_y), axis=0)
            else:
                X.append(inner_x)
                y.append(inner_y)

        X = np.array(X)
        y = np.array(y)
        print('    X', X.shape, 'y', y.shape)
        print('    time elapsed: ', time.get_seconds() - start_time)
        return X, y, num_nodes, dim, conv_sizes  #, num_clips
Example #7
def save_pickle_file(filename, data):
    start = time.get_seconds()
    filename = filename + '.pickle'
    print 'Dumping to %s' % filename,
    with open(filename, 'w') as f:
        pickle.dump(data, f)
        print '%ds' % (time.get_seconds() - start)
Example #8
def train_calibrator(y, y_estimate, plot2file):
    print("Training calibrator...")
    start = time.get_seconds()
    preictal_predictions = []
    p_y_cv = [0.0 if x == 0.0 else 1.0 for x in y]
    for i in range(len(y_estimate)):
        p = y_estimate[i]
        preictal = translate_prediction(p)
        preictal_predictions.append(preictal)

    fpr, tpr, thresholds = roc_curve(p_y_cv, preictal_predictions)
    p_roc_auc = auc(fpr, tpr)

    y_av = np.average(p_y_cv)
    y_std = np.std(p_y_cv)
    ye_av = np.average(preictal_predictions)
    ye_std = np.std(preictal_predictions)

    pl.clf()
    pl.hist(preictal_predictions, bins=50)
    pl.xlabel('preictal estimate')
    pl.ylabel('counts')
    pl.title('CV histogram (mean_cv= %0.3f, mean_es=%0.3f, std_es=%0.3f)' %
             (y_av, ye_av, ye_std))
    #     pl.show()
    plot2file.savefig()
    calibrate_matrix = np.array([ye_av, ye_std])

    elapsedSecs = time.get_seconds() - start
    print("t=%ds score=%f" % (int(elapsedSecs), p_roc_auc))
    return calibrate_matrix
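
The returned calibrate_matrix holds the mean and standard deviation of the cross-validation estimates. Presumably it is used later to normalize test-time predictions; a sketch of that assumed usage (apply_calibration is a hypothetical name, not shown in this listing):

import numpy as np

def apply_calibration(predictions, calibrate_matrix):
    # Shift and scale raw preictal estimates by the CV mean/std
    # (assumed usage; the project's actual calibration step may differ).
    mean, std = calibrate_matrix
    return (np.asarray(predictions) - mean) / std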
Example #9
def train(classifier, X_train, y_train, X_cv, y_cv, y_classes):
    print "Training ..."
    print 'Dim', 'X', np.shape(X_train), 'y', np.shape(y_train), 'X_cv', np.shape(X_cv), 'y_cv', np.shape(y_cv)

    start = time.get_seconds()
    total = y_train.shape[0]

    ictalnum = sum(y_train)
    interictalnum = total - ictalnum
    print ictalnum
    print interictalnum

    weight = np.concatenate((interictalnum / ictalnum * np.ones(ictalnum),
                             np.ones(interictalnum)))
    print weight

    #classifier.fit(X_train, y_train, sample_weight=weight)
    classifier.fit(X_train, y_train)
    print "Scoring..."
    S = score_classifier_auc(classifier, X_cv, y_cv, y_classes)
    score = S

    elapsedSecs = time.get_seconds() - start
    print "t=%ds score=%f" % (int(elapsedSecs), score)
    return score, S
Example #10
def train_calibrator(y, y_estimate, plot2file):
    print "Training calibrator..."
    start = time.get_seconds()
    preictal_predictions = []
    p_y_cv = [0.0 if x == 0.0 else 1.0 for x in y]
    for i in range(len(y_estimate)):
        p = y_estimate[i]
        preictal = translate_prediction(p)
        preictal_predictions.append(preictal)

    fpr, tpr, thresholds = roc_curve(p_y_cv, preictal_predictions)
    p_roc_auc = auc(fpr, tpr)
    
    y_av = np.average(p_y_cv)
    y_std = np.std(p_y_cv)
    ye_av = np.average(preictal_predictions)
    ye_std = np.std(preictal_predictions)
    
    pl.clf()
    pl.hist(preictal_predictions, bins=50)
    pl.xlabel('preictal estimate')
    pl.ylabel('counts')
    pl.title('CV histogram (mean_cv= %0.3f, mean_es=%0.3f, std_es=%0.3f)' %(y_av, ye_av, ye_std))
#     pl.show()
    plot2file.savefig()
    calibrate_matrix = np.array([ye_av, ye_std])
    
    elapsedSecs = time.get_seconds() - start
    print "t=%ds score=%f" % (int(elapsedSecs), p_roc_auc)
    return calibrate_matrix
Example #11
def train(classifier, training_data, quiet=False):
    X_train = training_data.X_train
    y_train = training_data.y_train
    if not quiet: print 'Training ...',
    start = time.get_seconds()
    classifier.fit(X_train, y_train)
    if not quiet: print '%ds' % (time.get_seconds() - start)
Example #12
def load_hkl_file(filename):
    hkl_filename = filename + '.hkl'
    if os.path.isfile(hkl_filename):
        start = time.get_seconds()
        data = hkl.load(hkl_filename)
        print 'Loaded %s in %ds' % (hkl_filename, time.get_seconds() - start)
        return data
    return None
Example #13
def load_hkl_file(filename):
    hkl_filename = filename + '.hkl'
    if os.path.isfile(hkl_filename):
        start = time.get_seconds()
        data = hkl.load(hkl_filename)
        print 'Loaded %s in %ds' % (hkl_filename, time.get_seconds() - start)
        return data
    return None
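
The save_hkl_file counterpart called in Example #4 is not shown in this listing. Assuming it mirrors load_hkl_file via the hickle package, a compatible sketch is:

import hickle as hkl

def save_hkl_file(filename, data):
    # Mirror of load_hkl_file: append the extension and dump with hickle.
    # (Sketch under that assumption; the project's version may differ.)
    hkl.dump(data, filename + '.hkl')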
Example #14
def train_all_data(classifier, X_train, y_train, X_cv, y_cv):
    print "Training ..."
    X = np.concatenate((X_train, X_cv), axis=0)
    y = np.concatenate((y_train, y_cv), axis=0)
    print 'Dim', np.shape(X), np.shape(y)
    start = time.get_seconds()
    classifier.fit(X, y)
    elapsedSecs = time.get_seconds() - start
    print "t=%ds" % int(elapsedSecs)
Example #15
def train_all_data(classifier, X_train, y_train, X_cv, y_cv):
    print "Training ..."
    X = np.concatenate((X_train, X_cv), axis=0)
    y = np.concatenate((y_train, y_cv), axis=0)
    print 'Dim', np.shape(X), np.shape(y)
    start = time.get_seconds()
    classifier.fit(X, y)
    elapsedSecs = time.get_seconds() - start
    print "t=%ds" % int(elapsedSecs)
Example #16
def load_pickle_file(filename):
    filename = filename + '.pickle'
    if os.path.isfile(filename):
        print 'Loading %s ...' % filename,
        with open(filename) as f:
            start = time.get_seconds()
            data = pickle.load(f)
            print '%ds' % (time.get_seconds() - start)
            return data
    return None
Example #17
def load_pickle_file(filename):
    filename = filename + '.pickle'
    if os.path.isfile(filename):
        print 'Loading %s ...' % filename,
        with open(filename) as f:
            start = time.get_seconds()
            data = pickle.load(f)
            print '%ds' % (time.get_seconds() - start)
            return data
    return None
Example #18
def load_pickle_file(filename):
    filename = filename + ".pickle"
    if os.path.isfile(filename):
        print "Loading %s ..." % filename,
        with open(filename) as f:
            start = time.get_seconds()
            data = pickle.load(f)
            print "%ds" % (time.get_seconds() - start)
            return data
    return None
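
Taken together, save_pickle_file and load_pickle_file form a simple file cache. A sketch of the intended round trip (assuming both helpers are in scope; on Python 3 the files would also need to be opened in binary mode):

data = {'X': [1, 2, 3], 'y': [0, 1, 0]}
save_pickle_file('example', data)      # writes example.pickle
restored = load_pickle_file('example')
assert restored == data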
Example #19
def train(classifier, X_train, y_train, X_cv, y_cv, y_classes):
    print "Training ..."

    print 'Dim', 'X', np.shape(X_train), 'y', np.shape(y_train), 'X_cv', np.shape(X_cv), 'y_cv', np.shape(y_cv)
    start = time.get_seconds()
    classifier.fit(X_train, y_train)
    print "Scoring..."
    score = score_classifier_auc(classifier, X_cv, y_cv, y_classes)

    elapsedSecs = time.get_seconds() - start
    print "t=%ds score=%f" % (int(elapsedSecs), score)
    return score
Example #20
def train(classifier, X_train, y_train, X_cv, y_cv, y_classes):
    print("Training ...")

    print('Dim', 'X', np.shape(X_train), 'y', np.shape(y_train), 'X_cv',
          np.shape(X_cv), 'y_cv', np.shape(y_cv))
    start = time.get_seconds()
    classifier.fit(X_train, y_train)
    print("Scoring...")
    score = score_classifier_auc(classifier, X_cv, y_cv, y_classes)

    elapsedSecs = time.get_seconds() - start
    print("t=%ds score=%f" % (int(elapsedSecs), score))
    return score
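
A typical call, assuming a scikit-learn style classifier and that score_classifier_auc is importable from the project (synthetic data for illustration only):

import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.RandomState(0)
X_train, y_train = rng.randn(80, 5), rng.randint(0, 2, 80)
X_cv, y_cv = rng.randn(20, 5), rng.randint(0, 2, 20)
score = train(LogisticRegression(), X_train, y_train,
              X_cv, y_cv, np.unique(y_train))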
Example #21
def train_all_data(classifier, plot2file, X_train, y_train, X_cv, y_cv):
    print "Training ..."
    X = np.concatenate((X_train, X_cv), axis=0)
    y = np.concatenate((y_train, y_cv), axis=0)
    print 'Dim', np.shape(X), np.shape(y)
    start = time.get_seconds()
    classifier_cv = deepcopy(classifier)
    classifier.fit(X, y)
    classifier_cv.fit(X_train, y_train)
    score_classifier_auc(classifier_cv, plot2file, X_cv, y_cv, y_cv)
    y_estimate = classifier_cv.predict_proba(X_cv)
    elapsedSecs = time.get_seconds() - start
    print "t=%ds" % int(elapsedSecs)
    return y_estimate
Example #22
def train_all_data(classifier, plot2file, X_train, y_train, X_cv, y_cv):
    print("Training ...")
    X = np.concatenate((X_train, X_cv), axis=0)
    y = np.concatenate((y_train, y_cv), axis=0)
    print('Dim', np.shape(X), np.shape(y))
    start = time.get_seconds()
    classifier_cv = deepcopy(classifier)
    classifier.fit(X, y)
    classifier_cv.fit(X_train, y_train)
    score_classifier_auc(classifier_cv, plot2file, X_cv, y_cv, y_cv)
    y_estimate = classifier_cv.predict_proba(X_cv)
    elapsedSecs = time.get_seconds() - start
    print("t=%ds" % int(elapsedSecs))
    return y_estimate
Example #23
    def process_raw_data(mat_data, ispreictal):
        start = time.get_seconds()
        X = []
        y = []
        latencies = []

        prev_data = None
        prev_latency = None
        for segment in mat_data:

            for key in segment.keys():
                if (key.find('segment')>0):
                    keyname = key

            data = segment[keyname]

            # TODO:
            data = data[0,0]
            datas = data['data']
            sz = datas.shape
            for i in range(drate):
                data = datas[:,i*(sz[1]//drate):(i+1)*(sz[1]//drate)]
                transformed_data = pipeline.apply(data)
                if ispreictal:
                    # this is preictal
                    y.append(1)
                else:
                    # this is interictal
                    y.append(0)

                X.append(transformed_data)

                prev_data = data


        print '(%ds)' % (time.get_seconds() - start)

        X = np.array(X)
        y = np.array(y)
        latencies = np.array(latencies)

        if ictal:
            print 'X', X.shape, 'y', y.shape
            return X, y
        elif interictal:
            print 'X', X.shape, 'y', y.shape
            return X, y
        else:
            print 'X', X.shape
            return X
Example #24
    def load_data(self):
        global significant_channels
        global subj
        subj = self.task_core.target  # to be used in auc record
        start = time.get_seconds()
        significant_channels = SignificantChannels(self.task_core).load_data()
        print significant_channels
        print 'ACS time is %d s' % (time.get_seconds() - start)
        #print aa
        data = TrainingDataTask(self.task_core).run()
        classifier_data = train_classifier(self.task_core.classifier,
                                           data,
                                           normalize=self.task_core.normalize)
        del classifier_data['classifier']  # save disk space
        return classifier_data
Example #25
def prepare_training_data(ictal_data, interictal_data, cv_ratio):
    ictal_X, ictal_y = flatten(ictal_data.X), ictal_data.y
    interictal_X, interictal_y = flatten(interictal_data.X), interictal_data.y
    sz = ictal_X.shape
    num = sz[0]
    num2 = interictal_X.shape[0]
    sub = random.sample(range(0, interictal_X.shape[0]), min(num * 3, num2))
    #sub = random.sample(range(0, interictal_X.shape[0]), num)
    interictal_X = interictal_X[sub, :]
    interictal_y = interictal_y[sub]
    # chop data
    print "chop"
    print interictal_X.shape
    print ictal_X.shape
    # split up data into training set and cross-validation set for both seizure and early sets
    ictal_X_train, ictal_y_train, ictal_X_cv, ictal_y_cv = split_train_random(ictal_X, ictal_y, cv_ratio)
    interictal_X_train, interictal_y_train, interictal_X_cv, interictal_y_cv = split_train_random(interictal_X, interictal_y, cv_ratio)
    print interictal_X_train.shape

    def concat(a, b):
        return np.concatenate((a, b), axis=0)

    X_train = concat(ictal_X_train, interictal_X_train)
    y_train = concat(ictal_y_train, interictal_y_train)
    print X_train.shape
    X_cv = concat(ictal_X_cv, interictal_X_cv)
    y_cv = concat(ictal_y_cv, interictal_y_cv)

    y_classes = np.unique(concat(y_train, y_cv))

    start = time.get_seconds()
    elapsedSecs = time.get_seconds() - start
    print "%ds" % int(elapsedSecs)

    print 'X_train:', np.shape(X_train)
    print 'y_train:', np.shape(y_train)
    print 'X_cv:', np.shape(X_cv)
    print 'y_cv:', np.shape(y_cv)
    print 'y_classes:', y_classes

    return {
        'X_train': X_train,
        'y_train': y_train,
        'X_cv': X_cv,
        'y_cv': y_cv,
        'y_classes': y_classes
    }
Example #26
    def process_raw_data(mat_data, with_latency):
        start = time.get_seconds()
        print 'Loading data',
        X = []
        y = []
        latencies = []

        for segment in mat_data:
            data = segment['data']
            if (significant_channels is not None):
                data = data[significant_channels]
            if data.shape[-1] > 400:
                data = resample(data, 400, axis=data.ndim - 1)

            if with_latency:
                # this is ictal
                latency = segment['latency'][0]
                if latency <= 15:
                    y_value = 0  # ictal <= 15
                else:
                    y_value = 1  # ictal > 15

                y.append(y_value)
                latencies.append(latency)

                prev_latency = latency
            elif y is not None:
                y.append(2)

            transformed_data = pipeline.apply(data)
            X.append(transformed_data)

        print '(%ds)' % (time.get_seconds() - start)

        X = np.array(X)
        y = np.array(y)
        latencies = np.array(latencies)

        if ictal:
            print 'X', X.shape, 'y', y.shape, 'latencies', latencies.shape
            return X, y, latencies
        elif interictal:
            print 'X', X.shape, 'y', y.shape
            return X, y
        else:
            print 'X', X.shape
            return X
Example #27
def train_all_data(classifier, X_train, y_train, X_cv, y_cv):
    print "Training ..."
    X = np.concatenate((X_train, X_cv), axis=0)
    y = np.concatenate((y_train, y_cv), axis=0)
    print 'Dim', np.shape(X), np.shape(y)
    start = time.get_seconds()
    total = y.shape[0]
    ictalnum = sum(y)
    interictalnum = total - ictalnum
    print ictalnum
    print interictalnum
    weight = np.concatenate((interictalnum / ictalnum * np.ones(ictalnum),
                             np.ones(interictalnum)))
    print weight
    classifier.fit(X, y, sample_weight=weight)
    #np.set_printoptions(threshold=np.nan)
    elapsedSecs = time.get_seconds() - start
    print "t=%ds" % int(elapsedSecs)
Example #28
    def process_raw_data(mat_data):
        start = time.get_seconds()
        print 'Loading data',
        X = []
        y = []
        prev_data = None

        for segment in mat_data:
            data = segment['data']
            yvalue = 1 if preictal else 0
            transformed_data = pipeline.apply(data)

            if gen_preictal and prev_data is not None:
                axis = prev_data.ndim - 1

                def split(d):
                    return np.split(d, 2, axis=axis)

                new_data = np.concatenate(
                    (split(prev_data)[1], split(data)[0]), axis=axis)
                transformed_new_data = pipeline.apply(new_data)
                X.append(transformed_new_data)
                y.append(yvalue)

            X.append(transformed_data)
            y.append(yvalue)
            prev_data = data

        print '(%ds)' % (time.get_seconds() - start)

        X = np.array(X)
        y = np.array(y)

        if preictal:
            print 'X', X.shape, 'y', y.shape
            return X, y
        elif interictal:
            print 'X', X.shape, 'y', y.shape
            return X, y
        else:
            print 'X', X.shape
            return X
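
The gen_preictal branch synthesizes an extra training segment from the back half of the previous window and the front half of the current one; a small numeric illustration:

import numpy as np

prev_data = np.arange(8).reshape(1, 8)   # one channel, samples 0..7
data = np.arange(8, 16).reshape(1, 8)    # samples 8..15
axis = prev_data.ndim - 1
new_data = np.concatenate((np.split(prev_data, 2, axis=axis)[1],
                           np.split(data, 2, axis=axis)[0]), axis=axis)
print(new_data)  # [[ 4  5  6  7  8  9 10 11]]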
Example #29
def train_classifier(classifier, data, normalize=False):
    X_train = data.X_train
    y_train = data.y_train
    X_cv = data.X_cv
    y_cv = data.y_cv
    if normalize:
        X_train, X_cv = normalize_data(X_train, X_cv)
    print("Training ...")
    print('Dim', 'X', np.shape(X_train), 'y', np.shape(y_train), 'X_cv',
          np.shape(X_cv), 'y_cv', np.shape(y_cv))
    start = time.get_seconds()
    classifier.fit(X_train, y_train)
    print("Scoring...")
    S, E = score_classifier_auc(classifier, X_cv, y_cv, data.y_classes)
    score = 0.5 * (S + E)

    elapsedSecs = time.get_seconds() - start
    print("t=%ds score=%f" % (int(elapsedSecs), score))

    return {'classifier': classifier, 'score': score, 'S_auc': S, 'E_auc': E}
Example #30
    def process_raw_data(mat_data, splitsize):
        start = time.get_seconds()
        print 'Loading data',
        # print mat_data
        SamplePerFile = []
        X = []
        y = []
        cc = 0
        for segment in mat_data:
            cc += 1
            print cc
            for skey in segment.keys():
                if "data" in skey.lower():
                    mykey = skey
            data = segment[mykey][0][0][0]
            if np.all(data == 0):
                print 'All of data zero, filling random numbers'
                for s in range(int(240000/splitsize)):
                    transformed_data = np.random.randn(transformed_data_length)
                    X.append(transformed_data)
                SamplePerFile.append(int(240000/splitsize))
                continue
            data_tmp = data[np.invert(np.all(data == 0, axis=1))]
            sampleSizeinSecond = data_tmp.shape[0] / 400
            data = data_tmp.transpose()
            axis = data.ndim - 1

            print sampleSizeinSecond

            '''DataSampleSize: split the 10-minute data into several clips.
            With one-second clips, patient1 and patient2 finished in 3 hours;
            Dog1 crashed after 7+ hours from running out of memory,
            so try ten-second clips.
            '''
            DataSampleSize = splitsize  # data.shape[1] / (totalSample * 1.0)  # try to split data into equal size
            splitIdx = np.arange(DataSampleSize, data.shape[1], DataSampleSize)
            splitIdx = np.int32(np.ceil(splitIdx))
            splitData = np.hsplit(data, splitIdx)
            SPF = 0
            #pre_sample_size = 0
            #channel = 16
            # if target == '2':
            #     channel = 14
            for s in splitData:
                transformed_data = pipeline.apply(s)
                X.append(transformed_data)
                SPF += 1
            SamplePerFile.append(SPF)
            print 'done'
            transformed_data_length=transformed_data.shape[0]
        X = np.array(X)
        print 'X', X.shape
        return X, SamplePerFile
Example #31
    def process_raw_data(mat_data):
        start = time.get_seconds()
        print 'Loading data',
        X = []
        y = []
        prev_data = None

        for segment in mat_data:
            data = segment['data']
            yvalue = 1 if preictal else 0
            transformed_data = pipeline.apply(data)

            if gen_preictal and prev_data is not None:
                axis = prev_data.ndim - 1

                def split(d):
                    return np.split(d, 2, axis=axis)

                new_data = np.concatenate(
                    (split(prev_data)[1], split(data)[0]), axis=axis)
                transformed_new_data = pipeline.apply(new_data)
                X.append(transformed_new_data)
                y.append(yvalue)

            X.append(transformed_data)
            y.append(yvalue)
            prev_data = data

        print '(%ds)' % (time.get_seconds() - start)

        X = np.array(X)
        y = np.array(y)

        if preictal:
            print 'X', X.shape, 'y', y.shape
            return X, y
        elif interictal:
            print 'X', X.shape, 'y', y.shape
            return X, y
        else:
            print 'X', X.shape
            return X
Example #32
def prepare_training_data(ictal_data, interictal_data, cv_ratio, withlatency=False):
    print 'Preparing training data ...',
    ictal_X, ictal_y = flatten(ictal_data.X), ictal_data.y
    interictal_X, interictal_y = flatten(interictal_data.X), interictal_data.y

    # split up data into training set and cross-validation set for both seizure and early sets
    if withlatency:
        ictal_X_train, ictal_y_train, ictal_X_cv, ictal_y_cv = split_train_ictal(ictal_X, ictal_y, ictal_data.latencies, cv_ratio)
    else:
        ictal_X_train, ictal_y_train, ictal_X_cv, ictal_y_cv = split_train_random(ictal_X, ictal_y, cv_ratio)
    interictal_X_train, interictal_y_train, interictal_X_cv, interictal_y_cv = split_train_random(interictal_X, interictal_y, cv_ratio)

    def concat(a, b):
        return np.concatenate((a, b), axis=0)

    X_train = concat(ictal_X_train, interictal_X_train)
    y_train = concat(ictal_y_train, interictal_y_train)
    X_cv = concat(ictal_X_cv, interictal_X_cv)
    y_cv = concat(ictal_y_cv, interictal_y_cv)

    y_classes = np.unique(concat(y_train, y_cv))

    start = time.get_seconds()
    elapsedSecs = time.get_seconds() - start
    print "%ds" % int(elapsedSecs)

    print 'X_train:', np.shape(X_train)
    print 'y_train:', np.shape(y_train)
    print 'X_cv:', np.shape(X_cv)
    print 'y_cv:', np.shape(y_cv)
    print 'y_classes:', y_classes

    return {
        'X_train': X_train,
        'y_train': y_train,
        'X_cv': X_cv,
        'y_cv': y_cv,
        'y_classes': y_classes
    }
Example #33
    def process_raw_data(mat_data, splitsize):
        start = time.get_seconds()
        print 'Loading data',
        # print mat_data
        SamplePerFile = []
        X = []
        y = []
        cc = 0
        for segment in mat_data:
            cc += 1
            print cc
            for skey in segment.keys():
                if "data" in skey.lower():
                    mykey = skey
            data = segment[mykey][0][0][0]
            if np.all(data == 0):
                print 'All of data zero, filling random numbers'
                for s in range(int(240000 / splitsize)):
                    transformed_data = np.random.randn(transformed_data_length)
                    X.append(transformed_data)
                SamplePerFile.append(int(240000 / splitsize))
                continue
            data_tmp = data[np.invert(np.all(data == 0, axis=1))]
            sampleSizeinSecond = data_tmp.shape[0] / 400
            data = data_tmp.transpose()
            axis = data.ndim - 1

            print sampleSizeinSecond
            '''DataSampleSize: split the 10-minute data into several clips.
            With one-second clips, patient1 and patient2 finished in 3 hours;
            Dog1 crashed after 7+ hours from running out of memory,
            so try ten-second clips.
            '''
            DataSampleSize = splitsize  # data.shape[1] / (totalSample * 1.0)  # try to split data into equal size
            splitIdx = np.arange(DataSampleSize, data.shape[1], DataSampleSize)
            splitIdx = np.int32(np.ceil(splitIdx))
            splitData = np.hsplit(data, splitIdx)
            SPF = 0
            #pre_sample_size = 0
            #channel = 16
            # if target == '2':
            #     channel = 14
            for s in splitData:
                transformed_data = pipeline.apply(s)
                X.append(transformed_data)
                SPF += 1
            SamplePerFile.append(SPF)
            print 'done'
            transformed_data_length = transformed_data.shape[0]
        X = np.array(X)
        print 'X', X.shape
        return X, SamplePerFile
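
The np.hsplit pattern above chops a (channels x samples) array into fixed-width clips, with any remainder left as a short final clip; a small illustration:

import numpy as np

data = np.arange(32).reshape(2, 16)          # 2 channels, 16 samples
split_idx = np.arange(5, data.shape[1], 5)   # boundaries at 5, 10, 15
clips = np.hsplit(data, split_idx)
print([c.shape for c in clips])              # [(2, 5), (2, 5), (2, 5), (2, 1)]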
Example #34
def prepare_training_data(ictal_data, interictal_data, cv_ratio):
    print 'Preparing training data ...',
    ictal_X, ictal_y = flatten(ictal_data.X), ictal_data.y
    interictal_X, interictal_y = flatten(interictal_data.X), interictal_data.y

    # split up data into training set and cross-validation set for both seizure and early sets
    ictal_X_train, ictal_y_train, ictal_X_cv, ictal_y_cv = split_train_ictal(
        ictal_X, ictal_y, ictal_data.latencies, cv_ratio)
    interictal_X_train, interictal_y_train, interictal_X_cv, interictal_y_cv = split_train_random(
        interictal_X, interictal_y, cv_ratio)

    def concat(a, b):
        return np.concatenate((a, b), axis=0)

    X_train = concat(ictal_X_train, interictal_X_train)
    y_train = concat(ictal_y_train, interictal_y_train)
    X_cv = concat(ictal_X_cv, interictal_X_cv)
    y_cv = concat(ictal_y_cv, interictal_y_cv)

    y_classes = np.unique(concat(y_train, y_cv))

    start = time.get_seconds()
    elapsedSecs = time.get_seconds() - start
    print "%ds" % int(elapsedSecs)

    print 'X_train:', np.shape(X_train)
    print 'y_train:', np.shape(y_train)
    print 'X_cv:', np.shape(X_cv)
    print 'y_cv:', np.shape(y_cv)
    print 'y_classes:', y_classes

    return {
        'X_train': X_train,
        'y_train': y_train,
        'X_cv': X_cv,
        'y_cv': y_cv,
        'y_classes': y_classes
    }
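
split_train_random is called throughout this listing but never shown. Assuming it shuffles the samples and holds out a cv_ratio fraction, a compatible sketch (matching the X_train, y_train, X_cv, y_cv return order used above) is:

import numpy as np

def split_train_random(X, y, cv_ratio):
    # Shuffle indices, then hold out cv_ratio of the samples for CV.
    # (Sketch under stated assumptions; the project's version may differ.)
    idx = np.random.permutation(len(X))
    n_cv = int(len(X) * cv_ratio)
    cv_idx, train_idx = idx[:n_cv], idx[n_cv:]
    return X[train_idx], y[train_idx], X[cv_idx], y[cv_idx]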
Example #35
    def process_raw_data(mat_data):
        start = time.get_seconds()
        print 'Loading data',
        #print mat_data
        X = []
        y = []
        previous_transformed_data = []  #used in two window model
        previous_sequence = 0
        for segment in mat_data:
            for skey in segment.keys():
                if "_segment_" in skey.lower():
                    mykey = skey
                    

            if preictal:
                preictual_sequence = segment[mykey][0][0][4][0][0]
                y_value = preictual_sequence    #temporarily set to sequence number 
                if preictual_sequence != previous_sequence+1:
                    previous_transformed_data = []  #if data is not in sequence
                previous_sequence = preictual_sequence  
            elif interictal:
                y_value = 0
                previous_transformed_data = []  #interictal data is not in sequence between files
            else:
                previous_transformed_data = []  #test data is not in sequence between files
                
            
            data = segment[mykey][0][0][0]
            sampleFrequency = segment[mykey][0][0][2][0][0]
            axis = data.ndim - 1
            if sampleFrequency > targetFrequency:   #resample to target frequency
                data = resample(data, targetFrequency*sampleSizeinSecond, axis=axis)

            '''DataSampleSize: split the 10-minute data into several clips.
            With one-second clips, patient1 and patient2 finished in 3 hours;
            Dog1 crashed after 7+ hours from running out of memory,
            so try ten-second clips.
            '''
            DataSampleSize = data.shape[1]/(totalSample *1.0)  #try to split data into equal size
            splitIdx = np.arange(DataSampleSize, data.shape[1], DataSampleSize)
            splitIdx = np.int32(np.ceil(splitIdx))
            splitData = np.hsplit(data,splitIdx)
#             for i  in range(totalSample):
#                 s = splitData[i]
#                 s2 = splitData[i+totalSample]
                
            for s in splitData:
                if s.size > 0:    #is not empty
#                     s = 1.0 * s     #convert int to float
#                     s_scale = preprocessing.scale(s, axis=0, with_std = True)
#                     transformed_data = pipeline.apply([subjectID, s])
                    transformed_data = pipeline.apply(s)
#                     previous_transformed_data.append(transformed_data)
#                         transformed_data2 = pipeline.apply([subjectID, s1])
#                     if len(previous_transformed_data) > totalSample/2:
#                         combined_transformed_data = np.concatenate((transformed_data, previous_transformed_data.pop(0)), axis=transformed_data.ndim-1)
#                         X.append(combined_transformed_data)
                    X.append(transformed_data)
                    if preictal or interictal:
                        y.append(y_value)
                                

        print '(%ds)' % (time.get_seconds() - start)

        X = np.array(X)
        if preictal or interictal:
            y = np.array(y)
            print 'X', X.shape, 'y', y.shape
            return X, y
        else:
            print 'X', X.shape
            return X
Example #36
def parse_input_data(filename, ref='None'):
    def read_mat_data(filename):
        if os.path.exists(filename):
            mat_data = scipy.io.loadmat(filename)
        else:
            raise Exception("file %s not found" % filename)
        return mat_data

    def report_time(start):
        print '(%ds)' % (time.get_seconds() - start)
        new_start = time.get_seconds()
        return new_start

    # for each data point in ictal, interictal and test,
    # generate (X, <y>, <latency>) per channel
    def get_data(mat_data, data_type='data', problem_channels=[]):
        print 'Loading data',

        if 'data_behavior' in mat_data:
            dataKey = 'data_behavior'
        elif 'data_3sFIR' in mat_data:
            dataKey = 'data_3sFIR'
        else:
            dataKey = 'data'
        print "mat:", mat_data[dataKey].shape
        data = mat_data[dataKey][0:TOTAL_CH_NUM, :]
        if len(problem_channels) != 0:
            for each_channel in problem_channels:
                data = np.delete(data, each_channel - 1, axis=0)
        if data_type == 'data':
            print 'Data:', data.shape, data
            return data
        elif data_type == 'latencies':
            if mat_data[dataKey].shape[0] > TOTAL_CH_NUM:
                latencies = mat_data[dataKey][TOTAL_CH_NUM, :]
            else:
                latencies = np.zeros(len(data[0]))
            print 'Latencies:', latencies
            return latencies

    def plot_EEG(data):
        """
        Plot out the original EEG signals.
        """
        print 'Plotting out the original EEG signals... ',
        channels_fig = plt.figure()
        x1 = np.arange(START_TIME, END_TIME, 1.0 / SAMPLE_FREQUENCY)
        for i in range(0, CH_NUM):
            plt.subplot(CH_NUM, 1, i + 1)
            plt.plot(
                x1, data[i, START_TIME * SAMPLE_FREQUENCY:END_TIME *
                         SAMPLE_FREQUENCY])
        plt.show()

    def plot_data(data,
                  plot_name='None',
                  s=START_TIME,
                  e=END_TIME,
                  period=1.0 / SAMPLE_FREQUENCY):
        """
        Plot out arbitrary data.
        """
        print 'Plotting out figure:', plot_name
        plt.figure()
        plt.title(plot_name)
        x1 = np.arange(s, e, period)
        col = data.shape[0]
        for i in range(0, col):
            if i == 1:
                plt.title(plot_name)
            plt.subplot(col, 1, i + 1)
            plt.plot(x1, data[i])
            plt.ylim(-0.001, 0.001)
        plt.show()

    def calculate_eigenvalue_ref(data, data_type='None'):
        """
        Use a sliding window to calculate the change of the eigenvalues
        over time.
        """
        print 'Calculating reference change of eigenvalue in ', data_type, ' domain'
        #the change of eigenvalue with time in frequency/time domain
        eigen_ref = []
        for i in range(int(REF_TIME_START * SAMPLE_FREQUENCY),
                       int(REF_TIME_END * SAMPLE_FREQUENCY)):
            if data_type == 'Time':
                data_correlation = transforms.TimeCorrelation_whole(
                    50, 'usf').apply(data[:, i:i + WINDOW_RANGE])
            elif data_type == 'Frequency':
                data_correlation = transforms.FreqCorrelation_whole(
                    1, 50, 'usf').apply(data[:, i:i + WINDOW_RANGE])
            w = transforms.Eigenvalues().apply(data_correlation)
            eigen_ref.append(w)

        eigen_ref = np.array(eigen_ref)
        eigen_ref = np.swapaxes(eigen_ref, 0, 1)
        print data_type, ' eigen ref:', eigen_ref.shape
        print eigen_ref
        return eigen_ref

    def calculate_eigen_change(data, ref_mean, ref_std, data_type='none'):
        eigen_change = []
        for i in range(int(START_TIME * SAMPLE_FREQUENCY),
                       int(END_TIME * SAMPLE_FREQUENCY), SAMPLE_FREQUENCY / 4):
            if (data_type == 'Time'):
                data_correlation = transforms.TimeCorrelation_whole(
                    50, 'usf').apply(data[:, i:i + WINDOW_RANGE])
            elif (data_type == 'Frequency'):
                data_correlation = transforms.FreqCorrelation_whole(
                    1, 50, 'usf').apply(data[:, i:i + WINDOW_RANGE])

            w = transforms.Eigenvalues().apply(data_correlation)
            eigen_change.append(w)

        eigen_change = np.array(eigen_change)
        eigen_change = np.swapaxes(eigen_change, 0, 1)
        print data_type, ' change:', eigen_change
        for i in range(0, eigen_change.shape[1]):
            for j in range(0, CH_NUM):
                if i < 2:
                    print i, j
                    print eigen_change[j][i],
                    print t_eigen_ref_mean[j],
                    print t_eigen_ref_std[j][0]
                eigen_change[j][i] = (eigen_change[j][i] -
                                      ref_mean[j]) / ref_std[j][0]
        print data_type, 'eigen change normalized:', eigen_change.shape
        print eigen_change
        return eigen_change

    def calculate_slope_ref(data):
        """
        Calculate the standard deviation and normalized slope to define seizures.
        """
        print 'Calculating the reference slope and change of slope ... '
        #reference slope
        slope_stats = []
        for i in range(int(REF_TIME_START * SAMPLE_FREQUENCY),
                       int(REF_TIME_END * SAMPLE_FREQUENCY)):
            slopes = []
            for j in range(0, CH_NUM):
                slope = (data[j, i + 1] - data[j, i]) * SAMPLE_FREQUENCY
                slopes.append(slope)
            slope_stats.append(slopes)
        slope_stats = np.array(slope_stats)
        slope_stats = np.swapaxes(slope_stats, 0, 1)
        slope_stats = transforms.Stats().apply(slope_stats)
        print "Slope stats:", slope_stats.shape
        print slope_stats
        return slope_stats

    def calculate_slope_change(data, slope_stats, data_type='change'):
        #change of slope
        #note: smoothed by SMOOTHING_PERIOD s average, calculated for each sec
        slope_change = []
        seizure_num_by_slope = []
        for i in range(int(START_TIME * SAMPLE_FREQUENCY),
                       int(END_TIME * SAMPLE_FREQUENCY), SAMPLE_FREQUENCY):
            seizure_channels_by_slope = 0
            slopes = []
            for j in range(0, CH_NUM):
                average_slope = 0.0
                for k in range(0, int(SMOOTHING_PERIOD * SAMPLE_FREQUENCY)):
                    slope = (data[j, i + 1 + k] -
                             data[j, i + k]) * SAMPLE_FREQUENCY
                    average_slope += abs(slope)
                average_slope /= SMOOTHING_PERIOD * SAMPLE_FREQUENCY
                slope_normalized = abs(average_slope / slope_stats[j][0])
                #slope_normalized = abs(slope / slope_stats[j][0])
                if (slope_normalized > SLOPE_THRESHOLD):
                    seizure_channels_by_slope += 1
                slopes.append(slope_normalized)
            slope_change.append(slopes)
            seizure_num_by_slope.append(seizure_channels_by_slope)

        if data_type == 'change':
            slope_change = np.array(slope_change)
            print 'slope change of each channel', slope_change.shape
            print slope_change
            return slope_change
        elif data_type == 'num':
            seizure_num_by_slope = np.array(seizure_num_by_slope)
            print 'seizure_num_by_slope', seizure_num_by_slope
            return seizure_num_by_slope

    def plot_figures(latencies=[],
                     seizure_num_by_slope=[],
                     slope_change=[],
                     t_eigen_change=[],
                     f_eigen_change=[]):
        #Plot out the seizure period and correlation structure.
        print 'Plotting out the other figures.. ',
        #seizure onset by observation
        fig = plt.figure()
        """
        plt.subplot(ROW_NUM, COL_NUM, 3)
        plt.title('Seizure Time by Behavior')
        x2 = np.arange(START_TIME, END_TIME, 1.0/SAMPLE_FREQUENCY)
        plt.plot(x2, latencies[START_TIME*SAMPLE_FREQUENCY:END_TIME*SAMPLE_FREQUENCY])
        plt.axis([START_TIME, END_TIME, 0, 5])
        plt.xlabel('time(s)')
        plt.ylabel('seizure status')
        """
        #seizure onset by slope_normalized > 2.5
        plt.subplot(ROW_NUM, COL_NUM, 2)
        plt.title('Seizure Time by (Normalized Slope > 2.5) num ')
        #x3 = np.arange(START_TIME, END_TIME, 1.0/SAMPLE_FREQUENCY)
        x3 = np.arange(START_TIME, END_TIME, 1)
        #plt.plot(x3, slope_change)
        plt.plot(x3, seizure_num_by_slope)
        plt.axis([START_TIME, END_TIME, 0, 8])
        plt.ylabel('# of (sn > 2.5)')

        if len(slope_change) != 0:
            #slope change of each channel
            slope_change = np.array(slope_change)
            slope_change = np.swapaxes(slope_change, 0, 1)
            plt.subplot(ROW_NUM, COL_NUM, 1)
            plt.title('Slope change of each channel(moving average by 5 sec)')
            im = plt.imshow(slope_change,
                            origin='lower',
                            aspect='auto',
                            extent=[START_TIME, END_TIME, 1,
                                    CH_NUM])  #,interpolation = 'none')
            plt.ylabel('channel')
            fig.subplots_adjust(right=0.93)
            plt.clim(COLOR_MIN, COLOR_MAX)
            cbax = fig.add_axes([0.94, 0.82, 0.01, 0.12])
            fig.colorbar(im, cax=cbax)
        else:
            #time correlation
            plt.subplot(ROW_NUM, COL_NUM, 1)
            plt.title('Time Domain Correlation Analysis (Normalized)')
            im = plt.imshow(t_eigen_change,
                            origin='lower',
                            aspect='auto',
                            extent=[START_TIME, END_TIME, 0,
                                    CH_NUM])  #, interpolation = 'none')
            plt.ylabel('eigenvalues')
            plt.clim(COLOR_MIN, COLOR_MAX)
            """
            #phase correlation
            #f_eigen_change = np.array(f_eigen_change)
            #f_eigen_change = np.swapaxes(f_eigen_change, 0, 1)
            print "f eigen change", f_eigen_change.shape
            plt.subplot(ROW_NUM, COL_NUM, 2)
            plt.title('Frequency Domain Correlation Analysis (Normalized)')
            im = plt.imshow(f_eigen_change, origin = 'lower',
                    aspect = 'auto', extent = [START_TIME,END_TIME,0,7])#,                interpolation = 'none')
            #plt.colorbar()
            """
            plt.tight_layout()  #adjust the space between plots
            fig.subplots_adjust(right=0.93)
            plt.clim(COLOR_MIN, COLOR_MAX)
            cbax = fig.add_axes([0.94, 0.82, 0.01, 0.12])
            fig.colorbar(im, cax=cbax)

        plt.show()

    start = time.get_seconds()
    initial_start = time.get_seconds()
    mat_data = read_mat_data(filename)
    #data = get_data(mat_data)
    data = get_data(mat_data, problem_channels=PROBLEM_CH)
    start = report_time(start)
    plot_data(data[:,
                   START_TIME * SAMPLE_FREQUENCY:END_TIME * SAMPLE_FREQUENCY],
              plot_name='EEG')
    #plot_EEG(data)
    start = report_time(start)
    if ref != 'None':
        print "Reference Data:", ref
        ref_mat_data = read_mat_data(ref)
        ref_data = get_data(ref_mat_data, problem_channels=PROBLEM_CH)
        plot_data(ref_data[:, REF_TIME_START * SAMPLE_FREQUENCY:REF_TIME_END *
                           SAMPLE_FREQUENCY],
                  plot_name='Reference EEG',
                  s=REF_TIME_START,
                  e=REF_TIME_END)
    else:
        print "Reference Data:", filename
        ref_data = data

    slope_ref = calculate_slope_ref(ref_data)
    #slope_change = calculate_slope_change(data, slope_ref, 'change')
    slope_num = calculate_slope_change(data, slope_ref, 'num')
    start = report_time(start)

    t_eigen_ref = calculate_eigenvalue_ref(ref_data, data_type="Time")
    start = report_time(start)
    t_eigen_ref_std = transforms.Stats().apply(t_eigen_ref)
    print 'ref std:'
    print t_eigen_ref_std
    start = report_time(start)
    t_eigen_ref_mean = np.average(t_eigen_ref, axis=1)
    print 'ref avg:'
    print t_eigen_ref_mean
    start = report_time(start)
    t_eigen_change = calculate_eigen_change(data,
                                            t_eigen_ref_mean,
                                            t_eigen_ref_std,
                                            data_type='Time')
    start = report_time(start)

    f_eigen_ref = calculate_eigenvalue_ref(ref_data, data_type='Frequency')
    f_eigen_ref_std = transforms.Stats().apply(f_eigen_ref)
    f_eigen_ref_mean = np.average(f_eigen_ref, axis=1)
    f_eigen_change = calculate_eigen_change(data,
                                            f_eigen_ref_mean,
                                            f_eigen_ref_std,
                                            data_type='Frequency')
    start = report_time(start)
    plot_figures(
        latencies=get_data(mat_data, 'latencies'),
        seizure_num_by_slope=slope_num,
        # slope_change = slope_change,
        t_eigen_change=t_eigen_change,
        f_eigen_change=f_eigen_change)
    """
    plot_figures(latencies = get_data(mat_data, 'latencies'), seizure_num_by_slope = slope_num,
            slope_change = slope_change)

    """
    print '======================'
    print 'Total time:',
    start = report_time(initial_start)
    print
Example #37
def load_training_data(settings, target, pipeline, check_only, strategy=None, cv_fold_number=None, quiet=False):
    cv = cv_fold_number is not None
    if check_only:
        return load_pipeline_data(settings, target, 'preictal', pipeline, check_only=True, quiet=quiet) or \
               load_pipeline_data(settings, target, 'interictal', pipeline, check_only=True, quiet=quiet)

    preictal, preictal_meta = load_pipeline_data(settings, target, 'preictal', pipeline, check_only=False, quiet=quiet)
    interictal, interictal_meta = load_pipeline_data(settings, target, 'interictal', pipeline, check_only=False, quiet=quiet)

    total_segments = preictal_meta.num_segments + interictal_meta.num_segments
    # print 'total_segments', total_segments

    if not quiet: print 'Preparing data ...',
    start = time.get_seconds()

    def make_fold(preictal_X_train, preictal_X_cv, interictal_X_train, interictal_X_cv):
        num_train_segments = preictal_X_train.shape[0] + interictal_X_train.shape[0]
        num_cv_segments = preictal_X_cv.shape[0] + interictal_X_cv.shape[0]
        assert (num_train_segments + num_cv_segments) == total_segments

        flattened_preictal_X_train = flatten(preictal_X_train)
        flattened_interictal_X_train = flatten(interictal_X_train)
        flattened_preictal_X_cv = flatten(preictal_X_cv) if cv else np.empty((0,))
        flattened_interictal_X_cv = flatten(interictal_X_cv) if cv else np.empty((0,))

        X_train = np.concatenate((flattened_preictal_X_train, flattened_interictal_X_train), axis=0)
        X_cv = np.concatenate((flattened_preictal_X_cv, flattened_interictal_X_cv), axis=0)

        preictal_y_train = np.ones((flattened_preictal_X_train.shape[0],))
        preictal_y_cv = np.ones((preictal_X_cv.shape[0],))
        interictal_y_train = np.zeros((flattened_interictal_X_train.shape[0],))
        interictal_y_cv = np.zeros((interictal_X_cv.shape[0],))

        y_train = np.concatenate((preictal_y_train, interictal_y_train), axis=0)
        y_cv = np.concatenate((preictal_y_cv, interictal_y_cv), axis=0)

        X_train, y_train = sklearn.utils.shuffle(X_train, y_train, random_state=0)

        return jsdict({
            'X_train': X_train,
            'y_train': y_train,
            'X_cv': X_cv,
            'y_cv': y_cv,
            'num_train_segments': num_train_segments,
            'num_cv_segments': num_cv_segments
        })

    if cv:
        preictal_X_train, preictal_X_cv = strategy.split_train_cv(preictal, preictal_meta, cv_fold_number)
        interictal_X_train, interictal_X_cv = strategy.split_train_cv(interictal, interictal_meta, cv_fold_number, interictal=True)
        data = make_fold(preictal_X_train, preictal_X_cv, interictal_X_train, interictal_X_cv)
    else:
        preictal_X_train = preictal
        preictal_X_cv = np.empty((0,))
        interictal_X_train = interictal
        interictal_X_cv = np.empty((0,))
        data = make_fold(preictal_X_train, preictal_X_cv, interictal_X_train, interictal_X_cv)

    if not quiet: print '%ds' % (time.get_seconds() - start)

    if not quiet: print 'X_train', data.X_train.shape, 'y_train', data.y_train.shape, 'X_cv', data.X_cv.shape, 'y_cv', data.y_cv.shape

    return data
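
make_fold above returns a jsdict whose fields are then read as attributes (data.X_train, data.y_cv). A minimal definition consistent with that usage (an assumption; the project's own jsdict may differ):

class jsdict(dict):
    # Dict whose keys are also readable as attributes.
    def __init__(self, data):
        dict.__init__(self, data)
        self.__dict__ = self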
Example #38
    def process_raw_data(mat_data, splitsize):
        start = time.get_seconds()
        print 'Loading data',
        X = []
        y = []
        h_num = []
        cc = 0
        hour_num = 0
        pre_sequence_num = 0
        for segment in mat_data:
            cc += 1
            print cc
            for skey in segment.keys():
                if "data" in skey.lower():
                    mykey = skey
            try:
                sequence_num = segment[mykey][0][0][4][0][0]
            except:
                sequence_num = random.randint(1, 6)
            print 'seq: %d' % (sequence_num)
            if sequence_num == pre_sequence_num + 1:
                hour_num = hour_num
            else:
                hour_num += 1
            print "hour_num: %d" % (hour_num)
            pre_sequence_num = sequence_num
            if preictal:
                try:
                    preictual_sequence = segment[mykey][0][0][4][0][0]
                except:
                    preictual_sequence = 1
                else:
                    pass
                y_value = preictual_sequence  # temporarily set to sequence number
            elif interictal:
                y_value = 0

            data = segment[mykey][0][0][0]
            # if target == '2':
            #     data = np.delete(data, [3, 9], 1)
            data_tmp = data[np.invert(np.all(data == 0, axis=1))]
            if data_tmp.shape[0] <= 2000:
                print 'too much zeros, skipping'
                continue
            sampleSizeinSecond = data_tmp.shape[0] / 400
            data = data_tmp.transpose()
            axis = data_tmp.ndim - 1
            # tic=time.get_seconds()
            print sampleSizeinSecond
            '''DataSampleSize: split the 10-minute data into several clips.
            With one-second clips, patient1 and patient2 finished in 3 hours;
            Dog1 crashed after 7+ hours from running out of memory,
            so try ten-second clips.
            '''
            DataSampleSize = splitsize  # data.shape[1]/(totalSample *1.0)  #try to split data into equal size
            splitIdx = np.arange(DataSampleSize, data.shape[1], DataSampleSize)
            splitIdx = np.int32(np.ceil(splitIdx))
            splitData = np.hsplit(data, splitIdx)
            SPF = 0
            for s in splitData:
                if s.shape[1] < 5000:  #is not so sparse
                    continue

                else:
                    transformed_data = pipeline.apply(s)
                    X.append(transformed_data)
                    y.append(y_value)
                    h_num.append(hour_num)
                    SPF += 1
                    if np.any(np.isnan(transformed_data)) or np.any(
                            np.isinf(transformed_data)):
                        print 'bug'
            print 'done'

        print '(%ds)' % (time.get_seconds() - start)

        X = np.array(X)
        y = np.array(y)
        h_num = np.array(h_num)
        print 'X', X.shape, 'y', y.shape
        return X, y, h_num
Example #39
    def test(self, X, Y, show_plots=True, bias_name=0,
             training_samples='training', soz_ch_ids=None, sel_win_num=None,
             clip_sizes=None):
        print('testing ..')
        start_time = time.get_seconds()
        bias_name = 0
#         self.test_num_to_load = X.shape[0]* X.shape[1]
#         self._test_graphL()
#         self._test_loss()
#         config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
#         config.gpu_options.allow_growth = True
        #config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION
#         config.allow_soft_placement = True
#         self.sess = tf.Session() #config=config
#         self.sess.run(tf.global_variables_initializer())
        
        
#         for counter in np.arange(X.shape[0]):            
#             feed_dict = {self.placeholders['X']: np.squeeze(X[counter,:,:,:]),
#                          self.placeholders['Y']: np.squeeze(Y[counter,:])}
#             outs = self.sess.run([self.test_graphL_W, self.test_loss, self.test_pred_classes], feed_dict=feed_dict)
#             if(show_plots):
#                 plotting_weights('MIT_plots', str(counter+bias_name), outs[0], intervals_seizures=np.squeeze(Y[counter,:]), estimated_states=outs[2])
#             print('    loss: ', outs[1])

#         Y_flat = X # np.reshape(Y,(Y.shape[0] * Y.shape[1],))
#         X_flat = Y # np.reshape(X,(X.shape[0] * X.shape[1], X.shape[2], X.shape[3]))
#         feed_dict = {self.placeholders['X']: X_flat, self.placeholders['Y']: Y_flat}
        classif_minibatch = miniBatchIterator(self.graphL_minibatch, self.classif_core.batch_size, self.placeholders, X, Y, clip_sizes=clip_sizes)
        y_hat = None
        prob_hat = None
        counter = 0
        while(not classif_minibatch.end()):
            feed_dict = classif_minibatch.next()
#             outs = self.sess.run([self.test_graphL_W, self.test_loss, self.test_pred_classes, self.test_pred_probas], feed_dict=feed_dict)
            outs = self.sess.run([self.graphL_W, self.loss, self.pred_classes, self.pred_probas], feed_dict=feed_dict)
            inn_y = outs[2]
            inn_prob = outs[3][:,1]
            start_idx, end_idx = classif_minibatch.current_idx()
            true_y = Y[start_idx:end_idx]
            inn_soz_ch_ids = soz_ch_ids[start_idx:end_idx]
            inn_sel_win_num = sel_win_num[start_idx:end_idx]
            if(classif_minibatch.end()):
                sel_idxx = range(inn_y.size-(Y.size-y_hat.size),inn_y.size)
                inn_y = inn_y[sel_idxx]
                inn_prob = inn_prob[sel_idxx]
                true_y = true_y[sel_idxx]
                inn_soz_ch_ids = inn_soz_ch_ids[sel_idxx]
                inn_sel_win_num = inn_sel_win_num[sel_idxx]
                
            y_hat = inn_y if y_hat is None else np.concatenate((y_hat, inn_y))
            prob_hat = inn_prob if prob_hat is None else np.concatenate((prob_hat, inn_prob), axis=0)
#             print('prob hat shape: ', prob_hat.shape)
#             print('batch num: ', classif_minibatch.batch_num)
            
            change_arg = np.squeeze(np.argwhere(true_y!=0))
            if(show_plots and change_arg.size!=0):
                feats = outs[0]
                change_arg = list(np.arange(np.max((0,change_arg[0]-15)),change_arg[0],1)) + list(change_arg) # + list(np.arange(change_arg[-1], np.min((true_y.size,change_arg[-1]+3)),1))
#                     change_arg = list(np.arange(9))
                print('change_arg: ', change_arg)
                feats = [feats[int(i)] for i in change_arg]
                inn_y = inn_y[change_arg]
                true_y = true_y[change_arg]
                inn_soz_ch_ids = inn_soz_ch_ids[change_arg]
                inn_sel_win_num = inn_sel_win_num[change_arg]
                plotting_weights('EU_plots/', training_samples+str(counter+bias_name), feats, intervals_seizures=true_y, \
                                 estimated_states=inn_y, soz_ch_ids=inn_soz_ch_ids, sel_win_num=inn_sel_win_num)
                counter += 1
            
        print('    loss: ', outs[1])
#         if(self.load_Core.num_classes==2):
        eval_performance(Y, y_hat, prob_hat, training_samples)  
        print('    time elapsed: ', time.get_seconds()-start_time)
Example #40
    def process_raw_data(mat_data, with_latency):
        start = time.get_seconds()
        print 'Loading data',
        X = []
        y = []
        latencies = []

        prev_data = None
        prev_sequence = None
        prev_latency = None
        for segment in mat_data:
            if task_predict:
                for key in segment.keys():
                    if not key.startswith('_'):
                        break
                data = segment[key]['data'][0,0]
                if key.startswith('preictal') or key.startswith('interictal'):
                    sequence = segment[key]['sequence'][0,0][0,0]
                else:
                    sequence = None
            else:
                data = segment['data']
                sequence = None
            if pipeline is not None:
                transformed_data = pipeline.apply(data)
            else:
                transformed_data = data


            if with_latency:
                # this is ictal
                latency = segment['latency'][0]
                if latency <= 15:
                    y_value = 0 # ictal <= 15
                else:
                    y_value = 1 # ictal > 15

                # generate extra ictal training data by taking 2nd half of previous
                # 1-second segment and first half of current segment
                # 0.5-1.5, 1.5-2.5, ..., 13.5-14.5, ..., 15.5-16.5
                # cannot take half of 15 and half of 16 because it cannot be strictly labelled as early or late
                if gen_ictal and prev_data is not None and prev_latency + 1 == latency and prev_latency != 15:
                    # gen new data :)
                    axis = prev_data.ndim - 1
                    def split(d):
                        return np.split(d, 2, axis=axis)
                    new_data = np.concatenate((split(prev_data)[1], split(data)[0]), axis=axis)
                    if pipeline is not None:
                        X.append(pipeline.apply(new_data))
                    else:
                        X.append(new_data.copy())
                    y.append(y_value)
                    latencies.append(latency - 0.5)

                y.append(y_value)
                latencies.append(latency)

                prev_latency = latency
            elif y is not None:
                # this is a non-ictal segment: preictal is labelled 0, interictal 2
                label = 0 if key.startswith('preictal') else 2
                if key.startswith('preictal') or key.startswith('interictal'):
                    # generate extra training data by taking overlaps with previous
                    # segment
                    # a negative gen_ictal means we also correct for the DC jump between segments;
                    # a non-integer value means we generate overlaps for negative examples too
                    ng = abs(int(gen_ictal)) # number of overlapping windows
                    if (gen_ictal and
                            (key.startswith('preictal') or gen_ictal != int(gen_ictal)) and
                                prev_data is not None and prev_sequence+1 == sequence):
                        if isinstance(gen_ictal,bool) or gen_ictal > 0:
                            new_data = np.concatenate((prev_data, data), axis=-1)
                        else:
                            # see 140922-signal-crosscorelation
                            # it looks like each segment was scaled to have DC=0
                            # however different segments will be scaled differently
                            # as result you can't concatenate sequential segments
                            # without undoing the relative offset

                            # import scipy.signal
                            # # we want to filter the samples so as to not be sensitive to change in the signal itself
                            # # over the distance of one sample (1/Fs). Taking 100 samples sounds safe enough.
                            # normal_cutoff = 2./100. # 1/100*Fs in Hz
                            # order = 6
                            # b, a = scipy.signal.butter(order, normal_cutoff, btype='low', analog=False)
                            # # use filtfilt to get zero phase http://wiki.scipy.org/Cookbook/FiltFilt
                            # W1 = 5000
                            # x1 = scipy.signal.filtfilt(b, a, prev_data[:,-W1:])
                            # # we want the first sample of data after fitering so we will run it backward through
                            # # the filter
                            # x2 = scipy.signal.filtfilt(b, a, data[:,W1-1::-1])
                            # # the first sample of data should be about the same as the last sample of prev_data
                            # data_offset = x2[:,-1] - x1[:,-1]
                            if data.shape[1] > 5*60*5000: # only Patients need offset correction
                                data_offset = data[:,:1].mean(axis=-1) - prev_data[:,-1:].mean(axis=-1)
                                data -= data_offset.reshape(-1,1)
                            new_data = np.concatenate((prev_data, data), axis=-1)

                        # jump = np.mean(np.abs(prev_data[:,-1]-data[:,0])*2./(np.std(prev_data[:,-4000:],axis=-1)+np.std(data[:,:4000],axis=-1)))
                        # if jump < 0.7:
                        # if ng==1:
                        #     # gen new data :)
                        #     axis = prev_data.ndim - 1
                        #     def split(d):
                        #         return np.split(d, 2, axis=axis)
                        #     new_data = np.concatenate((split(prev_data)[1], split(data)[0]), axis=axis)
                        #     X.append(pipeline.apply(new_data))
                        #     y.append(0) # seizure
                        #     latencies.append(sequence-0.5)
                        # else:
                        n = data.shape[1]
                        s = n / (ng + 1.)
                        # new_data = np.concatenate((prev_data, data), axis=-1)
                        for i in range(1,ng+1):
                            start = int(s*i)
                            if pipeline is not None:
                                X.append(pipeline.apply(new_data[:,start:(start+n)]))
                            else:
                                X.append(new_data[:,start:(start+n)].copy())
                            y.append(label) # seizure
                            latencies.append(sequence-1.+i/(ng+1.))
                    y.append(label) # seizure
                    latencies.append(float(sequence))
                else:
                    y.append(label) # no seizure

            X.append(transformed_data)
            prev_data = data
            prev_sequence = sequence

        print '(%ds)' % (time.get_seconds() - start)

        X = np.array(X)
        y = np.array(y)
        latencies = np.array(latencies)

        if ictal or preictal or interictal:
            print 'X', X.shape, 'y', y.shape, 'latencies', latencies.shape
            return X, y, latencies
        # elif interictal:
        #     print 'X', X.shape, 'y', y.shape
        #     return X, y
        else:
            print 'X', X.shape
            return X
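The overlap augmentation in process_raw_data slides ng extra windows of the original segment length across the concatenation of the previous and current segments, so each synthetic clip straddles the boundary. A minimal sketch with hypothetical shapes (the pipeline transform replaced by identity):

import numpy as np

prev_data = np.random.randn(16, 1000)   # hypothetical: 16 channels x 1000 samples
data = np.random.randn(16, 1000)
ng = 3                                  # number of extra overlapping windows

new_data = np.concatenate((prev_data, data), axis=-1)   # 16 x 2000
n = data.shape[1]
s = n / (ng + 1.)                       # stride between window starts
for i in range(1, ng + 1):
    start = int(s * i)                  # 250, 500, 750
    window = new_data[:, start:start + n]
    assert window.shape == (16, 1000)   # each window spans both segments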
def main():
    current_path = os.path.dirname(os.path.abspath(__file__))
    print 'Current path: ', current_path
    file_path = input_filename(current_path)
    ref = input_filename(current_path, 'Ref')

    
    print 'Test file: ', file_path
    print 'Reference file: ', ref
    print
    start_time = input_variable('start_time to calculate')
    end_time = input_variable('end_time to calculate')

    start = time.get_seconds()
    initial_start = time.get_seconds()
    mat_data = read_mat_data(file_path)
    data = get_data(mat_data, problem_channels = PROBLEM_CH)
    start = report_time(start)
    plot_data(data[:, start_time*SAMPLE_FREQUENCY:end_time*SAMPLE_FREQUENCY],start_time = start_time, end_time = end_time, plot_name = 'Exp EEG')
    #plot_data(data[STFT_CH-1:STFT_CH,self.s*SAMPLE_FREQUENCY:self.e*SAMPLE_FREQUENCY], plot_name = 'Exp EEG')
    start = report_time(start)

    latencies = get_data(mat_data, 'latencies')

    ref_mat_data = read_mat_data(ref)
    ref_data = get_data(ref_mat_data, problem_channels = PROBLEM_CH)
    plot_data(ref_data[:,REF_TIME_START*SAMPLE_FREQUENCY:REF_TIME_END*SAMPLE_FREQUENCY], plot_name = 'Reference EEG', start_time = REF_TIME_START, end_time = REF_TIME_END)
    start = report_time(start)
    
    do_slope = do_eigen = do_stft = do_corr = False
    do_slope = input_yes_or_no('If do slope?')
    do_eigen = input_yes_or_no('If do correlation structure(eigenvalues)?')
    do_stft = input_yes_or_no('If do STFT?')
    do_corr = input_yes_or_no('If do correlation sum?')
    if not (do_slope or do_eigen or do_stft or do_corr):
        print 'Nothing to calculate.'
        return restart()
    if do_corr:
        print '============================================================================'
        print '==                                                                        =='
        print '== Note:                                                                  =='
        print '== \'Correlation sum\' was invented by the author of this program;          =='
        print '==  there is no reference paper,                                          =='
        print '==  it is unknown whether a similar method exists elsewhere,              =='
        print '==  and it has not been validated.                                        =='
        print '==  Please check its reliability and inform the author before use.       =='
        print '==                                                                        =='
        print '============================================================================'
        if not input_yes_or_no('Have you read and do you agree to the above?'):
            print 'Not agreed.'
            return restart()


    c = do_calculation(start_time, end_time)
    if (do_slope):
        print
        print '=============='
        print '== Do slope =='
        print '=============='
        slope_ref = c.calculate_slope_ref(ref_data)
        #slope_change = calculate_slope_change(data, slope_ref, 'change')
        slope_num = c.calculate_slope_change(data, slope_ref, 'num')
        start = report_time(start)
        print 'ref_data', ref_data.shape
        #stft_change_ref = calculate_stft(ref_data[:, REF_TIME_START*SAMPLE_FREQUENCY:REF_TIME_END*SAMPLE_FREQUENCY], 1, 50)
    if (do_eigen):
        print
        print '=============='
        print '== Do eigen =='
        print '=============='
  
        t_eigen_ref = c.calculate_eigenvalue_ref(ref_data, data_type = "Time")
        start = report_time(start)
        t_eigen_ref_std = transforms.Stats().apply(t_eigen_ref)
        print 'ref std:', t_eigen_ref_std.shape
        start = report_time(start)
        t_eigen_ref_mean = np.average(t_eigen_ref, axis = 1)
        print 'ref avg:', t_eigen_ref_mean.shape
        start = report_time(start)
        t_eigen_change = c.calculate_eigen_change(data, t_eigen_ref_mean, t_eigen_ref_std, data_type = 'Time')
        start = report_time(start)
 
        f_eigen_ref = c.calculate_eigenvalue_ref(ref_data, data_type = 'Frequency')
        f_eigen_ref_std = transforms.Stats().apply(f_eigen_ref)
        f_eigen_ref_mean = np.average(f_eigen_ref, axis = 1)
        f_eigen_change = c.calculate_eigen_change(data, f_eigen_ref_mean, f_eigen_ref_std, data_type = 'Frequency')

    if (do_stft):
        print
        print '============='
        print '== Do STFT =='
        print '============='
        stft_change = c.calculate_stft(data, ref = ref_data[:, REF_TIME_START*SAMPLE_FREQUENCY:REF_TIME_END*SAMPLE_FREQUENCY])
        start = report_time(start)
        stft_change = np.swapaxes(stft_change, 0 , 1)
        print 'stft change swap', stft_change.shape

    if (do_corr):
        print
        print '========================'
        print '== Do correlation sum =='
        print '========================'
        corr_change_ref = c.calculate_corr_ref(ref_data, data_type = 'Time')
        corr_change_ref_std = transforms.Stats().apply(corr_change_ref)
        corr_change_ref_mean = np.average(corr_change_ref, axis = 1)
        start = report_time(start)
        corr_change = c.calculate_corr_change(data, corr_change_ref_mean, corr_change_ref_std, data_type = 'Time')


    print
    if input_yes_or_no('If plot out the results?'):
        def check_plot_options():
            latencies_t = slope_num_t =  slope_change_t = t_eigen_change_t = f_eigen_change_t = stft_change_t = corr_change_t = []
            stft_ch_t = -1
            num_of_figures = 0
            if input_yes_or_no('If show behavior on the plot?'):
                latencies_t = latencies
                num_of_figures += 1
            if do_slope:
                if input_yes_or_no('If show slope?'):
                    slope_num_t = slope_num
                    num_of_figures += 1
            if do_eigen:
                if input_yes_or_no('If show time domain correlation structure?'):
                    t_eigen_change_t = t_eigen_change
                    num_of_figures += 1
                if input_yes_or_no('If show frequency domain correlation structure?'):
                    f_eigen_change_t = f_eigen_change
                    num_of_figures += 1
            if do_stft:
                if input_yes_or_no('If show Short-time Fourier transform (STFT)?\n Note: cannot print STFT with correlation structure.'):
                    stft_ch_t = input_variable('STFT channel to show:')
                    stft_change_t = stft_change[stft_ch_t-1]
                    num_of_figures += 2
            if do_corr:
                if input_yes_or_no('If show correlation sum?'):
                    corr_change_t = corr_change
                    num_of_figures += 1

            p = c.plot_figures(latencies = latencies_t, seizure_num_by_slope = slope_num_t,
                   slope_change = slope_change_t,
                   t_eigen_change = t_eigen_change_t,
                   f_eigen_change = f_eigen_change_t,
                   stft_change = stft_change_t,
                   stft_ch = stft_ch_t,
                   corr_change = corr_change_t,
                   number_of_figures = num_of_figures
                    )
            print
            if not p :
                print 'Plotted nothing'
            print 
            if input_yes_or_no('Plot again?'):
                return check_plot_options()
            else:
                return False
        check_plot_options()
    if do_slope:
        if (input_yes_or_no('Save slope num data?')):
            filename = os.path.basename(file_path)
            savefilename = os.path.join(current_path, '%s_slope_%ds_%ds'%(filename, start_time, end_time))
            scipy.io.savemat(savefilename, {'slope_num':slope_num, 'start_time':start_time, 'end_time':end_time})
            print 'Saved file:%s.mat' % savefilename
            print
    if do_eigen:
        if (input_yes_or_no('Save correlation structure data?')):
            filename = os.path.basename(file_path)
            savefilename = os.path.join(current_path, '%s_correlation_structure_%ds_%ds'%(filename, start_time, end_time))
            scipy.io.savemat(savefilename, {'time_corr_struct':t_eigen_change, 'freq_corr_struct': f_eigen_change, 
            'start_time':start_time, 'end_time':end_time})
            print 'Saved file:%s.mat' % savefilename
            print
    if do_stft:
        if (input_yes_or_no('Save STFT data?')):
            filename = os.path.basename(file_path)
            savefilename = os.path.join(current_path, '%s_stft_%ds_%ds'%(filename, start_time, end_time))
            scipy.io.savemat(savefilename, {'stft':stft_change, 'start_time':start_time, 'end_time':end_time})
            print 'Saved file:%s.mat' % savefilename
            print
    if do_corr:
        if (input_yes_or_no('Save correlation sum data?')):
            filename = os.path.basename(file_path)
            savefilename = os.path.join(current_path, '%s_corr_%ds_%ds'%(filename, start_time, end_time))
            scipy.io.savemat(savefilename, {'corr':corr_change, 'start_time':start_time, 'end_time':end_time})
            print 'Saved file:%s.mat' % savefilename
            print


    print
    print '======================'
    print 'Total time:', 
    print
    start = report_time(initial_start)
    print
    return restart()
def report_time(start):
    print '(Used %dsec)' % (time.get_seconds() - start)
    new_start = time.get_seconds()
    return new_start
    def process_raw_data(mat_data,splitsize):
        start = time.get_seconds()
        print 'Loading data',
        X = []
        y = []
        h_num = []
        cc = 0
        hour_num = 0
        pre_sequence_num = 0
        for segment in mat_data:
            cc += 1
            print cc
            for skey in segment.keys():
                if "data" in skey.lower():
                    mykey = skey
            try:
                sequence_num = segment[mykey][0][0][4][0][0]
            except:
                sequence_num = random.randint(1, 6)
            print 'seq: %d' % (sequence_num)
            if sequence_num != pre_sequence_num + 1:
                hour_num += 1  # a break in the sequence starts a new recording hour
            print "hour_num: %d" % (hour_num)
            pre_sequence_num = sequence_num
            if preictal:
                try:
                    preictual_sequence = segment[mykey][0][0][4][0][0]
                except Exception:
                    preictual_sequence = 1
                y_value = preictual_sequence  # temporarily set to sequence number
            elif interictal:
                y_value = 0

            data = segment[mykey][0][0][0]
            # if target == '2':
            #     data = np.delete(data, [3, 9], 1)
            data_tmp = data[np.invert(np.all(data == 0, axis=1))]
            if data_tmp.shape[0] <= 2000:
                print 'too many zero rows, skipping'
                continue
            sampleSizeinSecond = data_tmp.shape[0] / 400
            data = data_tmp.transpose()
            axis = data_tmp.ndim - 1
            # tic=time.get_seconds()
            print sampleSizeinSecond
            '''DataSampleSize: split the 10-minute data into several clips.
            With one-second clips, Patient1 and Patient2 finished in 3 hours, but Dog1
            crashed after 7+ hours with an out-of-memory error, so try ten-second clips.
            '''
            DataSampleSize = splitsize  # data.shape[1]/(totalSample *1.0)  #try to split data into equal size
            splitIdx = np.arange(DataSampleSize, data.shape[1], DataSampleSize)
            splitIdx = np.int32(np.ceil(splitIdx))
            splitData = np.hsplit(data, splitIdx)
            SPF = 0
            for s in splitData:
                if s.shape[1] < 5000:  # skip clips that are too short
                    continue
                transformed_data = pipeline.apply(s)
                X.append(transformed_data)
                y.append(y_value)
                h_num.append(hour_num)
                SPF += 1
                if np.any(np.isnan(transformed_data)) or np.any(np.isinf(transformed_data)):
                    print 'warning: NaN or Inf in transformed data'
            print 'done'

        print '(%ds)' % (time.get_seconds() - start)

        X = np.array(X)
        y = np.array(y)
        h_num = np.array(h_num)
        print 'X', X.shape, 'y', y.shape
        return X, y, h_num
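The hour_num bookkeeping above assigns consecutive 10-minute segments to the same recording hour whenever their sequence numbers run contiguously. A small self-contained restatement of that rule (group_hours is a hypothetical helper, not from the snippet):

def group_hours(sequence_nums):
    # A break in the 1, 2, 3, ... run starts a new hour group.
    hour_num, pre, hours = 0, 0, []
    for seq in sequence_nums:
        if seq != pre + 1:
            hour_num += 1
        hours.append(hour_num)
        pre = seq
    return hours

print(group_hours([1, 2, 3, 1, 2, 5, 6]))   # -> [0, 0, 0, 1, 1, 2, 2]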
Example #44
    def process_raw_data(mat_data):
        start = time.get_seconds()
        print('Loading data', end=' ')
        #print mat_data
        X = []
        y = []
        previous_transformed_data = []  #used in two window model
        previous_sequence = 0
        for segment in mat_data:
            for skey in list(segment.keys()):
                if "_segment_" in skey.lower():
                    mykey = skey

            if preictal:
                preictual_sequence = segment[mykey][0][0][4][0][0]
                y_value = preictual_sequence  #temporarily set to sequence number
                if preictual_sequence != previous_sequence + 1:
                    previous_transformed_data = []  #if data is not in sequence
                previous_sequence = preictual_sequence
            elif interictal:
                y_value = 0
                previous_transformed_data = []  # interictal data is not in sequence between files
            else:
                previous_transformed_data = []  # test data is not in sequence between files

            data = segment[mykey][0][0][0]
            sampleFrequency = segment[mykey][0][0][2][0][0]
            axis = data.ndim - 1
            if sampleFrequency > targetFrequency:  #resample to target frequency
                data = resample(data,
                                targetFrequency * sampleSizeinSecond,
                                axis=axis)
            '''DataSampleSize: split the 10-minute data into several clips.
            With one-second clips, Patient1 and Patient2 finished in 3 hours, but Dog1
            crashed after 7+ hours with an out-of-memory error, so try ten-second clips.
            '''
            DataSampleSize = data.shape[1] / (
                totalSample * 1.0)  #try to split data into equal size
            splitIdx = np.arange(DataSampleSize, data.shape[1], DataSampleSize)
            splitIdx = np.int32(np.ceil(splitIdx))
            splitData = np.hsplit(data, splitIdx)
            #             for i  in range(totalSample):
            #                 s = splitData[i]
            #                 s2 = splitData[i+totalSample]

            for s in splitData:
                if s.size > 0:  #is not empty
                    #                     s = 1.0 * s     #convert int to float
                    #                     s_scale = preprocessing.scale(s, axis=0, with_std = True)
                    #                     transformed_data = pipeline.apply([subjectID, s])
                    transformed_data = pipeline.apply(s)
                    #                     previous_transformed_data.append(transformed_data)
                    #                         transformed_data2 = pipeline.apply([subjectID, s1])
                    #                     if len(previous_transformed_data) > totalSample/2:
                    #                         combined_transformed_data = np.concatenate((transformed_data, previous_transformed_data.pop(0)), axis=transformed_data.ndim-1)
                    #                         X.append(combined_transformed_data)
                    X.append(transformed_data)
                    if preictal or interictal:
                        y.append(y_value)

        print('(%ds)' % (time.get_seconds() - start))

        X = np.array(X)
        if preictal or interictal:
            y = np.array(y)
            print('X', X.shape, 'y', y.shape)
            return X, y
        else:
            print('X', X.shape)
            return X
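The resample-then-split step above normalizes every segment to targetFrequency and carves it into totalSample equal clips with np.hsplit. A minimal runnable sketch, with hypothetical constants in place of the snippet's globals:

import numpy as np
from scipy.signal import resample

targetFrequency, sampleSizeinSecond, totalSample = 200, 600, 12   # hypothetical
data = np.random.randn(4, 400 * 600)    # 4 channels, 10 minutes at 400 Hz

axis = data.ndim - 1
data = resample(data, targetFrequency * sampleSizeinSecond, axis=axis)

DataSampleSize = data.shape[1] / (totalSample * 1.0)
splitIdx = np.int32(np.ceil(np.arange(DataSampleSize, data.shape[1], DataSampleSize)))
clips = np.hsplit(data, splitIdx)
print(len(clips), clips[0].shape)        # 12 clips, each 4 x 10000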
Example #45
    def predict_all(make_predictions):
        for pipeline in pipelines:
            for (classifier, classifier_name) in classifiers:
                print('Using pipeline %s with classifier %s' %
                      (pipeline.get_name(), classifier_name))
                lines = ['clip,preictal']
                subjectID = 0
                X_train = y_train = X_test = None  # avoid aliasing one shared list
                test_size = []
                for target in targets:
                    task_core = TaskCore(
                        cached_data_loader=cached_data_loader,
                        data_dir=data_dir,
                        target=target,
                        pipeline=pipeline,
                        classifier_name=classifier_name,
                        classifier=classifier,
                        normalize=should_normalize(classifier),
                        gen_preictal=pipeline.gen_preictal,
                        cv_ratio=cv_ratio)

                    data = GetCrossSubjectDataTask(task_core).run()
                    #                     a = np.shape(data.X_test)[0]
                    test_size.append(np.shape(data.X_test)[0])
                    if subjectID > 0:
                        X_train = np.concatenate((X_train, data.X_train),
                                                 axis=0)
                        y_train = np.concatenate((y_train, data.y_train),
                                                 axis=0)
                        X_test = np.concatenate((X_test, data.X_test), axis=0)
                    else:
                        X_train = data.X_train
                        y_train = data.y_train
                        X_test = data.X_test
                    subjectID += 1

                #Training
                task_core = TaskCore(cached_data_loader=cached_data_loader,
                                     data_dir=data_dir,
                                     target=[],
                                     pipeline=pipeline,
                                     classifier_name=classifier_name,
                                     classifier=classifier,
                                     normalize=should_normalize(classifier),
                                     gen_preictal=pipeline.gen_preictal,
                                     cv_ratio=cv_ratio)
                y_train = np.ceil(0.1 * y_train)
                y_train = y_train.astype('int_')  # astype returns a copy; assign it back
                if should_normalize(classifier):
                    X_train, temp = normalize_data(X_train, X_train)

                print("Training ...")
                print('Dim', np.shape(X_train), np.shape(y_train))
                start = time.get_seconds()
                classifier.fit(X_train, y_train)
                elapsedSecs = time.get_seconds() - start
                print("t=%ds" % int(elapsedSecs))

                y_estimate = classifier.predict_proba(X_train)
                lr = LogisticRegression(random_state=0)
                lr.fit(y_estimate, y_train)
                predictions_proba = classifier.predict_proba(X_test)
                predictions_calibrated = lr.predict_proba(predictions_proba)

                #output
                m = 0
                totalSample = 12
                startIdx = 0
                for target in targets:
                    for i in range(test_size[m] // totalSample):  # integer division for range()
                        j = i + 1
                        nstr = '%04d' % j  # zero-pad to four digits

                        preictalOverAllSample = 0
                        for k in range(totalSample):
                            p = predictions_calibrated[i * totalSample + k +
                                                       startIdx]
                            preictal = translate_prediction(p)
                            preictalOverAllSample += preictal / totalSample

                        newline = '%s_test_segment_%s.mat,%.15f' % (
                            target, nstr, preictalOverAllSample)
                        lines.append(newline)

                    print(newline)
                    startIdx = startIdx + test_size[m]
                    m += 1

                filename = 'submission%d-%s_%s.csv' % (ts, classifier_name,
                                                       pipeline.get_name())
                filename = os.path.join(submission_dir, filename)
                with open(filename, 'w') as f:
                    print('\n'.join(lines), file=f)
                print('wrote', filename)
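predict_all calibrates the classifier with a second-stage LogisticRegression fitted on the classifier's own training probabilities, then maps the test probabilities through it. A minimal sketch of that Platt-style stacking (the base classifier here is an arbitrary stand-in):

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

X_train = np.random.randn(200, 10)
y_train = np.random.randint(0, 2, 200)
X_test = np.random.randn(50, 10)

clf = RandomForestClassifier(n_estimators=20, random_state=0).fit(X_train, y_train)

# Second stage: learn a mapping from raw probabilities to labels.
lr = LogisticRegression(random_state=0)
lr.fit(clf.predict_proba(X_train), y_train)
calibrated = lr.predict_proba(clf.predict_proba(X_test))[:, 1]

Note that fitting the calibrator on the same data the classifier was trained on, as the snippet does, tends to produce overconfident probabilities; a held-out fold is the safer choice.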
Example #46
def load_training_data(settings,
                       target,
                       pipeline,
                       check_only,
                       strategy=None,
                       cv_fold_number=None,
                       quiet=False):
    cv = cv_fold_number is not None
    if check_only:
        return load_pipeline_data(settings, target, 'preictal', pipeline, check_only=True, quiet=quiet) or \
               load_pipeline_data(settings, target, 'interictal', pipeline, check_only=True, quiet=quiet)

    preictal, preictal_meta = load_pipeline_data(settings,
                                                 target,
                                                 'preictal',
                                                 pipeline,
                                                 check_only=False,
                                                 quiet=quiet)
    interictal, interictal_meta = load_pipeline_data(settings,
                                                     target,
                                                     'interictal',
                                                     pipeline,
                                                     check_only=False,
                                                     quiet=quiet)

    total_segments = preictal_meta.num_segments + interictal_meta.num_segments
    # print 'total_segments', total_segments

    if not quiet: print 'Preparing data ...',
    start = time.get_seconds()

    def make_fold(preictal_X_train, preictal_X_cv, interictal_X_train,
                  interictal_X_cv):
        num_train_segments = preictal_X_train.shape[
            0] + interictal_X_train.shape[0]
        num_cv_segments = preictal_X_cv.shape[0] + interictal_X_cv.shape[0]
        assert (num_train_segments + num_cv_segments) == total_segments

        flattened_preictal_X_train = flatten(preictal_X_train)
        flattened_interictal_X_train = flatten(interictal_X_train)
        flattened_preictal_X_cv = flatten(preictal_X_cv) if cv else np.empty(
            (0, ))
        flattened_interictal_X_cv = flatten(
            interictal_X_cv) if cv else np.empty((0, ))

        X_train = np.concatenate(
            (flattened_preictal_X_train, flattened_interictal_X_train), axis=0)
        X_cv = np.concatenate(
            (flattened_preictal_X_cv, flattened_interictal_X_cv), axis=0)

        preictal_y_train = np.ones((flattened_preictal_X_train.shape[0], ))
        preictal_y_cv = np.ones((preictal_X_cv.shape[0], ))
        interictal_y_train = np.zeros(
            (flattened_interictal_X_train.shape[0], ))
        interictal_y_cv = np.zeros((interictal_X_cv.shape[0], ))

        y_train = np.concatenate((preictal_y_train, interictal_y_train),
                                 axis=0)
        y_cv = np.concatenate((preictal_y_cv, interictal_y_cv), axis=0)

        X_train, y_train = sklearn.utils.shuffle(X_train,
                                                 y_train,
                                                 random_state=0)

        return jsdict({
            'X_train': X_train,
            'y_train': y_train,
            'X_cv': X_cv,
            'y_cv': y_cv,
            'num_train_segments': num_train_segments,
            'num_cv_segments': num_cv_segments
        })

    if cv:
        preictal_X_train, preictal_X_cv = strategy.split_train_cv(
            preictal, preictal_meta, cv_fold_number)
        interictal_X_train, interictal_X_cv = strategy.split_train_cv(
            interictal, interictal_meta, cv_fold_number, interictal=True)
        data = make_fold(preictal_X_train, preictal_X_cv, interictal_X_train,
                         interictal_X_cv)
    else:
        preictal_X_train = preictal
        preictal_X_cv = np.empty((0, ))
        interictal_X_train = interictal
        interictal_X_cv = np.empty((0, ))
        data = make_fold(preictal_X_train, preictal_X_cv, interictal_X_train,
                         interictal_X_cv)

    if not quiet: print '%ds' % (time.get_seconds() - start)

    if not quiet:
        print 'X_train', data.X_train.shape, 'y_train', data.y_train.shape, 'X_cv', data.X_cv.shape, 'y_cv', data.y_cv.shape

    return data
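make_fold pairs the flattened preictal and interictal blocks with constant labels (1 and 0), concatenates them, and shuffles once with a fixed seed. A minimal sketch, assuming flatten collapses everything after the segment axis (its definition is not shown here):

import numpy as np
import sklearn.utils

def flatten(a):
    return a.reshape(a.shape[0], -1)   # assumption about the helper's behavior

preictal = np.random.randn(6, 4, 8)      # hypothetical: 6 preictal segments
interictal = np.random.randn(10, 4, 8)   # 10 interictal segments

X_train = np.concatenate((flatten(preictal), flatten(interictal)), axis=0)
y_train = np.concatenate((np.ones(6), np.zeros(10)), axis=0)
X_train, y_train = sklearn.utils.shuffle(X_train, y_train, random_state=0)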
Example #47
    def process_raw_data(mat_data, with_latency):
        start = time.get_seconds()
        print 'Loading data',
        X = []
        y = []
        latencies = []

        prev_data = None
        prev_latency = None
        for segment in mat_data:
            data = segment['data']
            transformed_data = pipeline.apply(data)

            if with_latency:
                # this is ictal
                latency = segment['latency'][0]
                if latency <= 15:
                    y_value = 0 # ictal <= 15
                else:
                    y_value = 1 # ictal > 15

                # generate extra ictal training data by taking 2nd half of previous
                # 1-second segment and first half of current segment
                # 0.5-1.5, 1.5-2.5, ..., 13.5-14.5, ..., 15.5-16.5
                # cannot take half of 15 and half of 16 because it cannot be strictly labelled as early or late
                if gen_ictal and prev_data is not None and prev_latency + 1 == latency and prev_latency != 15:
                    # gen new data :)
                    axis = prev_data.ndim - 1
                    def split(d):
                        return np.split(d, 2, axis=axis)
                    new_data = np.concatenate((split(prev_data)[1], split(data)[0]), axis=axis)
                    X.append(pipeline.apply(new_data))
                    y.append(y_value)
                    latencies.append(latency - 0.5)

                y.append(y_value)
                latencies.append(latency)

                prev_latency = latency
            elif y is not None:
                # this is interictal
                y.append(2)

            X.append(transformed_data)
            prev_data = data

        print '(%ds)' % (time.get_seconds() - start)

        X = np.array(X)
        y = np.array(y)
        latencies = np.array(latencies)

        if ictal:
            print 'X', X.shape, 'y', y.shape, 'latencies', latencies.shape
            return X, y, latencies
        elif interictal:
            print 'X', X.shape, 'y', y.shape
            return X, y
        else:
            print 'X', X.shape
            return X
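The ictal augmentation above synthesizes a half-offset clip from two consecutive 1-second segments: the second half of the previous segment joined to the first half of the current one, labelled with latency - 0.5. A tiny numpy illustration:

import numpy as np

prev_data = np.arange(8).reshape(2, 4)      # previous 1-second clip (2 channels)
data = np.arange(8, 16).reshape(2, 4)       # current 1-second clip

axis = prev_data.ndim - 1
def split(d):
    return np.split(d, 2, axis=axis)
new_data = np.concatenate((split(prev_data)[1], split(data)[0]), axis=axis)
print(new_data)   # [[ 2  3  8  9]
                  #  [ 6  7 12 13]] -- a clip straddling the segment boundary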
Example #48
    def train(self, X, Y):
        print('training ..')
        start_time = time.get_seconds()
        self.classif_minibatch = miniBatchIterator(self.graphL_minibatch, self.classif_core.batch_size, self.placeholders, X, Y)
        #config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
        #config.gpu_options.allow_growth = True
        #config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION
        #config.allow_soft_placement = True
        self.sess = tf.Session() #config=config
        self.sess.run(tf.global_variables_initializer())
        total_steps = 0
        avg_time = 0.0
        losses = []
        num_epochs = self.classif_core.epochs 
        outs_before = [0,0,0,[0,0]]
        for epoch in range(num_epochs):
            self.classif_minibatch.shuffle() 
            iter = 0
            while(not self.classif_minibatch.end()):            
                feed_dict = self.classif_minibatch.next()
#                 self.project_GD()
                self.sess.run([self.opt_op], feed_dict=feed_dict)
                outs = self.sess.run([self.loss, self.graphL_W, self.adj_mat, self.variables,
                                       self.loss_class, self.loss_graphL, self.Z, self.Theta],
                                       feed_dict=feed_dict)
                losses.append(outs[0])
#                 print('')
#                 print('    loss; %f, loss-class: %f, loss-graphL: %f' %(outs[0], outs[4], outs[5]))
#                 print('    A after projection: \n', (outs[2]+1)/2)
                if(self.graphL_core.coordinate_gradient):
                    self.adj_mat_coordinate_descent()
                elif(self.graphL_core.projected_gradient):
                    self.project_GD()
                outs_after = self.sess.run([self.adj_mat, self.variables], feed_dict=feed_dict)
#                 print('    A diff-inner: ', np.sum(np.abs(outs_after[0]-outs[2])))
#                 print('    variables0 diff-inner: ', np.sum(np.abs(outs_after[1][0]-outs[3][0])))
#                 print('    variables1 diff-inner: ', np.sum(np.abs(outs_after[1][1]-outs[3][1])))
#                 print('')
#                 A_diff = np.sum(np.abs(outs_after[0]-outs_before[2]))
#                 print('    A diff: ', A_diff) # (1+outs_after[0])/2
#                 print('    variables0 diff: ', np.sum(np.abs(outs_after[1][0]-outs_before[3][0])))
#                 print('    variables1 diff: ', np.sum(np.abs(outs_after[1][1]-outs_before[3][1])))

#                 if(A_diff<self.classif_core.A_proj_th and A_diff>0 and iter>10):
#                     self.project_GD()
                outs_before = outs.copy()
#                 print('Sample Z: ', outs[6][3][0])
#                 print('        : ', outs[6][3][3])
#                 print('Sample W: ', np.reshape(outs[1][3], (self.graphL_core.num_nodes, self.graphL_core.num_nodes)))
#                 np.savetxt('Sample_Z.txt', outs[6][3])
#                 np.savetxt('Sample_W.txt', outs[1][3])
                iter += 1
                total_steps += 1
                if total_steps > self.classif_core.max_total_steps:
                    break
            print('    epoch: ', epoch)
            if total_steps > self.classif_core.max_total_steps:
                break
        print('    Final A: ', (outs_after[0]+1)/2)
        print('    Final Theta: \n', outs[7])
#         plotting_figure(np.array(losses), 'loss')
        print('    time elapsed: ', time.get_seconds()-start_time)    
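project_GD and adj_mat_coordinate_descent are defined elsewhere in this class; since the final adjacency is reported as (A + 1)/2, its entries presumably live in [-1, 1]. A purely illustrative sketch of what a projection step onto that box might look like, not the author's implementation:

import numpy as np

def project_box(A, lo=-1.0, hi=1.0):
    # After the unconstrained gradient update, clip the adjacency entries
    # back into the feasible box and re-symmetrize.
    A = np.clip(A, lo, hi)
    return (A + A.T) / 2.0

A = np.random.randn(5, 5) * 2.0
A = project_box(A)
assert A.min() >= -1.0 and A.max() <= 1.0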
Example #49
def parse_input_data(filename, ref = 'None'):
    def read_mat_data(filename):
        if os.path.exists(filename):
            mat_data = scipy.io.loadmat(filename)
        else:
            raise Exception("file %s not found" % filename)
        return mat_data
     

    def report_time(start):
        print '(%ds)' % (time.get_seconds() - start)
        new_start = time.get_seconds()
        return new_start

    # for each data point in ictal, interictal and test,
    # generate (X, <y>, <latency>) per channel
    def get_data(mat_data, data_type = 'data', problem_channels = []):
        print 'Loading data',

        if 'data_behavior' in mat_data:
            dataKey = 'data_behavior'
        elif 'data_3sFIR' in mat_data:
            dataKey = 'data_3sFIR'
        else:
            dataKey = 'data'
        print "mat:", mat_data[dataKey].shape
        data = mat_data[dataKey][0:TOTAL_CH_NUM,:]
        if len(problem_channels)!=0:
            for each_channel in problem_channels:
                data = np.delete(data, each_channel-1, axis = 0)
        if data_type == 'data':
            print 'Data:', data.shape, data
            return data
        elif data_type == 'latencies':
            if mat_data[dataKey].shape[0] > TOTAL_CH_NUM:
                latencies = mat_data[dataKey][TOTAL_CH_NUM, :]
            else:
                latencies = np.zeros(len(data[0]))
            print 'Latencies:', latencies
            return latencies


    def plot_EEG(data):
        """
        Plot out the original EEG signals.
        """
        print 'Plotting out the original EEG signals... ',
        channels_fig = plt.figure()
        x1 = np.arange(START_TIME, END_TIME, 1.0/SAMPLE_FREQUENCY)
        for i in range(0,CH_NUM):
            plt.subplot(CH_NUM, 1, i+1)
            plt.plot(x1, data[i,START_TIME*SAMPLE_FREQUENCY:END_TIME*SAMPLE_FREQUENCY])
        plt.show()

    def plot_data(data, plot_name = 'None', s = START_TIME, e = END_TIME, period = 1.0/SAMPLE_FREQUENCY):
        """
        Plot out arbitrary data.
        """
        print 'Plotting out figure:', plot_name
        plt.figure()
        plt.title(plot_name)
        x1 = np.arange(s, e, period)
        col = data.shape[0]
        for i in range(0, col):
            if i==1:
                plt.title(plot_name)
            plt.subplot(col, 1, i+1)
            plt.plot(x1, data[i])
            plt.ylim(-0.0015, 0.0015)
        #plt.show()


    def calculate_eigenvalue_ref(data, data_type = 'None'):
        """
        Using sliding window to calculate the change of eigenvalue
        with time.
        """
        print 'Calculating reference change of eigenvalue in ', data_type, ' domain'
        #the change of eigenvalue with time in frequency/time domain
        eigen_ref = []
        for i in range(int(REF_TIME_START*SAMPLE_FREQUENCY), int(REF_TIME_END*SAMPLE_FREQUENCY)):
            if data_type == 'Time':
                data_correlation = transforms.TimeCorrelation_whole(50, 'usf').apply(data[:, i:i+WINDOW_RANGE])
            elif data_type == 'Frequency':
                data_correlation = transforms.FreqCorrelation_whole(1, 50, 'usf').apply(data[:, i:i+WINDOW_RANGE])
            w = transforms.Eigenvalues().apply(data_correlation)
            eigen_ref.append(w)

        eigen_ref = np.array(eigen_ref)
        eigen_ref = np.swapaxes(eigen_ref, 0, 1)
        print data_type, ' eigen ref:', eigen_ref.shape
        print eigen_ref
        return eigen_ref

    def calculate_eigen_change(data, ref_mean, ref_std, data_type = 'none'):
        eigen_change = []
        for i in range(int(START_TIME*SAMPLE_FREQUENCY), int(END_TIME*SAMPLE_FREQUENCY), SAMPLE_FREQUENCY/4):
            if (data_type == 'Time'):
                data_correlation = transforms.TimeCorrelation_whole(50, 'usf').apply(data[:, i:i+WINDOW_RANGE])
            elif (data_type == 'Frequency'):
                data_correlation = transforms.FreqCorrelation_whole(1, 50, 'usf').apply(data[:, i:i+WINDOW_RANGE])

            w = transforms.Eigenvalues().apply(data_correlation)
            eigen_change.append(w)

        eigen_change = np.array(eigen_change)
        eigen_change = np.swapaxes(eigen_change, 0, 1)
        print data_type,' change:',  eigen_change
        for i in range(0, eigen_change.shape[1]):
            for j in range(0, CH_NUM):
                if i < 2:
                    print i, j
                    print eigen_change[j][i],
                    print ref_mean[j],      # use the passed-in reference, not outer t_eigen_* names
                    print ref_std[j][0]
                eigen_change[j][i] = (eigen_change[j][i] - ref_mean[j]) / ref_std[j][0]
        print data_type, 'eigen change normalized:', eigen_change.shape
        print eigen_change
        return eigen_change

    def calculate_stft(data, start, end, ref = []):
        stft_change_ref = []
        stft_change_ref_mean = []
        stft_change_ref_std = []
        #stft_change_ref = transforms.STFT(start, end).apply(ref)
        for i in range(0, CH_NUM):
            ch_stft_change = []
            ch_stft_change_mean = []
            ch_stft_change_std = []
            for j in range(0, ref.shape[1], STFT_PERIOD):
                ref_stft = transforms.STFT(start, end).apply(ref[i, j:j+WINDOW_RANGE])
                ch_stft_change.append(ref_stft)
            stft_change_ref.append(ch_stft_change)
            ch_stft_change = np.array(ch_stft_change[0:120])  # keep at most the first 120 windows
            print 'ch change',  ch_stft_change.shape
            #print ch_stft_change
            ch_stft_change_mean = np.average(ch_stft_change, axis = 0)
            ch_stft_change = np.swapaxes(ch_stft_change,0,1)
            ch_stft_change_std = transforms.Stats().apply(ch_stft_change)

            stft_change_ref_mean.append(ch_stft_change_mean)
            stft_change_ref_std.append(ch_stft_change_std)

        stft_change_ref = np.array(stft_change_ref)
        print 'stft_change_ref', stft_change_ref.shape
        #print stft_change_ref
        #stft_change_ref = np.swapaxes(stft_change_ref, 0, 1)
        #stft_change_ref_mean = np.average(stft_change_ref, axis = 1)
        stft_change_ref_mean = np.array(stft_change_ref_mean)
        print 'stft_change_ref_mean', stft_change_ref_mean.shape
        stft_change_ref_std = np.array(stft_change_ref_std)
        print 'stft_change_ref_std', stft_change_ref_std.shape

        

        stft_change = []
        for i in range(0, CH_NUM):
            ch_stft_change = []
            for j in range(int(START_TIME*SAMPLE_FREQUENCY), int(END_TIME*SAMPLE_FREQUENCY), STFT_PERIOD):
                data_stft = transforms.STFT(start, end).apply(data[i, j:j+WINDOW_RANGE])
                for k in range(data_stft.shape[0]):
                    data_stft[k] = (data_stft[k] - stft_change_ref_mean[i][k])/stft_change_ref_std[i][k][0]
                ch_stft_change.append(data_stft)
            stft_change.append(ch_stft_change)
        """
        stft_change = []
        for i in range(int(START_TIME*SAMPLE_FREQUENCY), int(END_TIME*SAMPLE_FREQUENCY), STFT_PERIOD):
            data_stft = transforms.STFT(start, end).apply(data[:, i:i+WINDOW_RANGE])
            for j in range(data_stft.shape[0]):
                for k in range(data_stft.shape[1]):
                    if j < 2 and k < 10 and i <100:
                        print j, k
                        print data_stft[j][k]
                        print stft_change_ref_mean[j]
                        print stft_change_ref_std[j][0]
                    data_stft[j][k] = (data_stft[j][k] - stft_change_ref_mean[j])/stft_change_ref_std[j][0]

            stft_change.append(data_stft)
        """
        stft_change = np.array(stft_change)
        stft_change = np.swapaxes(stft_change, 0, 1)
        print 'stft change:',  stft_change.shape
        print stft_change
        return stft_change

    def plot_stft(data):
        fig = plt.figure()
        """
        ax = fig.gca(projection = '3d')
        X = []
        Y = []
        Z = []
        print 'stft data', data.shape
        for i in range(0, data.shape[0]):
            x = []
            y = []
            for j in range(1, data.shape[1]+1):
                x.append(i*(float(END_TIME-START_TIME)/data.shape[0])+START_TIME)
                y.append(j)
                #Z.append(data[i][j-1])
            X.append(x)
            Y.append(y)
        X = np.array(X)
        Y = np.array(Y)
        Z = np.array(Z)
       # x, y = np.meshgrid(X, Y)
        #y, z = np.meshgrid(Y, Z)
        print 'X:', X.shape
        #print X
        print 'Y:', Y.shape
        #print Y
        print 'Z:', Z.shape
       # print Z

        #ax1 = fig.add_subplot(121)
        surf = ax.plot_surface(X, Y, data, rstride = 16, cstride = 2,
                cmap = cm.coolwarm, alpha = 0.3)
        ax.set_xlabel('Time(s)')
        ax.set_ylabel('Frequency(Hz)')
        ax.set_zlabel('Magnitude')

        fig.colorbar(surf, shrink = 0.5)
        """
        data = np.swapaxes(data, 0 ,1)
        im = plt.imshow(data, origin = 'lower',
                aspect = 'auto', extent = [START_TIME,END_TIME,0,50],
                interpolation = 'none')
        fig.colorbar(im, shrink = 0.5)
        plt.title('Normalized STFT')
        plt.xlabel('Time(s)')
        plt.ylabel('Frequency(Hz)')
        plt.tight_layout() #adjust the space between plots
        plt.show()


    def calculate_slope_ref(data):
        """
        Calculate the standard deviation and normalized slope to define seizures.
        """
        print 'Calculating the reference slope and change of slope ... '
        #reference slope
        slope_stats = []
        for i in range(int(REF_TIME_START*SAMPLE_FREQUENCY), int(REF_TIME_END*SAMPLE_FREQUENCY)):
            slopes = []
            for j in range(0, CH_NUM):
                slope = (data[j, i+1] - data[j, i] ) * SAMPLE_FREQUENCY
                slopes.append(slope)
            slope_stats.append(slopes)
        slope_stats = np.array(slope_stats)
        slope_stats = np.swapaxes(slope_stats, 0 ,1)
        slope_stats = transforms.Stats().apply(slope_stats)
        print "Slope stats:", slope_stats.shape
        print slope_stats
        return slope_stats

    def calculate_slope_change(data, slope_stats, data_type = 'change'):
        #change of slope
        #note: smoothed by SMOOTHING_PERIOD s average, calculated for each sec
        slope_change = []
        seizure_num_by_slope = []
        for i in range(int(START_TIME*SAMPLE_FREQUENCY), int(END_TIME*SAMPLE_FREQUENCY), SAMPLE_FREQUENCY):
            seizure_channels_by_slope = 0
            slopes = []
            for j in range(0, CH_NUM):
                average_slope = 0.0
                for k in range(0, int(SMOOTHING_PERIOD*SAMPLE_FREQUENCY)):
                    slope = (data[j, i+1+k] - data[j, i+k] ) * SAMPLE_FREQUENCY
                    average_slope += abs(slope)
                average_slope /= SMOOTHING_PERIOD*SAMPLE_FREQUENCY
                slope_normalized = abs(average_slope / slope_stats[j][0])
                #slope_normalized = abs(slope / slope_stats[j][0])
                if (slope_normalized > SLOPE_THRESHOLD):
                    seizure_channels_by_slope += 1
                slopes.append(slope_normalized)
            slope_change.append(slopes)
            seizure_num_by_slope.append(seizure_channels_by_slope)
            
        if data_type == 'change':
            slope_change = np.array(slope_change)
            print 'slope change of each channel', slope_change.shape
            print slope_change
            return slope_change
        elif data_type == 'num':
            seizure_num_by_slope = np.array(seizure_num_by_slope)
            print 'seizure_num_by_slope', seizure_num_by_slope
            return seizure_num_by_slope

    
    def plot_figures(latencies = [], seizure_num_by_slope = [], slope_change = [], 
            t_eigen_change = [], f_eigen_change = [], stft_change = []):
        #Plot out the seizure period and correlation structure.
        print 'Plotting out the other figures... ',
        #seizure onset by observation
        fig = plt.figure()
        plt.subplot(ROW_NUM, COL_NUM, 3)
        plt.title('Seizure Time by Behavior')
        x2 = np.arange(START_TIME+BEHAVIOR_SHIFT, END_TIME+BEHAVIOR_SHIFT, 1.0/SAMPLE_FREQUENCY)
        plt.plot(x2, latencies[START_TIME*SAMPLE_FREQUENCY:END_TIME*SAMPLE_FREQUENCY])
        plt.axis([START_TIME, END_TIME, 0, 7])
        plt.xlabel('time(s)')
        plt.ylabel('seizure status')

        #seizure onset by slope_normalized > 2.5
        plt.subplot(ROW_NUM, COL_NUM, 4)
        plt.title('Seizure Time by (Normalized Slope > 2.5) num ')
        #x3 = np.arange(START_TIME, END_TIME, 1.0/SAMPLE_FREQUENCY)
        x3 = np.arange(START_TIME, END_TIME, 1)
        #plt.plot(x3, slope_change)
        plt.plot(x3, seizure_num_by_slope)
        plt.axis([START_TIME, END_TIME, 0, CH_NUM])
        plt.ylabel('# of (sn > 2.5)')
        
        if len(slope_change) != 0:
            #slope change of each channel
            slope_change = np.array(slope_change)
            slope_change = np.swapaxes(slope_change, 0, 1)
            plt.subplot(ROW_NUM, COL_NUM, 1)
            plt.title('Slope change of each channel (moving average by 5 sec)')
            im = plt.imshow(slope_change, origin = 'lower',
                    aspect = 'auto', extent = [START_TIME,END_TIME,1,CH_NUM],
                    interpolation = 'none')
            plt.ylabel('channel')
            fig.subplots_adjust(right = 0.93)
            plt.clim(COLOR_MIN, COLOR_MAX)
            cbax = fig.add_axes([0.94, 0.82, 0.01,0.12])
            fig.colorbar(im, cax = cbax)
        elif (len(stft_change)!=0):
            plt.subplot2grid((ROW_NUM, COL_NUM), (0,0), rowspan = 2)
            stft_change = np.swapaxes(stft_change, 0 ,1)
            im = plt.imshow(stft_change, origin = 'lower',
                    aspect = 'auto', extent = [START_TIME,END_TIME,0,50],
                    interpolation = 'none')
            plt.title('Normalized STFT')
            plt.xlabel('Time(s)')
            plt.ylabel('Frequency(Hz)')
            plt.tight_layout() #adjust the space between plots
            fig.subplots_adjust(right = 0.93)
            plt.clim(COLOR_MIN, COLOR_MAX)
            cbax = fig.add_axes([0.94, 0.82, 0.01,0.12])
            fig.colorbar(im, cax = cbax)

        else:
            #time correlation
            plt.subplot(ROW_NUM, COL_NUM, 1)
            plt.title('Time Domain Correlation Analysis (Normalized)')
            plt.imshow(t_eigen_change, origin = 'lower',
                    aspect = 'auto', extent = [START_TIME,END_TIME,0,7],
                    interpolation = 'none')
            plt.ylabel('eigenvalues')
            plt.clim(COLOR_MIN, COLOR_MAX)
         
            #phase correlation
            #f_eigen_change = np.array(f_eigen_change)
            #f_eigen_change = np.swapaxes(f_eigen_change, 0, 1)
            print "f eigen change", f_eigen_change.shape
            plt.subplot(ROW_NUM, COL_NUM, 2)
            plt.title('Frequency Domain Correlation Analysis (Normalized)')
            im = plt.imshow(f_eigen_change, origin = 'lower',
                    aspect = 'auto', extent = [START_TIME,END_TIME,0,7],
                    interpolation = 'none')
            #plt.colorbar()
            plt.tight_layout() #adjust the space between plots
            fig.subplots_adjust(right = 0.93)
            plt.clim(-5,8)
            cbax = fig.add_axes([0.94, 0.82, 0.01,0.12])
            fig.colorbar(im, cax = cbax)

        plt.show()

    start = time.get_seconds()
    initial_start = time.get_seconds()
    mat_data = read_mat_data(filename)
    #data = get_data(mat_data)
    data = get_data(mat_data, problem_channels = PROBLEM_CH)
    start = report_time(start)
    plot_data(data[STFT_CH:STFT_CH+1,START_TIME*SAMPLE_FREQUENCY:END_TIME*SAMPLE_FREQUENCY], plot_name = 'EEG')
    #plot_EEG(data)
    start = report_time(start)
    if ref!='None':
        print "Reference Data:", ref
        ref_mat_data = read_mat_data(ref)
        ref_data = get_data(ref_mat_data, problem_channels = PROBLEM_CH)
        #plot_data(ref_data[:,REF_TIME_START*SAMPLE_FREQUENCY:REF_TIME_END*SAMPLE_FREQUENCY], plot_name = 'Reference EEG', s = REF_TIME_START, e = REF_TIME_END)
    else:
        print "Reference Data:", filename
        ref_data = data

    
    slope_ref = calculate_slope_ref(ref_data)
    #slope_change = calculate_slope_change(data, slope_ref, 'change')
    slope_num = calculate_slope_change(data, slope_ref, 'num')
    start = report_time(start)
    print 'ref_data', ref_data.shape
    #stft_change_ref = calculate_stft(ref_data[:, REF_TIME_START*SAMPLE_FREQUENCY:REF_TIME_END*SAMPLE_FREQUENCY], 1, 50)

    stft_change = calculate_stft(data, 1, 50, ref = ref_data[:, REF_TIME_START*SAMPLE_FREQUENCY:REF_TIME_END*SAMPLE_FREQUENCY])
    start = report_time(start)
    stft_change = np.swapaxes(stft_change, 0 , 1)
    print 'stft change swap', stft_change.shape
    #plot_stft(stft_change[STFT_CH])
    plot_figures(latencies = get_data(mat_data, 'latencies'), seizure_num_by_slope = slope_num,
           # slope_change = slope_change,
           # t_eigen_change = t_eigen_change,
            #f_eigen_change = f_eigen_change,
            stft_change = stft_change[STFT_CH]

            )
    """
  
    t_eigen_ref = calculate_eigenvalue_ref(ref_data, data_type = "Time")
    start = report_time(start)
    t_eigen_ref_std = transforms.Stats().apply(t_eigen_ref)
    print 'ref std:'
    print t_eigen_ref_std
    start = report_time(start)
    t_eigen_ref_mean = np.average(t_eigen_ref, axis = 1)
    print 'ref avg:'
    print t_eigen_ref_mean
    start = report_time(start)
    t_eigen_change = calculate_eigen_change(data, t_eigen_ref_mean, t_eigen_ref_std, data_type = 'Time')
    start = report_time(start)

    f_eigen_ref = calculate_eigenvalue_ref(ref_data, data_type = 'Frequency')
    f_eigen_ref_std = transforms.Stats().apply(f_eigen_ref)
    f_eigen_ref_mean = np.average(f_eigen_ref, axis = 1)
    f_eigen_change = calculate_eigen_change(data, f_eigen_ref_mean, f_eigen_ref_std, data_type = 'Frequency')
    start = report_time(start)
    plot_figures(latencies = get_data(mat_data, 'latencies'), seizure_num_by_slope = slope_num,
           # slope_change = slope_change,
            t_eigen_change = t_eigen_change, f_eigen_change = f_eigen_change )
    """
    """
    plot_figures(latencies = get_data(mat_data, 'latencies'), seizure_num_by_slope = slope_num,
            slope_change = slope_change)

    """
    print '======================'
    print 'Total time:', 
    start = report_time(initial_start)
    print
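calculate_slope_change above smooths the absolute first-difference over SMOOTHING_PERIOD seconds, divides by the reference standard deviation, and counts channels whose normalized slope exceeds SLOPE_THRESHOLD. A vectorized restatement of the per-second test, with hypothetical constants (seizing_channels is an illustrative helper, not from the snippet):

import numpy as np

SAMPLE_FREQUENCY, SMOOTHING_PERIOD, SLOPE_THRESHOLD = 256, 5, 2.5   # hypothetical

def seizing_channels(data, ref_std, t):
    # data: channels x samples; ref_std: per-channel reference std of the slope
    k = int(SMOOTHING_PERIOD * SAMPLE_FREQUENCY)
    seg = data[:, t:t + k + 1]
    avg_slope = np.abs(np.diff(seg, axis=1) * SAMPLE_FREQUENCY).mean(axis=1)
    return int(np.sum(np.abs(avg_slope / ref_std) > SLOPE_THRESHOLD))

data = np.random.randn(8, 10 * SAMPLE_FREQUENCY)
ref_std = np.ones(8)
print(seizing_channels(data, ref_std, 0))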
    def predict_all(make_predictions):
        for pipeline in pipelines:
            for (classifier, classifier_name) in classifiers:
                print 'Using pipeline %s with classifier %s' % (pipeline.get_name(), classifier_name)
                lines = ['clip,preictal']
                subjectID = 0
                X_train = y_train = X_test = None
                test_size = []
                for target in targets:
                    task_core = TaskCore(cached_data_loader=cached_data_loader, data_dir=data_dir,
                                         target=target, pipeline=pipeline,
                                         classifier_name=classifier_name, classifier=classifier,
                                         normalize=should_normalize(classifier), gen_preictal=pipeline.gen_preictal,
                                         cv_ratio=cv_ratio)
                    
                    data = GetCrossSubjectDataTask(task_core).run()
#                     a = np.shape(data.X_test)[0]
                    test_size.append(np.shape(data.X_test)[0])
                    if subjectID > 0:
                        X_train = np.concatenate((X_train, data.X_train), axis=0)
                        y_train = np.concatenate((y_train, data.y_train), axis=0)
                        X_test = np.concatenate((X_test, data.X_test), axis=0)
                    else:
                        X_train = data.X_train
                        y_train = data.y_train
                        X_test = data.X_test
                    subjectID += 1
                    
                #Training
                task_core = TaskCore(cached_data_loader=cached_data_loader, data_dir=data_dir,
                                     target=[], pipeline=pipeline,
                                     classifier_name=classifier_name, classifier=classifier,
                                     normalize=should_normalize(classifier), gen_preictal=pipeline.gen_preictal,
                                     cv_ratio=cv_ratio)
                y_train = np.ceil(0.1 * y_train).astype('int_')  # map labels 1..10 to 1, keep 0
                if should_normalize(classifier):
                    X_train, temp = normalize_data(X_train, X_train)
                    
                print "Training ..."
                print 'Dim', np.shape(X_train), np.shape(y_train)
                start = time.get_seconds()
                classifier.fit(X_train, y_train)
                elapsedSecs = time.get_seconds() - start
                print "t=%ds" % int(elapsedSecs)
                
                y_estimate = classifier.predict_proba(X_train)
                lr = LogisticRegression(random_state = 0)      
                lr.fit(y_estimate, y_train)
                predictions_proba = classifier.predict_proba(X_test)
                predictions_calibrated = lr.predict_proba(predictions_proba)
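                # two-stage calibration, akin to Platt scaling: a logistic regression is
                # fit on the base classifier's training-set probabilities and then applied
                # to its test-set probabilities; fitting on in-sample predictions may make
                # the calibration optimistic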
                
                #output
                m = 0
                totalSample = 12
                startIdx = 0
                for target in targets:
                    for i in range(test_size[m]/totalSample):
                        nstr = '%04d' % (i + 1)  # zero-padded, 1-based clip index
                        
                        preictalOverAllSample = 0
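                        # average the calibrated preictal probability over the
                        # totalSample windows that make up this test clip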
                        for k in range(totalSample):
                            p = predictions_calibrated[i*totalSample+k+startIdx]
                            preictal = translate_prediction(p)
                            preictalOverAllSample += preictal/totalSample
                         
                        newline =  '%s_test_segment_%s.mat,%.15f' % (target, nstr, preictalOverAllSample)   
                        lines.append(newline)
                        
                    print newline
                    startIdx = startIdx + test_size[m]
                    m += 1
                
                filename = 'submission%d-%s_%s.csv' % (ts, classifier_name, pipeline.get_name())
                filename = os.path.join(submission_dir, filename)
                with open(filename, 'w') as f:
                    print >> f, '\n'.join(lines)
                print 'wrote', filename
Example #51
    def process_raw_data(mat_data, with_latency):
        start = time.get_seconds()
        print 'Loading data',
        X = []
        y = []
        latencies = []

        prev_data = None
        prev_latency = None
        for segment in mat_data:
            data = segment['data']
            transformed_data = pipeline.apply(data)

            if with_latency:
                # this is ictal
                latency = segment['latency'][0]
                if latency <= 15:
                    y_value = 0  # ictal <= 15
                else:
                    y_value = 1  # ictal > 15

                # generate extra ictal training data by taking 2nd half of previous
                # 1-second segment and first half of current segment
                # 0.5-1.5, 1.5-2.5, ..., 13.5-14.5, ..., 15.5-16.5
                # cannot take half of 15 and half of 16 because it cannot be strictly labelled as early or late
                if gen_ictal and prev_data is not None and prev_latency + 1 == latency and prev_latency != 15:
                    # gen new data :)
                    axis = prev_data.ndim - 1

                    def split(d):
                        return np.split(d, 2, axis=axis)

                    new_data = np.concatenate(
                        (split(prev_data)[1], split(data)[0]), axis=axis)
                    X.append(pipeline.apply(new_data))
                    y.append(y_value)
                    latencies.append(latency - 0.5)
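                    # e.g. prev_data covers second t-1 and data covers second t:
                    # new_data stitches the last half of prev_data to the first
                    # half of data, a window centred on the segment boundary and
                    # labelled latency - 0.5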

                y.append(y_value)
                latencies.append(latency)

                prev_latency = latency
            elif y is not None:
                # this is interictal
                y.append(2)

            X.append(transformed_data)
            prev_data = data

        print '(%ds)' % (time.get_seconds() - start)

        X = np.array(X)
        y = np.array(y)
        latencies = np.array(latencies)

        if ictal:
            print 'X', X.shape, 'y', y.shape, 'latencies', latencies.shape
            return X, y, latencies
        elif interictal:
            print 'X', X.shape, 'y', y.shape
            return X, y
        else:
            print 'X', X.shape
            return X
    def _load_data(self):
        """
        .. todo::

            WRITEME
        """
        import common.time as time
        start = time.get_seconds()

        from seizure.tasks import load_mat_data, count_mat_data
        from seizure.transforms import UnitScaleFeat, UnitScale
        import seizure.tasks
        seizure.tasks.task_predict = True

        # data_type is one of ('preictal', 'interictal', 'test')
        # target is one of 'Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2'
        data_dir = self.path
        data_types = ['preictal', 'interictal'] if self.expect_labels else ['test']

        N = 0
        for data_type in data_types:
            for i in count_mat_data(data_dir, self.target, data_type):
                N += 1
        print 'Number of segments', N

        Nf = None
        row = 0
        count = 0
        for data_type in data_types:
            mat_data = load_mat_data(data_dir, self.target, data_type)
            for segment in mat_data:
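                # scipy.io.loadmat stores metadata under keys prefixed with '_';
                # the first non-private key is assumed to hold the recording struct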
                for key in segment.keys():
                    if not key.startswith('_'):
                        break
                data = segment[key]['data'][0,0]

                assert data.shape[-1] == self.Nsamples

                istartend = np.linspace(0.,self.Nsamples - self.window_size, self.nwindows)
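                # nwindows evenly spaced window start offsets spanning
                # [0, Nsamples - window_size]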

                for i in range(self.nwindows):
                    count += 1
                    if (count - 1) % self.skip != 0:  # keep every skip-th window overall
                        continue
                    window = data[:,int(istartend[i]):int(istartend[i] + self.window_size)]
                    if Nf is None:
                        Nchannels = window.shape[0]
                        print 'Number of channels', Nchannels
                        N *= Nchannels * self.nwindows / self.skip
                        print 'Number of examples', N
                        Nf = window.shape[1]
                        print 'Number of features', Nf
                        X = np.empty((N, Nf))
                        y = np.empty(N)

                    if self.scale_option == 'usf':
                        window = UnitScaleFeat().apply(window)
                    elif self.scale_option == 'us':
                        window = UnitScale().apply(window)
                    # each channel of the (optionally scaled) window becomes one example row
                    X[row:row+Nchannels, :] = window
                    y[row:row+Nchannels] = (0 if data_type == 'interictal' else 1)
                    row += Nchannels

        if self.expect_labels:
            if self.one_hot:
                # get unique labels and map them to one-hot positions
                labels = np.unique(y)
                labels = dict((x, i) for (i, x) in enumerate(labels))

                one_hot = np.zeros((y.shape[0], len(labels)), dtype='float32')
                for i in xrange(y.shape[0]):
                    label = y[i]
                    label_position = labels[label]
                    one_hot[i, label_position] = 1.
                y = one_hot

        print X.shape, y.shape, y.mean(axis=-1)
        print 'time %ds' % (time.get_seconds() - start)
        return X, y
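For reference, the one-hot construction above is equivalent to row-indexing an identity matrix; a minimal standalone sketch with toy values (not from the source):

import numpy as np

y = np.array([0., 1., 1., 0.])
labels = dict((lab, i) for (i, lab) in enumerate(np.unique(y)))
one_hot = np.eye(len(labels), dtype='float32')[[labels[v] for v in y]]
# one_hot[i] has a single 1.0 in the column assigned to y[i]'s label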
Example #53
    def process_raw_data(mat_data, with_latency):
        start = time.get_seconds()
        initial_start = time.get_seconds()
        print 'Loading data',

        if 'data_behavior' in mat_data:
            dataKey = 'data_behavior'
        elif 'data_3sFIR' in mat_data:
            dataKey = 'data_3sFIR'
        else:
            dataKey = 'data'
        print "mat:", mat_data[dataKey].shape
        data = mat_data[dataKey][0:CH_NUM, :]
        print data.shape, data
        if mat_data[dataKey].shape[0] > CH_NUM:
            latencies = mat_data[dataKey][CH_NUM, :]
        else:
            latencies = np.zeros(len(data[0]))
        print latencies
        """
        Plot out the original EEG signals.
        """
        print 'Plotting out the original EEG signals... ',
        channels_fig = plt.figure()
        x1 = np.arange(START_TIME, END_TIME, 1.0 / SAMPLE_FREQUENCY)
        for i in range(0, CH_NUM):
            plt.subplot(CH_NUM, 1, i + 1)
            plt.plot(
                x1, data[i, START_TIME * SAMPLE_FREQUENCY:END_TIME *
                         SAMPLE_FREQUENCY])
        print '(%ds)' % (time.get_seconds() - start)
        start = time.get_seconds()
        """
        Using sliding window to calculate the change of eigenvalue
        with time.
        """
        print 'Calculating change of eigenvalue in frequency and time domain ... ',
        #the change of eigenvalue with time in frequency/time domain
        t_eigen_ref = []
        for i in range(int(REF_TIME_START * SAMPLE_FREQUENCY),
                       int(REF_TIME_END * SAMPLE_FREQUENCY)):
            data_tc = transforms.TimeCorrelation_whole(50, 'usf').apply(
                data[:, i:i + WINDOW_RANGE])
            w = transforms.Eigenvalues().apply(data_tc)
            t_eigen_ref.append(w)

        t_eigen_ref = np.array(t_eigen_ref)
        t_eigen_ref = np.swapaxes(t_eigen_ref, 0, 1)
        t_eigen_ref_std = transforms.Stats().apply(t_eigen_ref)
        t_eigen_ref_mean = np.average(t_eigen_ref, axis=1)
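        # per-channel mean and std of the eigenvalues over the reference window;
        # Stats() is assumed to expose the std at index 0, as used in the
        # z-scoring loop below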
        print 't eigen ref', t_eigen_ref
        print "t eigen ref std", t_eigen_ref_std
        print 't eigen ref mean', t_eigen_ref_mean

        f_eigen_change = []
        t_eigen_change = []
        for i in range(int(START_TIME * SAMPLE_FREQUENCY),
                       int(END_TIME * SAMPLE_FREQUENCY)):
            data_tc = transforms.TimeCorrelation_whole(50, 'usf').apply(
                data[:, i:i + WINDOW_RANGE])
            w = transforms.Eigenvalues().apply(data_tc)
            t_eigen_change.append(w)
            data_fc = transforms.FreqCorrelation_whole(1, 50, 'usf').apply(
                data[:, i:i + WINDOW_RANGE])
            w = transforms.Eigenvalues().apply(data_fc)
            f_eigen_change.append(w)

        t_eigen_change = np.array(t_eigen_change)
        t_eigen_change = np.swapaxes(t_eigen_change, 0, 1)
        print 't eigen change', t_eigen_change
        for i in range(0, t_eigen_change.shape[1]):
            for j in range(0, CH_NUM):
                t_eigen_change[j][i] = (
                    t_eigen_change[j][i] -
                    t_eigen_ref_mean[j]) / t_eigen_ref_std[j][0]
                if i < 2:
                    print i, j
                    print t_eigen_change[j][i]
                    print t_eigen_ref_mean[j]
                    print t_eigen_ref_std[j][0]
        print 't eigen change normalized', t_eigen_change
        """
        for i in range(0, len(f_eigen_change)):
            f_avg = 0
            t_avg = 0
            for j in range(0, SMOOTHING_PERIOD):
                f_avg += f_eigen_change[]
        """
        print '(%ds)' % (time.get_seconds() - start)
        start = time.get_seconds()
        """
        Calculate the standard deviation and normalized slope to define seizures.
        """
        print 'Calculating the reference slope and change of slope ... ',
        #reference slope
        slope_stats = []
        for i in range(int(REF_TIME_START * SAMPLE_FREQUENCY),
                       int(REF_TIME_END * SAMPLE_FREQUENCY)):
            slopes = []
            for j in range(0, CH_NUM):
                slope = (data[j, i + 1] - data[j, i]) * SAMPLE_FREQUENCY
                slopes.append(slope)
            slope_stats.append(slopes)
        slope_stats = np.array(slope_stats)
        slope_stats = transforms.Stats().apply(slope_stats)
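        # slope_stats is assumed to hold per-channel statistics of the reference
        # slopes, with slope_stats[j][0] the std used for normalization below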
        print "slope stats:", slope_stats.shape

        #change of slope
        #note: smoothed by SMOOTHING_PERIOD s average, calculated for each sec
        slope_change = []
        seizure_num_by_slope = []
        for i in range(int(START_TIME * SAMPLE_FREQUENCY),
                       int(END_TIME * SAMPLE_FREQUENCY), SAMPLE_FREQUENCY):
            seizure_channels_by_slope = 0
            slopes = []
            for j in range(0, CH_NUM):
                average_slope = 0.0
                for k in range(0, SMOOTHING_PERIOD * SAMPLE_FREQUENCY):
                    slope = (data[j, i + 1 + k] -
                             data[j, i + k]) * SAMPLE_FREQUENCY
                    average_slope += slope
                average_slope /= SMOOTHING_PERIOD
                slope_normalized = abs(average_slope / slope_stats[j][0])
                #slope_normalized = abs(slope / slope_stats[j][0])
                if (slope_normalized > SLOPE_THRESHOLD):
                    seizure_channels_by_slope += 1
                slopes.append(slope_normalized)
            slope_change.append(slopes)
            seizure_num_by_slope.append(seizure_channels_by_slope)

        slope_change = np.array(slope_change)
        print 'slope change of each channel', slope_change.shape
        seizure_num_by_slope = np.array(seizure_num_by_slope)
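        # seizure_num_by_slope counts, per second, how many channels exceed
        # SLOPE_THRESHOLD in smoothed, normalized slope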
        print 'seizure_num_by_slope', seizure_num_by_slope

        print '(%ds)' % (time.get_seconds() - start)
        start = time.get_seconds()

        #Plot out the seizure period and correlation structure.
        print 'Plotting out the other figures.. ',
        #seizure onset by observation
        fig = plt.figure()
        plt.subplot(ROW_NUM, COL_NUM, 3)
        plt.title('Seizure Time by Behavior')
        x2 = np.arange(START_TIME, END_TIME, 1.0 / SAMPLE_FREQUENCY)
        plt.plot(
            x2, latencies[START_TIME * SAMPLE_FREQUENCY:END_TIME *
                          SAMPLE_FREQUENCY])
        plt.axis([START_TIME, END_TIME, 0, 5])
        plt.xlabel('time(s)')
        plt.ylabel('seizure status')

        #seizure onset by slope_normalized > 2.5
        plt.subplot(ROW_NUM, COL_NUM, 4)
        plt.title('Seizure Time by (Normalized Slope > 2.5) num')
        #x3 = np.arange(START_TIME, END_TIME, 1.0/SAMPLE_FREQUENCY)
        x3 = np.arange(START_TIME, END_TIME, 1)
        #plt.plot(x3, slope_change)
        plt.plot(x3, seizure_num_by_slope)
        plt.axis([START_TIME, END_TIME, 0, 8])
        plt.ylabel('# of (sn > 2.5)')

        #slope change of each channel
        slope_change = np.swapaxes(slope_change, 0, 1)
        plt.subplot(ROW_NUM, COL_NUM, 1)
        plt.title('Slope change of each channel (moving average by 5 sec)')
        plt.imshow(slope_change,
                   origin='lower',
                   aspect='auto',
                   extent=[START_TIME, END_TIME, 1, CH_NUM],
                   interpolation='none')
        plt.ylabel('channel')
        #plt.colorbar()
        """
        #time correlation
        plt.subplot(ROW_NUM, COL_NUM, 1)
        plt.title('Time Domain Correlation Analysis')
        plt.imshow(t_eigen_change, origin = 'lower',
                aspect = 'auto', extent = [START_TIME,END_TIME,0,7])#, interpolation = 'none')
        plt.ylabel('eigenvalues')
        plt.colorbar()

        #phase correlation
        f_eigen_change = np.array(f_eigen_change)
        f_eigen_change = np.swapaxes(f_eigen_change, 0, 1)
        print "f eigen change", f_eigen_change.shape
        plt.subplot(ROW_NUM, COL_NUM, 2)
        plt.title('Frequency Domain Correlation Analysis')
        plt.imshow(f_eigen_change, origin = 'lower',
                aspect = 'auto', extent = [START_TIME,END_TIME,0,7],
                interpolation = 'none')
        #plt.colorbar()

        print '(%ds)' % (time.get_seconds() - start)
        start = time.get_seconds()

        latencies = np.array(latencies)
        print latencies
        """

        plt.tight_layout()  #adjust the space between plots
        plt.show()