def user_cost(y_true, y_pred):
    """
    Seizure-level cost of a set of predictions.

    The cost is ``(1 - detected / n_true) + log10(false_positives + 1)``,
    where seizures are counted as bounded events rather than samples.

    Parameters
    ----------
    y_true : 1D ndarray bool, ground truth values
    y_pred : 1D ndarray bool, predicted values

    Returns
    -------
    cost : float
        Missed-seizure fraction plus the log-scaled false positive count.
        When `y_true` holds no seizures the missed fraction is taken as 0
        (nothing could be missed), so only false positives are penalised.
    """

    detected = 0  # number of detected seizures

    # get bounds of seizures
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # ground truth
    bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

    if bounds_pred.shape[0] > 0:
        # only merge/match when something was predicted
        bounds_pred = merge_close(bounds_pred,
                                  merge_margin=5)  # merge seizures close together
        detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

    # calculate cost
    n_true = bounds_true.shape[0]
    # guard against division by zero when there are no true seizures
    missed = 1 - (detected / n_true) if n_true > 0 else 0.0
    false_positives = bounds_pred.shape[0] - detected
    cost = missed + np.log10(false_positives + 1)  # cost function

    return cost
    def folder_loop(self, folder_name):
        """
        Accumulate per-feature detection counts for one folder into self.df.

        Every csv found in the folder's 'verified_predictions_pantelis'
        directory is treated as one experiment. Each feature column is
        thresholded at ``mean + self.threshold * std`` and the resulting
        seizure bounds are compared with the true bounds.

        Parameters
        ----------
        folder_name : Str, folder name (relative to self.main_path)

        Returns
        -------
        bool, False if the predictions directory is missing, True otherwise
        """

        folder_path = os.path.join(self.main_path, folder_name)
        ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
        if not os.path.exists(ver_path):  # error check
            print('path not found, skipping:', folder_path, '.')
            return False

        # prediction files only, without their extension
        file_ids = [os.path.splitext(name)[0]
                    for name in os.listdir(ver_path) if '.csv' in name]

        for file_id in tqdm(file_ids):  # iterate through experiments

            # get data and true labels
            data, y_true = get_data(folder_path, file_id, ch_num=ch_list,
                                    inner_path={'data_path': 'filt_data',
                                                'pred_path': 'verified_predictions_pantelis'},
                                    load_y=True)

            # extract features, then normalize them
            x_data, _ = get_features_allch(data, param_list, cross_ch_param_list)
            x_data = StandardScaler().fit_transform(x_data)

            # get bounds of true seizures
            bounds_true = find_szr_idx(y_true, np.array([0, 1]))
            n_true = bounds_true.shape[0]
            if n_true <= 0:  # nothing to score without true seizures
                continue

            for col in range(len(self.feature_labels)):  # iterate through features
                feature = x_data[:, col]

                # detect samples bigger than threshold
                y_pred = feature > (np.mean(feature) + self.threshold * np.std(feature))

                # predicted bounds, with close seizures merged
                bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))
                bounds_pred = merge_close(bounds_pred, merge_margin=5)
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

                # update running totals for this feature
                self.df.at[col, 'total'] += n_true
                self.df.at[col, 'detected'] += detected
                self.df.at[col, 'false_positives'] += bounds_pred.shape[0] - detected
        return True
def find_threshold(x_data, y_true):
    """
    Sweep detection thresholds for one feature and plot cost vs threshold.

    Feature column 8 of `x_data` is thresholded at ``mean + k * std`` for
    100 values of k evenly spaced between 1 and 20. Each prediction is
    scored with `create_cost` against the true seizure bounds.

    Parameters
    ----------
    x_data : 2D ndarray, features in columns (rows presumably time - confirm)
    y_true : 1D ndarray, ground truth labels

    Returns
    -------
    cost_array : 1D ndarray, cost for each tested threshold
    thresh_array : 1D ndarray, tested threshold multipliers
    """

    ftr = 8
    x = x_data[:, ftr]

    n_loop = 100
    cost_array = np.zeros(n_loop)
    thresh_array = np.linspace(1, 20, n_loop)

    # loop invariants: feature statistics and true seizure bounds
    x_mean = np.mean(x)
    x_std = np.std(x)
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

    for i in range(n_loop):

        y_pred = x > (x_mean + thresh_array[i] * x_std)

        # NOTE(review): predicted bounds use [0, 2] while true bounds use
        # [0, 1] - kept as in the original, confirm this is intended.
        bounds_pred = find_szr_idx(y_pred, np.array([0, 2]))  # predicted

        # merge seizures close together
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost_array[i] = create_cost(bounds_true, bounds_pred)  # get cost

    plt.figure()
    plt.plot(thresh_array, cost_array)
    plt.ylabel('cost')
    plt.xlabel('thresh')
    print('seizures = ', bounds_true.shape[0])
    return cost_array, thresh_array
示例#4
0
    def main_func(self, file_id):
        """
        Load raw predictions and raw data for one file.

        Seizure boundaries are taken from the second column of the
        prediction csv, thresholded at 0.5.

        Parameters
        ----------
        file_id : String, prediction file name (with '.csv' ending)

        Returns
        -------
        data : 3d Numpy Array (1D = segments, 2D = time, 3D = channel)
        idx_bounds : 2D Numpy Array (rows = seizures, cols = start and end points of detected seizures)

        """

        print('-> File being analyzed: ', file_id,'\n')

        # Get predictions
        pred_path = os.path.join(self.rawpred_path, file_id)  # get path
        bin_pred = np.loadtxt(pred_path, delimiter=',', skiprows=0)  # get predictions
        idx_bounds = find_szr_idx(bin_pred[:, 1] > 0.5, np.array([0, 1]))  # find seizure boundaries

        # load raw data for visualization; the context manager closes the
        # file even if reading fails
        data_path = os.path.join(self.org_rawpath, file_id.replace('.csv', '.h5'))
        with tables.open_file(data_path, mode='r') as f:
            data = f.root.data[:]

        # report number of detected seizures
        print('>>>>',idx_bounds.shape[0] ,'seizures detected')

        return data, idx_bounds
示例#5
0
    def append_func(self, ds_x, ds_y):
        '''
        Append training dataset to pytables datastore for one folder

        For every file in self.filelist the '.h5' data and the '.csv' ground
        truth are loaded. Files with at least one seizure segment contribute
        samples (via self.get_data) to the two datastores, and
        self.samples_added is increased accordingly.

        Parameters
        ----------
        ds_x : appendable datastore for x data
        ds_y : appendable datastore for y data
        '''

        for file_id in self.filelist:  # loop through files
            print('extracting data from', file_id)

            # load data; the context manager closes the file even on error
            with tables.open_file(os.path.join(self.data_path,
                                               file_id + '.h5'),
                                  mode='r') as f:
                data = f.root.data[:]

            # Get ground truth data
            y = np.loadtxt(os.path.join(self.verpred_path, file_id + '.csv'),
                           delimiter=',',
                           skiprows=0)

            # find seizure segments
            idx_bounds = find_szr_idx(y, np.array([0, 1], dtype=int))

            if idx_bounds.shape[0] > 0:  # if seizures were detected

                # get x and y data
                x_data, y_data = self.get_data(data, idx_bounds)
                self.samples_added += y_data.shape[0]
                print(self.samples_added, 'samples added')

                # append x and y data to datastore
                ds_x.append(x_data)
                ds_y.append(y_data)
def main_func(main_path):
    """
    Compare user-scored seizures with verified predictions and save metrics.

    Reads 'organized.json', 'Extracted_seizures.csv' and every csv in
    'verified_predictions_pantelis' under `main_path`, then writes the
    per-file and grand-total counts to 'detected.csv' in `main_path`.

    Parameters
    ----------
    main_path : Str, path to the experiment folder

    Returns
    -------
    None
    """

    # dict load
    settings = lab2mat.load(os.path.join(main_path, 'organized.json'))

    # get user seizures
    df = pd.read_csv(os.path.join(main_path, 'Extracted_seizures.csv'),
                     header=None)

    # get verified predictions file list
    ver_path = os.path.join(main_path, 'verified_predictions_pantelis')
    filelist = list(filter(lambda k: '.csv' in k, os.listdir(ver_path)))
    n_files = len(filelist)

    if len(df) != n_files:  # file check
        print(
            'Warning: length of extracted seizures does not match list of verified predictions!'
        )
    print(str(n_files) + ' files will be analyzed...')

    # create dataframe to store metrics; one row per file plus a grand-sum
    # row. Sized from filelist (not df) so the exp_id column always fits,
    # even when the warning above fired.
    df_save = pd.DataFrame(np.zeros((n_files + 1, 3)),
                           columns=['total', 'detected', 'false_positives'])
    df_save.insert(0, 'exp_id', filelist + ['Grand_sum'])

    for i in tqdm(range(n_files)):

        # get user scored seizure index (gold standard)
        true_idx = get_szr_index(df, filelist[i].replace('.csv', ''))

        # load seizure index from user-curated model-predictions
        rawpred = np.loadtxt(os.path.join(ver_path, filelist[i]),
                             delimiter=',',
                             skiprows=1)

        # NOTE(review): files without any verified prediction keep
        # total == 0 even if true_idx is non-empty - confirm intended.
        if np.sum(rawpred) > 0:  # check if any seizures were detected

            # get index bounds of semi-manual detected seizures
            pred_bounds = find_szr_idx(rawpred, np.array([0, 1]))
            pred_bounds *= settings['win']  # convert to seconds

            # get matching seizures
            detected, non_detected_idx = get_match(true_idx, pred_bounds)

            # write with a single .at[row, col] call: chained assignment
            # (df[col].at[row] = v) may modify a temporary copy in pandas
            df_save.at[i, 'total'] = true_idx.shape[0]  # total
            df_save.at[i, 'detected'] = detected  # detected
            df_save.at[i, 'false_positives'] = (
                pred_bounds.shape[0] - df_save.at[i, 'detected']
            )  # false positives

            if df_save.at[i, 'total'] != df_save.at[i, 'detected']:
                print('not all seizures were detected')
                print(filelist[i], get_hours(non_detected_idx))

    # get grand totals (last row; valid even when filelist is empty)
    df_save.at[n_files, 'total'] = df_save['total'].sum()
    df_save.at[n_files, 'detected'] = df_save['detected'].sum()
    df_save.at[n_files, 'false_positives'] = df_save['false_positives'].sum()

    # save csv
    df_save.to_csv(os.path.join(main_path, 'detected.csv'), index=False)
    print('Metrics for seizure matching completed')
示例#7
0
    def folder_loop(self, folder_name):
        """
        Score every threshold in self.thresh_array on one folder.

        For each experiment the features are extracted and normalized, then
        thresholded at ``mean + k * std`` (statistics over the whole feature
        matrix) for every k in self.thresh_array. The boolean predictions
        are handed to self.append_pred together with the true bounds.

        Parameters
        ----------
        folder_name : Str, folder name (relative to self.main_path)

        Returns
        -------
        bool, False if the predictions directory is missing, True otherwise
        """

        folder_path = os.path.join(self.main_path, folder_name)
        ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
        if not os.path.exists(ver_path):  # error check
            print('path not found, skipping:', folder_path, '.')
            return False

        # prediction files only, without their extension
        file_ids = [os.path.splitext(name)[0]
                    for name in os.listdir(ver_path) if '.csv' in name]

        for file_id in tqdm(file_ids):  # iterate through experiments

            # get data and true labels
            data, y_true = get_data(folder_path,
                                    file_id,
                                    ch_num=ch_list,
                                    inner_path={
                                        'data_path': 'filt_data',
                                        'pred_path': 'verified_predictions_pantelis',
                                    },
                                    load_y=True)

            # features -> normalized features -> true seizure bounds
            x_data, _ = get_features_allch(data, param_list,
                                           cross_ch_param_list)
            x_data = StandardScaler().fit_transform(x_data)
            bounds_true = find_szr_idx(y_true, np.array([0, 1]))

            # statistics of the whole normalized feature matrix
            data_mean = np.mean(x_data)
            data_std = np.std(x_data)

            self.df_cntr = 0  # restart df_cntr for this experiment
            for ii in range(len(self.thresh_array)):
                thresh = data_mean + self.thresh_array[ii] * data_std
                # add predictions for this threshold to self.df
                self.append_pred(x_data > thresh, bounds_true)
        return True
示例#8
0
    def save_func(self):
        '''
        Save training dataset to pytables datastore for one folder

        Creates 'x_data.h5' and 'y_data.h5' and fills them with the samples
        that self.get_data extracts from every file in self.filelist that
        contains at least one seizure segment.
        '''

        # Saving Parameters
        atom = tables.Float64Atom()  # declare data type

        # NOTE(review): `main_path` is resolved as a module-level name here,
        # not as an attribute of self - confirm this is intended.
        x_path = os.path.join(main_path, 'x_data.h5')
        y_path = os.path.join(main_path, 'y_data.h5')

        # context managers guarantee both stores are closed on any error
        with tables.open_file(x_path, mode='w') as fsave_xdata, \
                tables.open_file(y_path, mode='w') as fsave_ytrue:

            # create data stores
            ds_x = fsave_xdata.create_earray(
                fsave_xdata.root,
                'data',
                atom,
                [0, int(self.win * self.fs), self.chnls])
            ds_y = fsave_ytrue.create_earray(
                fsave_ytrue.root,
                'data',
                atom,
                [0])

            for file_id in tqdm(self.filelist):  # loop through files

                # load data
                with tables.open_file(os.path.join(self.data_path,
                                                   file_id + '.h5'),
                                      mode='r') as f:
                    data = f.root.data[:]

                # Get ground truth data
                y = np.loadtxt(os.path.join(self.verpred_path,
                                            file_id + '.csv'),
                               delimiter=',',
                               skiprows=0)

                # find seizure segments
                idx_bounds = find_szr_idx(y, np.array([0, 1], dtype=int))

                if idx_bounds.shape[0] > 0:  # if seizures were detected

                    # get x and y data
                    x_data, y_data = self.get_data(data, idx_bounds)

                    # append x and y data to datastore
                    ds_x.append(x_data)
                    ds_y.append(y_data)

        print('Training dataset created.')
def find_threshold_all(x_data, y_true):
    """
    Plot feature column 1 with its threshold line and evaluate the cost.

    NOTE(review): `thresh` is never updated inside the loop, so all
    iterations evaluate the same threshold (1 std above the mean). It looks
    like an update step is missing - confirm the intended rule.

    Parameters
    ----------
    x_data : 2D ndarray, features in columns
    y_true : 1D ndarray, ground truth labels

    Returns
    -------
    cost_array : 1D ndarray, cost per iteration (zeros where not reached)
    thresh_array : 1D ndarray, threshold multiplier used per iteration
    """

    thresh = 1
    ftr = 1
    x = x_data[:, ftr]

    # plot the feature and the threshold line
    fig = plt.figure()
    ax = fig.add_subplot(111)
    threshold_line = np.ones(x.shape[0]) * (np.mean(x) + thresh * np.std(x))
    ax.plot(x)
    ax.plot(threshold_line)

    n_loop = 100
    cost_array = np.zeros(n_loop)
    thresh_array = np.zeros(n_loop)

    for step in range(n_loop):
        thresh_array[step] = thresh
        cutoff = np.mean(x) + thresh_array[step] * np.std(x)
        y_pred = x > cutoff

        # seizure bounds: true and predicted
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

        # merging only makes sense with more than one predicted seizure
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost = create_cost(bounds_true, bounds_pred)
        cost_array[step] = cost

        if cost == 0:
            print('cost has reached zero, stopping')
            break

    return cost_array, thresh_array
def get_min_cost(feature, y_true):
    """
    Find the threshold multiplier that minimizes the seizure cost.

    The feature is thresholded at ``mean + k * std`` for 100 values of k
    evenly spaced between 1 and 20, and each prediction is scored with
    `szr_cost` against the true seizure bounds.

    Parameters
    ----------
    feature : 1D ndarray, extracted feature
    y_true : 1D ndarray, bool ground truth labels

    Returns
    -------
    Float, threshold multiplier that gives minimum cost (the first one if
    several share the minimum)

    """

    n_loop = 100  # loop number and separation
    thresh_array = np.linspace(1, 20, n_loop)  # thresholds to test
    cost_array = np.zeros(n_loop)

    # loop invariants: feature statistics and true seizure bounds
    feature_mean = np.mean(feature)
    feature_std = np.std(feature)
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

    for i in range(n_loop):

        y_pred = feature > (feature_mean + thresh_array[i] * feature_std)
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

        # merge seizures close together
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost_array[i] = szr_cost(bounds_true, bounds_pred)  # get cost

    return thresh_array[np.argmin(cost_array)]
示例#11
0
    def append_pred(self, y_pred_array, bounds_true):
        """
        Adds metrics to self.df

        For every combination of self.weights and self.feature_set a
        weighted vote over the feature predictions is computed. Counts are
        added to row self.df_cntr of self.df, and the counter is advanced
        by one per combination.

        Parameters
        ----------
        y_pred_array : np array, bool (rows = time, columns = features)
        bounds_true : np.array (rows = seizures, cols= [start idx, stop idx])
        """

        n_true = bounds_true.shape[0]

        for i in range(len(self.weights)):
            for ii in range(len(self.feature_set)):

                # weighted vote restricted to the selected features
                y_pred = y_pred_array * self.weights[i] * self.feature_set[ii]
                y_pred = np.sum(y_pred, axis=1) / np.sum(
                    self.weights[i] * self.feature_set[ii]
                )  # normalize to weights and selected features
                y_pred = y_pred > 0.5  # get popular vote
                bounds_pred = find_szr_idx(y_pred, np.array(
                    [0, 1]))  # get predicted seizure index

                detected = 0  # set default detected to 0
                if bounds_pred.shape[0] > 0:
                    bounds_pred = merge_close(
                        bounds_pred,
                        merge_margin=5)  # merge seizures close together
                    detected = match_szrs(
                        bounds_true, bounds_pred,
                        err_margin=10)  # find matching seizures

                # get total numbers; a single .at[row, col] access avoids
                # chained assignment, which may update a temporary copy
                row = self.df_cntr
                self.df.at[row, 'total'] += n_true  # total true
                self.df.at[row, 'detected'] += detected  # n of detected seizures
                self.df.at[row, 'false_positives'] += (
                    bounds_pred.shape[0] - detected)  # n of false positives
                self.df_cntr += 1  # update counter
示例#12
0
    def folder_loop(self, folder_name):
        """
        Score every configuration row of self.df on one folder.

        Each row of self.df supplies threshold multipliers (columns
        self.thresh), weights (columns self.weights) and enabled flags
        (columns self.enabled). The counts 'total', 'detected' and
        'false_positives' of that row are updated for every experiment.

        Parameters
        ----------
        folder_name : Str, folder name (relative to self.main_path)

        Returns
        -------
        bool, False if the predictions directory is missing, True otherwise
        """

        # get file list
        ver_path = os.path.join(self.main_path, folder_name,
                                'verified_predictions_pantelis')
        if not os.path.exists(ver_path):  # error check
            print('path not found, skipping:',
                  os.path.join(self.main_path, folder_name), '.')
            return False
        filelist = [os.path.splitext(x)[0] for x in os.listdir(ver_path)
                    if '.csv' in x]  # prediction files without csv ending

        for file_id in tqdm(filelist):  # iterate through experiments

            # get data and true labels
            data, y_true = get_data(os.path.join(self.main_path, folder_name),
                                    file_id,
                                    ch_num=ch_list,
                                    inner_path={
                                        'data_path':
                                        'filt_data',
                                        'pred_path':
                                        'verified_predictions_pantelis'
                                    },
                                    load_y=True)

            x_data, labels = get_features_allch(
                data, param_list,
                cross_ch_param_list)  # Get features and labels
            x_data = StandardScaler().fit_transform(x_data)  # Normalize data
            bounds_true = find_szr_idx(y_true, np.array(
                [0, 1]))  # get bounds of true seizures

            for ii in range(len(self.df)):  # iterate through df
                config = self.df.loc[ii]  # one configuration row

                # detect samples bigger than threshold
                thresh = (np.mean(x_data) +
                          np.array(config[self.thresh]) * np.std(x_data))
                y_pred_array = x_data > thresh  # get predictions

                # weighted vote restricted to the enabled features
                w = np.array(config[self.weights])  # get weights
                e = np.array(config[self.enabled])  # get enabled features
                y_pred = y_pred_array * w * e
                y_pred = np.sum(y_pred, axis=1) / np.sum(
                    w * e)  # normalize to weights and selected features
                y_pred = y_pred > 0.5  # get popular vote
                bounds_pred = find_szr_idx(y_pred, np.array(
                    [0, 1]))  # get predicted seizure index

                detected = 0  # set default detected to 0
                if bounds_pred.shape[0] > 0:
                    bounds_pred = merge_close(
                        bounds_pred,
                        merge_margin=5)  # merge seizures close together
                    detected = match_szrs(
                        bounds_true, bounds_pred,
                        err_margin=10)  # find matching seizures

                # get total numbers; a single .at[row, col] access avoids
                # chained assignment, which may update a temporary copy
                self.df.at[ii, 'total'] += bounds_true.shape[0]  # total true
                self.df.at[ii, 'detected'] += detected  # n of detected seizures
                self.df.at[ii, 'false_positives'] += (
                    bounds_pred.shape[0] - detected)  # n of false positives

        return True
def file_loop(main_path):
    """
    Detect seizures in every file of a folder and report the totals.

    Predictions come from the product of feature columns 1 and 9,
    thresholded at ``mean + 4 * std``.

    Parameters
    ----------
    main_path : Str, folder holding 'verified_predictions_pantelis'

    Returns
    -------
    true_total : int, number of true seizures over all files
    total_detected : int, number of true seizures that were detected
    total_exta : int, number of predicted seizures without a match
    """

    # get data list
    ver_path = os.path.join(main_path, 'verified_predictions_pantelis')
    filelist = [os.path.splitext(x)[0] for x in os.listdir(ver_path)
                if '.csv' in x]  # prediction files without csv ending

    true_total = 0
    total_detected = 0
    total_exta = 0
    for file_id in filelist:  # loop through files

        # get data and true labels
        data, y_true = get_data(main_path, file_id, ch_num=num_channels)
        print('->', file_id, 'loaded.')

        # Clean and filter data
        data = preprocess_data(data, clean=True, filt=False)
        print('-> data pre-processed.')

        # Get features and labels
        x_data, feature_labels = get_features_allch(data, param_list,
                                                    cross_ch_param_list)
        print('-> features extracted')

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)

        # make predictions
        xbest = x_data[:, 1] * x_data[:, 9]
        threshold = np.mean(xbest) + 4 * np.std(xbest)
        y_pred = xbest > threshold

        # get number of seizures
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

        # count true seizures for every file: a file without predictions
        # still contributes missed seizures to the overall total
        true_total += bounds_true.shape[0]

        if bounds_pred.shape[0] > 0:

            # merge seizures close together
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

            # find matching seizures
            detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

            print('Detected', detected, 'out of', bounds_true.shape[0],
                  'seizures')
            print('+', bounds_pred.shape[0] - detected, 'extra \n')

            total_detected += detected
            total_exta += bounds_pred.shape[0] - detected

    print('Total detected', total_detected, 'out of', true_total, 'seizures')
    print(total_exta, 'extra seizures')
    # NOTE(review): `tic` is not defined in this function; presumably a
    # module-level start time - confirm.
    print('Time elapsed = ', time.time() - tic, 'seconds.')
    return true_total, total_detected, total_exta
示例#14
0
def folder_loop(folder_path, thresh_multiplier = 5):
    """
    Count detected and extra seizures per feature for one folder.

    Every feature column is thresholded at
    ``mean + thresh_multiplier * std`` and the predicted seizure bounds are
    matched against the true ones for each experiment in the folder.

    Parameters
    ----------
    folder_path : Str, folder holding 'verified_predictions_pantelis'
    thresh_multiplier : number of standard deviations above the mean used
        as detection threshold. The default is 5.

    Returns
    -------
    df : pd.DataFrame, per-feature totals (true_total, total_detected,
        total_exta, Features); False if the predictions path is missing
    szrs : 3D ndarray (files, [true, detected, extra], features); False if
        the predictions path is missing
    """

    # get file list
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
    if not os.path.exists(ver_path):
        # NOTE(review): the message uses the module-level `main_path`,
        # while ver_path is built from folder_path alone - confirm.
        print('path not found, skipping:', os.path.join(main_path, folder_path) ,'.')
        return False, False
    filelist = [os.path.splitext(x)[0] for x in os.listdir(ver_path)
                if '.csv' in x]  # prediction files without csv ending

    # create feature labels
    feature_labels = []
    for n in ch_list:
        feature_labels += [x.__name__ + '_' + str(n) for x in param_list]
    feature_labels += [x.__name__ for x in cross_ch_param_list]
    feature_labels = np.array(feature_labels)

    # create dataframe
    columns = ['true_total', 'total_detected', 'total_exta']
    df = pd.DataFrame(data=np.zeros((len(feature_labels), len(columns))),
                      columns=columns, dtype=np.int64)
    df['Features'] = feature_labels

    # create seizure array
    szrs = np.zeros((len(filelist), 3, feature_labels.shape[0]))

    # total number of analyzed rows (named so it does not shadow `time`)
    n_windows = 0
    for i in tqdm(range(0, len(filelist))):  # loop through experiments

        # get data and true labels
        data, y_true = get_data(folder_path, filelist[i], ch_num=ch_list,
                                inner_path={'data_path':'filt_data', 'pred_path':'verified_predictions_pantelis'},
                                load_y=True)

        # Get features and labels
        x_data, labels = get_features_allch(data, param_list, cross_ch_param_list)

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)

        n_windows += x_data.shape[0]

        # true bounds do not depend on the feature: compute once per file
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

        for ii in range(len(feature_labels)):  # iterate through features

            # threshold in standard deviations above the mean
            y_pred = x_data[:, ii] > (np.mean(x_data[:, ii]) + thresh_multiplier * np.std(x_data[:, ii]))

            # get predicted seizures
            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

            # get true number of seizures
            szrs[i, 0, ii] = bounds_true.shape[0]

            if bounds_pred.shape[0] > 0:

                # merge seizures close together
                bounds_pred = merge_close(bounds_pred, merge_margin=5)

                # find matching seizures
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

                # get number of matching and extra seizures detected
                szrs[i, 1, ii] = detected  # number of true seizures detected
                szrs[i, 2, ii] = bounds_pred.shape[0] - detected  # number of extra seizures detected

            # get total numbers
            df.at[ii, 'true_total'] += szrs[i, 0, ii]
            df.at[ii, 'total_detected'] += szrs[i, 1, ii]
            df.at[ii, 'total_exta'] += szrs[i, 2, ii]

    # presumably each row is a 5 second window - TODO confirm
    print(n_windows*5/60, 'minutes of eeg recordings')
    return df, szrs
    def get_feature_pred(self, file_id):
        """
        Predict seizure bounds for one file from thresholded features.

        Features are extracted and normalized, thresholded at
        ``mean + self.thresh * std`` and combined by a weighted vote over
        the enabled features. Detected seizures are merged when close
        together and refined with the 'line_length_0' feature.

        Parameters
        ----------
        file_id : Str

        Returns
        -------
        bounds_pred : 2D Numpy Array (rows = seizures, cols = start and end points of detected seizures)

        """

        # single channel features
        single_ch_features = (
            features.autocorr,
            features.line_length,
            features.rms,
            features.mad,
            features.var,
            features.std,
            features.psd,
            features.energy,
            features.get_envelope_max_diff,
        )
        # cross channel features
        cross_ch_features = (
            features.cross_corr,
            features.signal_covar,
            features.signal_abs_covar,
        )

        # load data only (no labels requested)
        data = get_data(self.gen_path,
                        file_id,
                        ch_num=ch_list,
                        inner_path={'data_path': 'filt_data'},
                        load_y=False)

        # extract features and normalize
        x_data, _ = get_features_allch(data, single_ch_features,
                                       cross_ch_features)
        x_data = StandardScaler().fit_transform(x_data)

        # threshold every feature, then take the weighted vote
        thresh = np.mean(x_data) + self.thresh * np.std(x_data)
        votes = (x_data > thresh) * self.weights * self.enabled
        score = np.sum(votes, axis=1) / np.sum(self.weights * self.enabled)
        bounds_pred = find_szr_idx(score > 0.5, np.array([0, 1]))

        # nothing detected: nothing to refine
        if bounds_pred.shape[0] <= 0:
            return bounds_pred

        # merge seizures close together
        bounds_pred = merge_close(bounds_pred, merge_margin=5)

        # refine using the feature whose name starts with 'line_length_0'
        name_matches = np.char.find(self.feature_names, 'line_length_0') == 0
        idx = np.where(name_matches)[0][0]
        bounds_pred = self.refine_based_on_surround(x_data[:, idx],
                                                    bounds_pred)

        return bounds_pred
    def folder_loop(self, folder_path):
        """
        Append seizure properties of one folder to the per-feature csv files.

        For every experiment with at least one true seizure, the seizure and
        surround properties of each feature are appended (without header) to
        '<feature label>.csv' in self.save_folder.

        Parameters
        ----------
        folder_path : Str, to child dir

        Returns
        -------
        False if the predictions directory is missing, otherwise None
        """

        # get path
        ver_path = os.path.join(self.main_path, folder_path,
                                'verified_predictions_pantelis')
        if not os.path.exists(ver_path):
            print('path not found, skipping:',
                  os.path.join(self.main_path, folder_path), '.')
            return False

        # get file list
        filelist = [os.path.splitext(x)[0] for x in os.listdir(ver_path)
                    if '.csv' in x]  # prediction files without csv ending

        for file_id in tqdm(filelist):  # loop through experiments

            # get data and true labels
            data, y_true = get_data(os.path.join(self.main_path, folder_path),
                                    file_id,
                                    ch_num=ch_list,
                                    inner_path={
                                        'data_path':
                                        'filt_data',
                                        'pred_path':
                                        'verified_predictions_pantelis'
                                    },
                                    load_y=True)

            # Get features and labels
            x_data, labels = get_features_allch(data, param_list,
                                                cross_ch_param_list)

            # Normalize data
            x_data = StandardScaler().fit_transform(x_data)

            # true bounds do not depend on the feature: compute once per file
            bounds_true = find_szr_idx(y_true, np.array([0, 1]))
            if bounds_true.shape[0] == 0:
                continue  # nothing to catalogue for this experiment

            exp_ids = [os.path.join(folder_path, file_id)
                       ] * bounds_true.shape[0]

            for ii in range(len(self.feature_labels)):  # iterate through features

                # create dataframe
                df = pd.DataFrame(data=np.zeros((0, len(self.columns))),
                                  columns=self.columns,
                                  dtype=np.int64)

                # get seizure and surround properties
                szrs = GetCatalogue.get_surround(x_data[:, ii], bounds_true,
                                                 self.time_bins,
                                                 self.szr_properties)

                # insert experiment id, seizure start and end
                df['exp_id'] = exp_ids
                df['szr_start'] = bounds_true[:, 0]
                df['szr_end'] = bounds_true[:, 1]

                # seizure properties fill the columns after the first three
                df.iloc[:, 3:] = szrs

                # append to the csv of this feature
                df.to_csv(os.path.join(self.save_folder,
                                       self.feature_labels[ii] + '.csv'),
                          mode='a',
                          header=False,
                          index=False)