import numpy as np
import hrvanalysis


def calc_hrv_params(data, phase):
    params = {}

    # Delete 0-values from the dataset (prevents interpolation of 0-values)
    data = [i for i in data["IBI"] if i != 0]

    # Remove outlier data points using the hrv-analysis package
    # https://github.com/Aura-healthcare/hrvanalysis
    rr_intervals_without_outliers = hrvanalysis.remove_outliers(
        rr_intervals=data, low_rri=350, high_rri=1800)

    # Interpolate outliers using the hrv-analysis package
    preprocessed_data = hrvanalysis.interpolate_nan_values(
        rr_intervals=rr_intervals_without_outliers,
        interpolation_method='linear')

    # If the first or last data point is NaN, it can't be interpolated and must be dropped
    cleaned_data = [i for i in preprocessed_data if not np.isnan(i)]

    # The HRV package calculates different heart-rate-related parameters (of which
    # only the mean heart rate is used as a stress marker in the present study;
    # the full code to calculate all HRV parameters is listed below).
    # See https://github.com/Aura-healthcare/hrvanalysis

    # Time domain analysis
    hrv_time_domain = hrvanalysis.get_time_domain_features(cleaned_data)
    # Only keep the mean heart rate parameter
    params.update({phase[3:] + "_mean_HR": hrv_time_domain["mean_hr"]})

    # Get all HRV time domain parameters:
    # Mean_NNI, SDNN, SDSD, NN50, pNN50, NN20, pNN20, RMSSD, Median_NN,
    # Range_NN, CVSD, CV_NNI, Mean_HR, Max_HR, Min_HR, STD_HR
    # for key in hrv_time_domain.keys():
    #     params.update({phase[3:] + "_" + key: hrv_time_domain[key]})

    # Frequency domain analysis
    # hrv_frequency_domain = hrvanalysis.get_frequency_domain_features(
    #     cleaned_data, method='welch', sampling_frequency=4,
    #     interpolation_method='cubic',
    #     vlf_band=(0.003, 0.04), lf_band=(0.04, 0.15), hf_band=(0.15, 0.4))
    # Get all HRV frequency domain parameters:
    # LF, HF, VLF, LF/HF ratio, LFnu, HFnu, Total_Power
    # for key in hrv_frequency_domain.keys():
    #     params.update({phase[3:] + "_" + key: hrv_frequency_domain[key]})

    # Geometrical analysis
    # hrv_geometrical_features = hrvanalysis.extract_features.get_geometrical_features(cleaned_data)
    # Get all geometrical analysis parameters:
    # Triangular_index, TINN
    # for key in hrv_geometrical_features.keys():
    #     params.update({phase[3:] + "_" + key: hrv_geometrical_features[key]})

    # CSI/CVI analysis
    # hrv_csi_cvi_features = hrvanalysis.extract_features.get_csi_cvi_features(cleaned_data)
    # Get all CSI/CVI analysis parameters:
    # CSI, CVI, Modified_CSI, SD1, SD2, SD1/SD2 ratio, SampEn
    # for key in hrv_csi_cvi_features.keys():
    #     params.update({phase[3:] + "_" + key: hrv_csi_cvi_features[key]})

    return params
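# --- Usage sketch for calc_hrv_params (illustrative only) ---
# A minimal sketch of how this function might be called. Assumptions not
# confirmed by the code above: the input is a DataFrame with an "IBI" column
# in milliseconds, and the phase string carries a 3-character prefix that
# phase[3:] strips (e.g. the "01_" in "01_baseline").
import pandas as pd

example_ibi_df = pd.DataFrame({"IBI": [812, 0, 795, 830, 2500, 810, 790]})  # ms; 0 = missing beat
example_params = calc_hrv_params(example_ibi_df, "01_baseline")
print(example_params)  # e.g. {'baseline_mean_HR': ...}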
from hrvanalysis import remove_outliers, interpolate_nan_values, remove_ectopic_beats
from scipy import signal


def get_clean_intervals(rrs):
    # Remove outliers from the signal
    rr_intervals_without_outliers = remove_outliers(
        rr_intervals=rrs, low_rri=300, high_rri=1800)
    # Replace outlier NaN values with linear interpolation
    interpolated_rr_intervals = interpolate_nan_values(
        rr_intervals=rr_intervals_without_outliers,
        interpolation_method="linear")
    # Remove ectopic beats from the signal
    nn_intervals_list = remove_ectopic_beats(
        rr_intervals=interpolated_rr_intervals, method="malik")
    # Replace ectopic-beat NaN values with linear interpolation
    interpolated_nn_intervals = interpolate_nan_values(
        rr_intervals=nn_intervals_list)
    # Smooth the series with a 5-point median filter
    median_interpolated_nn_intervals = signal.medfilt(
        interpolated_nn_intervals, 5)
    return median_interpolated_nn_intervals
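# --- Usage sketch for get_clean_intervals (illustrative only) ---
# A synthetic RR series in milliseconds with one implausible beat; the pipeline
# should flag the 2500 ms value as an outlier, interpolate it away, and then
# median-filter the result. The values below are made up.
example_rrs = [820, 810, 2500, 805, 815, 800, 812, 808]
cleaned = get_clean_intervals(example_rrs)
print(cleaned)  # numpy array of smoothed NN intervals in ms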
from hrvanalysis import (remove_outliers, interpolate_nan_values,
                         remove_ectopic_beats, get_frequency_domain_features)


def compute_hf_lf(data: dict, sampling_frequency: int = 128, preprocessing=False):
    rr_intervals_list = data['hamilton']['rr_intervals']

    # Optional preprocessing pipeline
    if preprocessing:
        # Remove outliers from the signal
        rr_intervals_without_outliers = remove_outliers(
            rr_intervals=rr_intervals_list, low_rri=300, high_rri=2000)
        # Replace outlier NaN values with linear interpolation
        interpolated_rr_intervals = interpolate_nan_values(
            rr_intervals=rr_intervals_without_outliers,
            interpolation_method="linear")
        # Remove ectopic beats from the signal
        nn_intervals_list = remove_ectopic_beats(
            rr_intervals=interpolated_rr_intervals, method="malik")
        # Replace ectopic-beat NaN values with linear interpolation
        interpolated_nn_intervals = interpolate_nan_values(
            rr_intervals=nn_intervals_list)
    else:
        interpolated_nn_intervals = rr_intervals_list

    # Frequency domain analysis
    frequency_domain_features = get_frequency_domain_features(
        nn_intervals=interpolated_nn_intervals,
        sampling_frequency=sampling_frequency)
    return frequency_domain_features['hfnu'], frequency_domain_features['lfnu']
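# --- Usage sketch for compute_hf_lf (illustrative only) ---
# The nested dict layout ({'hamilton': {'rr_intervals': [...]}}) mirrors the
# lookup inside the function; the RR values in milliseconds are made up.
example_data = {'hamilton': {'rr_intervals': [820, 810, 805, 815, 800, 812] * 20}}
hfnu, lfnu = compute_hf_lf(example_data, sampling_frequency=128, preprocessing=True)
print(hfnu, lfnu)  # normalised HF and LF power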
from hrvanalysis import remove_outliers, interpolate_nan_values, remove_ectopic_beats
from hrvanalysis.plot import plot_psd


def generate_psd_plot_hamilton(data: dict, sampling_frequency: int = 128):
    rr_intervals_list = data['hamilton']['rr_intervals']

    # Preprocessing pipeline
    # Remove outliers from the signal
    rr_intervals_without_outliers = remove_outliers(
        rr_intervals=rr_intervals_list, low_rri=300, high_rri=2000)
    # Replace outlier NaN values with linear interpolation
    interpolated_rr_intervals = interpolate_nan_values(
        rr_intervals=rr_intervals_without_outliers,
        interpolation_method="linear")
    # Remove ectopic beats from the signal
    nn_intervals_list = remove_ectopic_beats(
        rr_intervals=interpolated_rr_intervals, method="malik")
    # Replace ectopic-beat NaN values with linear interpolation
    interpolated_nn_intervals = interpolate_nan_values(
        rr_intervals=nn_intervals_list)

    # Plot the power spectral density of the cleaned NN series
    plot_psd(interpolated_nn_intervals, method="lomb",
             sampling_frequency=sampling_frequency)
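# --- Usage sketch for generate_psd_plot_hamilton (illustrative only) ---
# method="lomb" uses a Lomb-Scargle periodogram, which operates on the
# unevenly spaced NN series directly, so no resampling step is needed.
# The input dict below reuses the made-up values from the previous sketch.
example_data = {'hamilton': {'rr_intervals': [820, 810, 805, 815, 800, 812] * 20}}
generate_psd_plot_hamilton(example_data)  # opens a matplotlib PSD figure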
from hrvanalysis import interpolate_nan_values


def plotCompareWithInterpolatedValues(times, samples, rrTimes, rrValues):

    def listSecToMsec(secs):
        # Convert a list of values in seconds to integer milliseconds
        return [int(s * 1000) for s in secs]

    def listMsecToSec(msecs):
        # Convert a list of values in milliseconds back to float seconds
        return [float(ms) / 1000 for ms in msecs]

    rrValuesMsec = listSecToMsec(rrValues)

    # Full pipeline (currently disabled):
    # remove outliers + interpolate + remove ectopic beats + interpolate
    """
    # Remove outliers from the signal
    rr_intervals_without_outliers = remove_outliers(
        rr_intervals=rrValuesMsec, low_rri=300, high_rri=2000)
    # Replace outlier NaN values with linear interpolation
    interpolated_rr_intervals = interpolate_nan_values(
        rr_intervals=rr_intervals_without_outliers,
        interpolation_method="linear")
    # Remove ectopic beats from the signal
    nn_intervals_list = remove_ectopic_beats(
        rr_intervals=interpolated_rr_intervals, method="malik")
    """

    # Replace NaN values with linear interpolation
    interpolated_nn_intervals = interpolate_nan_values(rr_intervals=rrValuesMsec)
    # interpolated_nn_intervals_sec = listMsecToSec(interpolated_nn_intervals)

    print("rrValuesMsec ==> " + str(rrValuesMsec[:1000]))
    print("interpolated_nn_intervals ==> " + str(interpolated_nn_intervals[:1000]))

    plotCompareRecordsWithRR("Non-interpolated", times, samples, rrTimes, rrValues,
                             "Interpolated", times, samples,
                             interpolated_nn_intervals, rrValues)
import numpy as np
import pandas as pd
from hrvanalysis import (remove_ectopic_beats, interpolate_nan_values,
                         get_time_domain_features, get_frequency_domain_features)


def getSingleIBIfeatures(data):
    """
    INPUT:
        data: DataFrame of IBI values mapped to timestamps
    OUTPUT:
        A single IBI feature vector
    For more information:
        https://aura-healthcare.github.io/hrvanalysis/hrvanalysis.html
    """
    if data.empty:
        return None

    # Convert IBI from seconds to milliseconds
    IBI_data = data['IBI'].astype(float) * 1000

    # Remove ectopic beats from the signal
    nn_intervals_list = remove_ectopic_beats(rr_intervals=IBI_data, method="malik")
    # Replace ectopic-beat NaN values with linear interpolation
    interpolated_nn_intervals = interpolate_nan_values(rr_intervals=nn_intervals_list)

    # NaNs at the series boundaries cannot be interpolated (note NaN > 1
    # evaluates to False), so patch them with the only other value or with
    # the median of the interior values.
    if not interpolated_nn_intervals[-1] > 1 and len(interpolated_nn_intervals) == 2:
        interpolated_nn_intervals[-1] = interpolated_nn_intervals[0]
    if not interpolated_nn_intervals[-1] > 1:
        interpolated_nn_intervals[-1] = np.median(interpolated_nn_intervals[1:-1])
    if not interpolated_nn_intervals[0] > 1:
        interpolated_nn_intervals[0] = np.median(interpolated_nn_intervals[1:-1])

    # Get features
    time_features = get_time_domain_features(interpolated_nn_intervals)
    freq_features = get_frequency_domain_features(interpolated_nn_intervals,
                                                  method='lomb')
    IBI_features_df = pd.DataFrame({**time_features, **freq_features}, index=[0])
    # IBI_features_df.insert(0, "participant", participant)
    return IBI_features_df
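# --- Usage sketch for getSingleIBIfeatures (illustrative only) ---
# Assumes the "IBI" column holds values in seconds (the function multiplies by
# 1000). The Lomb frequency features require astropy to be installed.
import pandas as pd

example_ibi = pd.DataFrame({"IBI": [0.82, 0.81, 0.79, 0.83, 0.80, 0.81] * 10})
features = getSingleIBIfeatures(example_ibi)
print(features[['mean_hr', 'rmssd', 'lf_hf_ratio']])  # one-row feature vector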
import numpy as np
import pandas as pd
from hrvanalysis import (remove_ectopic_beats, interpolate_nan_values,
                         get_time_domain_features, get_frequency_domain_features)


def getIBIfeatures(data, time_window):
    """
    INPUT:
        data: DataFrame of IBI values mapped to timestamps
    OUTPUT:
        IBI features
    For more information:
        https://aura-healthcare.github.io/hrvanalysis/hrvanalysis.html
    """
    timestamp = data.timestamp.values
    IBI_data = np.array(data['IBI'].astype(float) * 1000)
    time_features_nn = np.zeros((1, 16))
    freq_features_nn = np.zeros((1, 7))
    timestamps = [0]
    seen_windows = set()

    for t in timestamp:
        if t >= timestamp[-1] - time_window:
            break
        curr_time = round(t + time_window)
        # Skip windows ending at an already-processed second (the original
        # membership test compared seconds against datetime objects and so
        # never matched; a separate set makes the deduplication effective)
        if curr_time in seen_windows:
            continue
        seen_windows.add(curr_time)
        timestamps.append(pd.to_datetime(curr_time, unit='s'))

        # Select all intervals inside the window [t, t + time_window]
        index_less = timestamp <= (t + time_window)
        index_larger = timestamp >= t
        index = index_less & index_larger
        curr_rr_interval = IBI_data[index]

        # Remove ectopic beats from the signal
        nn_intervals_list = remove_ectopic_beats(rr_intervals=curr_rr_interval,
                                                 method="malik")
        # Replace ectopic-beat NaN values with linear interpolation
        interpolated_nn_intervals = interpolate_nan_values(
            rr_intervals=nn_intervals_list)

        # Patch uninterpolatable boundary NaNs (NaN > 1 evaluates to False)
        if not interpolated_nn_intervals[-1] > 1 and len(
                interpolated_nn_intervals) == 2:
            interpolated_nn_intervals[-1] = interpolated_nn_intervals[0]
        if not interpolated_nn_intervals[-1] > 1:
            interpolated_nn_intervals[-1] = np.median(
                interpolated_nn_intervals[1:-1])
        if not interpolated_nn_intervals[0] > 1:
            interpolated_nn_intervals[0] = np.median(
                interpolated_nn_intervals[1:-1])

        # Stack the 16 time domain features for this window
        time_domain_features = get_time_domain_features(interpolated_nn_intervals)
        time_features_nn = np.vstack(
            (time_features_nn,
             np.array([
                 time_domain_features['mean_nni'], time_domain_features['sdnn'],
                 time_domain_features['sdsd'], time_domain_features['nni_50'],
                 time_domain_features['pnni_50'], time_domain_features['nni_20'],
                 time_domain_features['pnni_20'], time_domain_features['rmssd'],
                 time_domain_features['median_nni'],
                 time_domain_features['range_nni'], time_domain_features['cvsd'],
                 time_domain_features['cvnni'], time_domain_features['mean_hr'],
                 time_domain_features['max_hr'], time_domain_features['min_hr'],
                 time_domain_features['std_hr']
             ]).reshape(1, 16)))

        # Stack the 7 frequency domain features for this window
        freq_domain_features = get_frequency_domain_features(
            interpolated_nn_intervals, method='lomb')
        freq_features_nn = np.vstack(
            (freq_features_nn,
             np.array([
                 freq_domain_features['lf'], freq_domain_features['hf'],
                 freq_domain_features['lf_hf_ratio'], freq_domain_features['lfnu'],
                 freq_domain_features['hfnu'], freq_domain_features['total_power'],
                 freq_domain_features['vlf']
             ]).reshape(1, 7)))

    # Drop the zero placeholder rows and combine timestamps with features
    IBI_features = np.hstack((np.array(timestamps[1:]).reshape((-1, 1)),
                              time_features_nn[1:, :], freq_features_nn[1:, :]))
    IBI_features_df = pd.DataFrame(
        IBI_features,
        columns=[
            'timestamp', 'mean_nni', 'sdnn', 'sdsd', 'nni_50', 'pnni_50',
            'nni_20', 'pnni_20', 'rmssd', 'median_nni', 'range_nni', 'cvsd',
            'cvnni', 'mean_hr', 'max_hr', 'min_hr', 'std_hr', 'lf', 'hf',
            'lf_hf_ratio', 'lfnu', 'hfnu', 'total_power', 'vlf'
        ])
    # IBI_features_df.insert(0, "participant", participant)
    return IBI_features_df
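# --- Usage sketch for getIBIfeatures (illustrative only) ---
# Assumes a "timestamp" column in seconds and an "IBI" column in seconds; a
# 60-second window then yields one feature row per deduplicated window end.
# The synthetic series below jitters an 0.8 s beat interval for realism.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
n = 300
ibi_sec = 0.8 + 0.05 * rng.standard_normal(n)
example_df = pd.DataFrame({
    "timestamp": np.cumsum(ibi_sec),  # beat times in seconds
    "IBI": ibi_sec,                   # inter-beat intervals in seconds
})
windowed_features = getIBIfeatures(example_df, time_window=60)
print(windowed_features.head())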
def process_all_files(self, is_test=False):
    """
    Go through every subject's overlapped data and extract the intersection
    set between HR and Acc. Quality control filters out RR intervals outside
    the 300-2000 ms range. The output is written to either the test output
    path or the actual output path.
    :param is_test: True for the test dataset
    :return:
    """
    # Load Acc, HR and overlap files
    if is_test:
        all_acc_files = []
        all_hr_files = []
    else:
        all_acc_files = os.listdir(self.acc_path)
        all_hr_files = os.listdir(self.hr_path)
    # Only run the experiment on subjects with overlapped ECG and actigraphy
    overlap_df = pd.read_csv(self.overlap_path)
    total_subjects_list = overlap_df['mesaid'].unique()
    valid_pids = pd.read_csv(self.cfg.TRAIN_TEST_SPLIT)['uids'].values.tolist()
    # The valid subject IDs follow a snapshot of MESA data taken on 2019-05-01.
    # In this snapshot, we manually checked the aligned data to make sure the
    # pre-processing yields data of satisfactory quality.
    # The number of total valid subjects should be 1743.
    total_subjects_list = list(set(total_subjects_list).intersection(set(valid_pids)))
    total_processed = []
    if not os.path.exists(self.processed_records):
        with open(self.processed_records, "w") as output:
            writer = csv.writer(output, lineterminator='\n')
            writer.writerows(total_processed)
    # tag = datetime.now().strftime("%Y%m%d-%H%M%S")
    for PID in total_subjects_list:
        mesa_id = "%04d" % PID
        # Filter Acc and HR based on the overlap records
        print('*' * 100)
        print("Processing subject %s dataset" % mesa_id)
        acc_inlist_idx = [s for s in all_acc_files if mesa_id in s]
        hr_inlist_idx = [s for s in all_hr_files if mesa_id in s]
        feature_list = []
        if len(acc_inlist_idx) > 0 and len(hr_inlist_idx) > 0:
            # Get the raw dataset file indices
            acc_file_idx = all_acc_files.index(''.join(acc_inlist_idx))
            hr_file_idx = all_hr_files.index(''.join(hr_inlist_idx))
            # Load Acc and HR into pandas
            acc_df = pd.read_csv(os.path.join(self.acc_path, all_acc_files[acc_file_idx]))
            hr_df = pd.read_csv(os.path.join(self.hr_path, all_hr_files[hr_file_idx]))
            featnames = get_statistic_feature(acc_df, column_name="activity",
                                              windows_size=20)
            acc_start_idx = overlap_df[overlap_df['mesaid'] == PID]['line'].values[0].astype(int)
            acc_epochs = hr_df['epoch'].max()
            # Cut the data frame from the overlapped start index to the HR end index
            acc_df = acc_df[acc_start_idx - 1: acc_start_idx + acc_epochs - 1]
            # Recalculate 'line' to the correct index
            acc_df['line'] = acc_df['line'] - acc_start_idx + 1
            acc_df = acc_df.reset_index(drop=True)
            # Calculate the intersection set between HR and Acc and cut HR to align the sequences
            # ################ Data quality control for Acc ########################
            # Use marker and activity as indicator columns; drop if the shape is not 2-dim
            list_size_chk = np.array(acc_df[['marker', 'activity']].values.tolist())
            # Check whether the activity is empty
            if len(list_size_chk.shape) < 2:
                print("File {f_name} doesn't meet the dimension requirement, its size is {wrong_dim}"
                      .format(f_name=all_acc_files[acc_file_idx],
                              wrong_dim=list_size_chk.shape))
                continue
            # Cut the HRV data based on the length of the actigraphy data
            if int(hr_df['epoch'].tail(1)) > acc_df.shape[0]:
                hr_df = hr_df[hr_df['epoch'] <= acc_df.shape[0]]
            # Remove noisy data points where two peaks overlapped or the device was not worn
            hr_df = hr_df[hr_df['TPoint'] > 0]
            # Define RR intervals as the difference between consecutive peak
            # times in seconds (* 1000 to convert to ms)
            hr_df['RR Intervals'] = hr_df['seconds'].diff() * 1000
            # Fill the mean for the first sample (diff() leaves it NaN)
            hr_df['RR Intervals'].fillna(hr_df['RR Intervals'].mean(), inplace=True)
            # Old method for processing RR intervals, which is inappropriate:
            # sampling_df = pd.concat([sampling_df, t1], axis=0)
            # outlier_low = np.mean(hr_df['HR']) - 6 * np.std(hr_df['HR'])
            # outlier_high = np.mean(hr_df['HR']) + 6 * np.std(hr_df['HR'])
            # hr_df = hr_df[hr_df['HR'] >= outlier_low]
            # hr_df = hr_df[hr_df['HR'] <= outlier_high]

            # Apply the HRV-Analysis package: keep RR intervals between 300 and
            # 2000 ms, i.e. heart rates between 60000/2000 = 30 and
            # 60000/300 = 200 bpm
            clean_rri = hr_df['RR Intervals'].values
            clean_rri = hrvana.remove_outliers(rr_intervals=clean_rri,
                                               low_rri=300, high_rri=2000)
            clean_rri = hrvana.interpolate_nan_values(
                rr_intervals=clean_rri, interpolation_method="linear")
            clean_rri = hrvana.remove_ectopic_beats(rr_intervals=clean_rri,
                                                    method="malik")
            clean_rri = hrvana.interpolate_nan_values(rr_intervals=clean_rri)
            hr_df["RR Intervals"] = clean_rri
            # Calculate the heart rate
            hr_df['HR'] = np.round(60000.0 / hr_df['RR Intervals'], 0)
            # Filter Acc
            acc_df = acc_df[acc_df['interval'] != 'EXCLUDED']
            # Filter RRI: collect epochs with fewer than 3 RR intervals
            t1 = hr_df.epoch.value_counts().reset_index().rename(
                {'index': 'epoch_idx', 'epoch': 'count'}, axis=1)
            invalid_idx = set(t1[t1['count'] < 3]['epoch_idx'].values)
            del t1
            hr_df = hr_df[~hr_df['epoch'].isin(list(invalid_idx))]
            # Get intersecting epochs
            hr_epoch_set = set(hr_df['epoch'].values)
            acc_epoch_set = set(acc_df['line'])  # get Acc epochs
            # Only keep the intersecting data
            diff_epoch_set_a = acc_epoch_set.difference(hr_epoch_set)
            diff_epoch_set_b = hr_epoch_set.difference(acc_epoch_set)
            acc_df = acc_df[~acc_df['line'].isin(diff_epoch_set_a)]
            hr_df = hr_df[~hr_df['epoch'].isin(diff_epoch_set_b)]
            # Check that their epochs are equal
            assert acc_df.shape[0] == len(hr_df['epoch'].unique())
            # Filter out any epochs with fewer than 3 RRIs
            hr_epoch_set = set(hr_df['epoch'].values)
            hr_epoch_set = hr_epoch_set.difference(invalid_idx)
            for _, hr_epoch_idx in enumerate(list(hr_epoch_set)):
                # Sliding window
                gt_label = hr_df[hr_df['epoch'] == hr_epoch_idx]["stage"].values[0]
                if self.hrv_win != 0:
                    offset = int(np.floor(self.hrv_win / 2))
                    tmp_hr_df = hr_df[hr_df['epoch'].isin(
                        np.arange(hr_epoch_idx - offset, hr_epoch_idx + offset))]
                else:
                    tmp_hr_df = hr_df[hr_df['epoch'] == hr_epoch_idx]
                try:
                    # Check whether the first timestamp is empty
                    start_sec = float(tmp_hr_df['seconds'].head(1) * 1.0)
                except Exception as ee:
                    print("Exception %s, source dataset: %s"
                          % (ee, tmp_hr_df['seconds'].head(1)))
                # Calculate each epoch's HRV features
                rr_epoch = tmp_hr_df['RR Intervals'].values
                all_hr_features = {}
                try:
                    all_hr_features.update(hrvana.get_time_domain_features(rr_epoch))
                except Exception as ee:
                    self.log_process(ee, PID, hr_epoch_idx)
                    print("failed processing time domain features: {}".format(str(ee)))
                try:
                    all_hr_features.update(hrvana.get_frequency_domain_features(rr_epoch))
                except Exception as ee:
                    self.log_process(ee, PID, hr_epoch_idx)
                    print("failed processing frequency domain features: {}".format(str(ee)))
                try:
                    all_hr_features.update(hrvana.get_poincare_plot_features(rr_epoch))
                except Exception as ee:
                    self.log_process(ee, PID, hr_epoch_idx)
                    print("failed processing poincare features: {}".format(str(ee)))
                try:
                    all_hr_features.update(hrvana.get_csi_cvi_features(rr_epoch))
                except Exception as ee:
                    self.log_process(ee, PID, hr_epoch_idx)
                    print("failed processing csi cvi features: {}".format(str(ee)))
                try:
                    all_hr_features.update(hrvana.get_geometrical_features(rr_epoch))
                except Exception as ee:
                    self.log_process(ee, PID, hr_epoch_idx)
                    print("failed processing geometrical features: {}".format(str(ee)))
                all_hr_features.update({
                    'stages': gt_label,
                    'mesaid': acc_df[acc_df['line'] == hr_epoch_idx]['mesaid'].values[0],
                    'linetime': acc_df[acc_df['line'] == hr_epoch_idx]['linetime'].values[0],
                    'line': acc_df[acc_df['line'] == hr_epoch_idx]['line'].values[0],
                    'wake': acc_df[acc_df['line'] == hr_epoch_idx]['wake'].values[0],
                    'interval': acc_df[acc_df['line'] == hr_epoch_idx]['interval'].values[0],
                    'activity': acc_df[acc_df['line'] == hr_epoch_idx]['activity'].values[0]
                })
                feature_list.append(all_hr_features)
        # If the feature list is not empty
        if len(feature_list) > 0:
            hrv_acc_df = pd.DataFrame(feature_list)
            hrv_acc_df = hrv_acc_df.reset_index(drop=True)
            del hrv_acc_df['tinn']  # tinn is empty
            featnames = featnames + ["line"]
            combined_pd = pd.merge(acc_df[featnames], hrv_acc_df,
                                   on='line', how='inner')
            # combined_pd = combined_pd.reset_index(drop=True)
            combined_pd['timestamp'] = pd.to_datetime(combined_pd['linetime'])
            combined_pd['base_time'] = pd.to_datetime('00:00:00')
            combined_pd['seconds'] = combined_pd['timestamp'] - combined_pd['base_time']
            combined_pd['seconds'] = combined_pd['seconds'].dt.seconds
            combined_pd.drop(['timestamp', 'base_time'], axis=1, inplace=True)
            combined_pd['two_stages'] = combined_pd["stages"].apply(
                lambda x: 1.0 if x >= 1.0 else 0.0)
            # Make sure the REM sleep label is 4
            combined_pd.loc[combined_pd['stages'] > 4, 'stages'] = 4
            combined_pd = combined_pd.fillna(combined_pd.median())
            combined_pd = combined_pd[combined_pd['interval'] != 'EXCLUDED']
            aligned_data = self.output_path
            # Standardise and normalise the df
            feature_list = combined_pd.columns.to_list()
            std_feature = [x for x in feature_list if x not in
                           ['two_stages', 'seconds', 'interval', 'wake',
                            'linetime', 'mesaid', 'stages', 'line']]
            if self.standarize:
                standardize_df_given_feature(combined_pd, std_feature,
                                             df_name='combined_df',
                                             simple_method=False)
            combined_pd.to_csv(os.path.join(aligned_data, mesa_id + '_combined.csv'),
                               index=False)
            print("ID: {}, successfully processed".format(mesa_id))
            with open(self.processed_records, "a") as text_file:
                text_file.write("ID: {}, successfully processed\n".format(mesa_id))
            total_processed.append("ID: {}, successfully processed".format(mesa_id))
        else:
            print("Acc is empty or HRV is empty!")
            total_processed.append("ID: {}, failed process".format(mesa_id))
            with open(self.processed_records, "a") as text_file:
                text_file.write("ID: {}, failed process\n".format(mesa_id))
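# --- Hypothetical driver for process_all_files (illustrative only) ---
# process_all_files is a method, so it needs an enclosing class configured with
# acc_path, hr_path, overlap_path, cfg.TRAIN_TEST_SPLIT, processed_records,
# hrv_win, standarize and output_path. The class name below is made up; the
# actual constructor lives elsewhere in the repository.
# builder = MesaDataBuilder(cfg)      # hypothetical constructor and config
# builder.hrv_win = 0                 # single-epoch windows, per the code above
# builder.process_all_files(is_test=False)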