Пример #1
0
def feature_extract(dt):
    """Compute a dictionary of summary features for one time series.

    Six basic statistics and the 0.1 ... 0.9 quantiles are taken from the
    tsfresh feature calculators; the dictionary is then extended with
    whatever ``fft_ft(dt)`` returns.

    :param dt: the time series, handed unchanged to every calculator
    :return: dict mapping feature name to its computed value
    """
    import tsfresh.feature_extraction.feature_calculators as fc

    # Each of these feature names is also the name of the tsfresh
    # calculator that produces it.
    basic_stats = (
        'abs_energy',
        'sum_values',
        'mean',
        'maximum',
        'minimum',
        'median',
    )
    features = {name: getattr(fc, name)(dt) for name in basic_stats}

    for q in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9):
        features[f'quantile_{q}'] = fc.quantile(dt, q)

    # The FFT coefficient features (real / imag / abs / angle of coeff 0)
    # are not requested from fc.fft_coefficient here; earlier direct calls
    # did not work, so they are supplied by the fft_ft helper instead.
    features.update(fft_ft(dt))

    return features
Пример #2
0
def get_feature_date(daydf):
    """Flatten a block of records into a single-row feature frame.

    The identifier columns ``charttime``, ``subject_id`` and ``icustay_id``
    are dropped. Every remaining column contributes one or more feature
    columns, appended left to right in column order:

    * ``heart_rate``, ``abp_systolic``, ``gcs_total``, ``platelets`` and
      ``creatinine`` go through ``scalar_feature_extraction``;
    * ``weight`` and ``age`` contribute that column's value in the first row;
    * ``is_dead_in_n_hours`` contributes ``tffe.maximum`` of the column;
    * any other column goes through ``logistic_feature_extraction``.

    :param daydf: DataFrame holding the three identifier columns plus the
        measurement columns
    :return: DataFrame with the concatenated features (integer column labels
        restart at 0 for every contributing column, so labels repeat)
    :raises KeyError: if one of the identifier columns is missing
    """
    daydf = daydf.drop(["charttime", "subject_id", "icustay_id"], axis=1)

    scalar_columns = {
        "heart_rate", "abp_systolic", "gcs_total", "platelets", "creatinine",
    }
    first_value_columns = {"weight", "age"}

    # Seed with an empty 1x0 frame so the result keeps a single row even
    # when no feature column is left after the drop.
    pieces = [pd.DataFrame(np.zeros([1, 0], dtype=float))]

    for column in daydf:
        if column in scalar_columns:
            block = scalar_feature_extraction(daydf[column])
        elif column in first_value_columns:
            # Read the value from *this* column; indexing the frame as
            # iloc[0][0] would return the first column's value for both
            # weight and age.
            block = [[float(daydf[column].iloc[0])]]
        elif column == "is_dead_in_n_hours":
            block = [[float(tffe.maximum(daydf[column].values))]]
        else:
            block = logistic_feature_extraction(daydf[column])
        pieces.append(pd.DataFrame(block))

    # One concat at the end instead of re-copying the growing frame on
    # every iteration.
    return pd.concat(pieces, axis=1)
Пример #3
0
    def get_sta_features(self, data):
        """
        Compute the nine selected statistical features of a series.

        :param data: the series to describe; it is passed to the ``ts``
            feature calculators and regressed against its positions
        :return: list ``[abs_energy, binned_entropy (5 bins),
            autocorrelation (lag 4), skewness, kurtosis, trend, mean,
            minimum, maximum]``
        """
        def _slope_over_index(series):
            # Fit value ~ position (0..n-1) and report the fitted slope.
            positions = np.arange(len(series)).reshape(-1, 1)
            targets = np.array(series).reshape(-1, 1)
            model = linear_model.LinearRegression()
            model.fit(positions, targets)
            return model.coef_[0][0]

        energy = ts.abs_energy(data)
        entropy = ts.binned_entropy(data, max_bins=5)
        autocorr = ts.autocorrelation(data, lag=4)
        skew = ts.skewness(data)
        kurt = ts.kurtosis(data)
        slope = _slope_over_index(data)
        average = ts.mean(data)
        lowest = ts.minimum(data)
        highest = ts.maximum(data)

        return [
            energy, entropy, autocorr, skew, kurt,
            slope, average, lowest, highest,
        ]
def TS_feature3(signal):
    """Return six summary statistics of ``signal`` as a tuple.

    :param signal: the series handed to the ``ts`` feature calculators
    :return: ``(maximum, mean, mean_abs_change, mean_change, median,
        minimum)``
    """
    return (
        ts.maximum(signal),
        ts.mean(signal),
        ts.mean_abs_change(signal),
        ts.mean_change(signal),
        ts.median(signal),
        ts.minimum(signal),
    )
Пример #5
0
def time_series_maximum(x):
    """
    Maximum of the time series, as computed by
    ``ts_feature_calculators.maximum``.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    highest = ts_feature_calculators.maximum(x)
    return highest
Пример #6
0
def time_series_maximum(x):
    """
    Maximum value of the series x.

    :param x: the series to take the maximum of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    highest = ts_feature_calculators.maximum(x)
    return highest
Пример #7
0
def scalar_feature_extraction(column):
    """Summarise one column as a 1x10 float array of tsfresh features.

    Slots, in order: count_above_mean, mean, maximum, median, minimum,
    sample_entropy (stored as 0 when ``isNaN`` reports it as NaN),
    skewness, variance, longest_strike_above_mean,
    longest_strike_below_mean.

    :param column: object exposing ``.values`` (the raw series data)
    :return: numpy array of shape (1, 10), dtype float
    """
    values = column.values
    features = np.zeros([1, 10], dtype=float)
    row = features[0]  # view: writes land directly in ``features``

    row[0] = tffe.count_above_mean(values)
    row[1] = tffe.mean(values)
    row[2] = tffe.maximum(values)
    row[3] = tffe.median(values)
    row[4] = tffe.minimum(values)

    row[5] = tffe.sample_entropy(values)
    if isNaN(row[5]):
        # An undefined entropy is recorded as 0 rather than NaN.
        row[5] = 0

    row[6] = tffe.skewness(values)
    row[7] = tffe.variance(values)
    row[8] = tffe.longest_strike_above_mean(values)
    row[9] = tffe.longest_strike_below_mean(values)

    return features
Пример #8
0
def translate_to_hourly(df):
    """Collapse consecutive rows of one ICU stay and hour into a single row.

    Rows are scanned in their existing order. A run is a stretch of adjacent
    rows that share the ``icustay_id`` of the run's first row and for which
    ``is_same_hour`` holds against that first row's timestamp. Each run
    becomes one output row:

    * numeric columns hold the column-wise mean of the run;
    * ``charttime`` holds the run's first timestamp, formatted as
      ``%Y-%m-%d %H:%M:%S``;
    * ``is_dead_in_n_hours`` holds ``tffe.maximum`` over the run. This
      applies to every run, including the last one.

    Columns that are neither numeric nor set explicitly come out as NaN.

    :param df: DataFrame with at least the columns ``icustay_id``,
        ``charttime`` (its ``str()`` must match ``%Y-%m-%d %H:%M:%S``) and
        ``is_dead_in_n_hours``
    :return: new DataFrame with the same columns as ``df`` and one row per
        run; an empty frame with those columns when ``df`` has no rows
    :raises ValueError: if a ``charttime`` value cannot be parsed
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    columns = df.columns

    if len(df.index) == 0:
        # Nothing to aggregate; avoid indexing the first row of an empty frame.
        return pd.DataFrame(columns=columns)

    def summarise(start, stop, hour):
        # Build the aggregated row for the run df.iloc[start:stop].
        group = df.iloc[start:stop]
        row = group.mean(axis=0, numeric_only=True)
        row.loc['charttime'] = hour.strftime(time_format)
        row.loc['is_dead_in_n_hours'] = tffe.maximum(
            group['is_dead_in_n_hours'].values)
        return row

    stay_ids = df['icustay_id']
    chart_times = df['charttime']

    curr_icu_stay_id = stay_ids.iloc[0]
    curr_hour = datetime.strptime(str(chart_times.iloc[0]), time_format)
    run_start = 0
    hourly_rows = []

    for index, (row_icu_stay_id, chart_time) in enumerate(
            zip(stay_ids, chart_times)):
        print("index: ", index)
        row_hour = datetime.strptime(str(chart_time), time_format)

        if (row_icu_stay_id == curr_icu_stay_id
                and is_same_hour(curr_hour, row_hour)):
            # Still inside the current run; it is summarised once it ends.
            print(chart_time)
            continue

        # This row opens a new run: close the previous one first.
        hourly_rows.append(summarise(run_start, index, curr_hour))
        curr_hour = row_hour
        curr_icu_stay_id = row_icu_stay_id
        run_start = index
        print("completed hour")

    # Close the run that was still open when the rows ran out.
    hourly_rows.append(summarise(run_start, len(df.index), curr_hour))

    # Assemble the result in one step rather than growing a frame row by row.
    return pd.DataFrame(hourly_rows, columns=columns)
Пример #9
0
def extract_features(data):
    """Build a flat list of 21 features describing ``data``.

    The features are computed with the ``feature`` calculators, collected
    in a numpy array, passed through ``numpy.nan_to_num`` and returned as a
    list. Lag-style parameters use one day expressed in minutes (24 * 60).

    :param data: object exposing a ``value`` attribute with the raw series;
        some calculators receive ``data`` itself, others the plain array
        (NOTE(review): presumably a single-column pandas DataFrame - confirm)
    :return: list with, in order: symmetry_looking (r=0.3),
        variance_larger_than_standard_deviation, ratio_beyond_r_sigma (r=2),
        has_duplicate_max, has_duplicate_min, has_duplicate,
        agg_autocorrelation (mean, maxlag=day), partial_autocorrelation
        (lag=day), abs_energy, mean_change, mean_second_derivative_central,
        median, mean, standard_deviation, longest_strike_below_mean,
        longest_strike_above_mean, number_peaks (n=10), linear_trend rvalue,
        c3 (lag=day), maximum, minimum
    """
    minutes_per_day = 24 * 60
    values = numpy.array(data.value)

    symmetry = feature.symmetry_looking(data, [{'r': 0.3}])[0][1]
    var_exceeds_std = feature.variance_larger_than_standard_deviation(
        data).bool()
    beyond_two_sigma = feature.ratio_beyond_r_sigma(data, 2)
    duplicate_max = feature.has_duplicate_max(data)
    duplicate_min = feature.has_duplicate_min(data)
    duplicate_any = feature.has_duplicate(data)
    mean_autocorr = feature.agg_autocorrelation(
        values, [{'f_agg': 'mean', 'maxlag': minutes_per_day}])[0][1]
    partial_autocorr = feature.partial_autocorrelation(
        data, [{'lag': minutes_per_day}])[0][1]
    energy = feature.abs_energy(values)
    avg_change = feature.mean_change(data)
    second_derivative = feature.mean_second_derivative_central(data)
    middle = feature.median(data)
    average = float(feature.mean(data))
    spread = float(feature.standard_deviation(data))
    longest_below = float(feature.longest_strike_below_mean(data))
    longest_above = float(feature.longest_strike_above_mean(data))
    peak_count = int(feature.number_peaks(data, 10))
    trend_rvalue = feature.linear_trend(values, [{'attr': 'rvalue'}])[0][1]
    c3_stat = feature.c3(data, minutes_per_day)
    highest = float(feature.maximum(data))
    lowest = float(feature.minimum(data))

    collected = numpy.array([
        symmetry,
        var_exceeds_std,
        beyond_two_sigma,
        duplicate_max,
        duplicate_min,
        duplicate_any,
        mean_autocorr,
        partial_autocorr,
        energy,
        avg_change,
        second_derivative,
        middle,
        average,
        spread,
        longest_below,
        longest_above,
        peak_count,
        trend_rvalue,
        c3_stat,
        highest,
        lowest,
    ])
    return list(numpy.nan_to_num(collected))
def get_maximum(arr):
    """Return ``maximum(arr)`` as a one-element array, cleaned by nan_to_num.

    :param arr: the series handed to ``maximum``
    :return: numpy array of length 1 holding the maximum after
        ``np.nan_to_num`` has been applied to it
    """
    return np.nan_to_num(np.array([maximum(arr)]))