def feature_extract(dt):
    """Compute a dictionary of summary features for the series ``dt``.

    The result maps feature names to values obtained from tsfresh's
    ``feature_calculators`` module: absolute energy, sum, mean, maximum,
    minimum, median and the quantiles 0.1 through 0.9. The entries returned
    by ``fft_ft(dt)`` are merged in last and override any identical keys.

    :param dt: the series handed unchanged to every feature calculator
    :return: dict of feature name -> feature value
    """
    import tsfresh.feature_extraction.feature_calculators as fc

    features = {
        'abs_energy': fc.abs_energy(dt),
        'sum_values': fc.sum_values(dt),
        'mean': fc.mean(dt),
        'maximum': fc.maximum(dt),
        'minimum': fc.minimum(dt),
        'median': fc.median(dt),
    }

    # Quantile levels are spelled out as literals so the generated keys are
    # exactly 'quantile_0.1' ... 'quantile_0.9'.
    for level in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9):
        features[f'quantile_{level}'] = fc.quantile(dt, level)

    # NOTE: the FFT coefficient features (coeff 0: real/imag/abs/angle) were
    # previously attempted with direct fc.fft_coefficient calls, which did not
    # work well; they are supplied by fft_ft instead.
    features.update(fft_ft(dt))
    return features
def get_feature_date(daydf):
    """Build a one-row feature frame from a frame of measurements.

    The identifier columns ``charttime``, ``subject_id`` and ``icustay_id``
    are dropped. Every remaining column contributes one block of features,
    and the blocks are concatenated side by side in column order:

    * ``heart_rate``, ``abp_systolic``, ``gcs_total``, ``platelets`` and
      ``creatinine`` go through ``scalar_feature_extraction``;
    * ``weight`` and ``age`` contribute the value found in that column's
      first row;
    * ``is_dead_in_n_hours`` contributes ``tffe.maximum`` of the column;
    * any other column goes through ``logistic_feature_extraction``.

    :param daydf: pandas.DataFrame containing at least the three identifier
        columns
    :return: pandas.DataFrame holding the concatenated feature blocks; the
        column labels are the positional labels of the individual blocks and
        therefore repeat
    :raises KeyError: if one of the identifier columns is missing
    """
    daydf = daydf.drop(["charttime", "subject_id", "icustay_id"], axis=1)

    scalar_columns = ("heart_rate", "abp_systolic", "gcs_total",
                      "platelets", "creatinine")
    static_columns = ("weight", "age")

    # Seed with a 1x0 frame so the result keeps a single row even when no
    # feature column is left after dropping the identifiers.
    pieces = [pd.DataFrame(np.zeros([1, 0], dtype=float))]

    for column in daydf:
        if column in scalar_columns:
            block = scalar_feature_extraction(daydf[column])
        elif column in static_columns:
            # Read the first row of *this* column. The previous
            # ``daydf.iloc[0][0]`` always picked the first column of the
            # frame, so weight and age both received the same wrong value.
            block = np.array([[daydf[column].iloc[0]]], dtype=float)
        elif column == "is_dead_in_n_hours":
            block = np.array([[tffe.maximum(daydf[column].values)]],
                             dtype=float)
        else:
            block = logistic_feature_extraction(daydf[column])
        pieces.append(pd.DataFrame(block))

    # One concat at the end instead of re-copying the growing frame per column.
    return pd.concat(pieces, axis=1)
def get_sta_features(self, data):
    """
    Calculate the values of the 9 selected statistical features.

    :param data: the series passed to each feature calculator
    :return: list ``[abs_energy, binned_entropy, autocorrelation, skewness,
        kurtosis, trend, mean, minimum, maximum]`` where the binned entropy
        uses 5 bins, the autocorrelation uses lag 4 and the trend is the
        slope of a linear regression of the values on their position
    """

    def _cal_trend(series):
        # Regress the values on 0..len-1 and report the fitted coefficient.
        positions = np.arange(len(series))
        model = linear_model.LinearRegression()
        model.fit(positions.reshape(-1, 1), np.array(series).reshape(-1, 1))
        return model.coef_[0][0]

    energy = ts.abs_energy(data)
    entropy = ts.binned_entropy(data, max_bins=5)
    autocorr = ts.autocorrelation(data, lag=4)
    skew_value = ts.skewness(data)
    kurt_value = ts.kurtosis(data)
    slope = _cal_trend(data)
    mean_value = ts.mean(data)
    lowest = ts.minimum(data)
    highest = ts.maximum(data)

    return [
        energy,
        entropy,
        autocorr,
        skew_value,
        kurt_value,
        slope,
        mean_value,
        lowest,
        highest,
    ]
def TS_feature3(signal):
    """Return six summary features of ``signal`` as a tuple.

    :param signal: the series passed to each ``ts`` feature calculator
    :return: tuple ``(maximum, mean, mean_abs_change, mean_change, median,
        minimum)``
    """
    return (
        ts.maximum(signal),
        ts.mean(signal),
        ts.mean_abs_change(signal),
        ts.mean_change(signal),
        ts.median(signal),
        ts.minimum(signal),
    )
def time_series_maximum(x):
    """
    Return the maximum feature of the time series x.

    Delegates to ``ts_feature_calculators.maximum``.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    largest = ts_feature_calculators.maximum(x)
    return largest
def time_series_maximum(x):
    """
    Maximum value of the series x.

    The computation is handed to ``ts_feature_calculators.maximum``.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: float
    """
    peak = ts_feature_calculators.maximum(x)
    return peak
def scalar_feature_extraction(column):
    """Compute ten scalar features of ``column`` into a 1x10 float array.

    Slot order: count_above_mean, mean, maximum, median, minimum,
    sample_entropy, skewness, variance, longest_strike_above_mean,
    longest_strike_below_mean. A sample entropy for which ``isNaN`` is true
    is stored as 0.

    :param column: object exposing ``.values`` (the data handed to ``tffe``)
    :return: numpy array of shape (1, 10) and dtype float
    """
    values = column.values
    calculators = (
        tffe.count_above_mean,
        tffe.mean,
        tffe.maximum,
        tffe.median,
        tffe.minimum,
        tffe.sample_entropy,
        tffe.skewness,
        tffe.variance,
        tffe.longest_strike_above_mean,
        tffe.longest_strike_below_mean,
    )

    features = np.zeros([1, 10], dtype=float)
    for slot, calculator in enumerate(calculators):
        features[0][slot] = calculator(values)

    # Slot 5 is the sample entropy; replace an undefined result with 0.
    if isNaN(features[0][5]):
        features[0][5] = 0
    return features
def translate_to_hourly(df):
    """Reduce runs of rows sharing an ICU stay and hour to one row each.

    Rows are scanned in their existing order. A run is a stretch of adjacent
    rows with the same ``icustay_id`` for which ``is_same_hour`` holds between
    the run's first ``charttime`` and the row's ``charttime``. Each run
    becomes one output row in which

    * numeric columns hold the column mean over the run,
    * ``charttime`` holds the run's first timestamp formatted as
      ``%Y-%m-%d %H:%M:%S``,
    * ``is_dead_in_n_hours`` holds ``tffe.maximum`` over the run.

    Non-numeric columns other than ``charttime`` are not aggregated and come
    out as NaN.

    Changes relative to the earlier implementation:

    * ``DataFrame.append`` (removed in pandas 2.0) is no longer used; summary
      rows are collected in a list and the frame is built once.
    * The final run now also takes the maximum of ``is_dead_in_n_hours``;
      previously only the final run was left with the mean.
    * An empty input returns an empty frame instead of raising IndexError.

    :param df: pandas.DataFrame with ``icustay_id``, ``charttime`` (whose
        string form parses as ``%Y-%m-%d %H:%M:%S``) and
        ``is_dead_in_n_hours`` columns
    :return: pandas.DataFrame with the same columns as ``df`` and one row per
        run
    """
    col_names = df.columns.values
    if len(df.index) == 0:
        return pd.DataFrame(columns=col_names)

    time_format = "%Y-%m-%d %H:%M:%S"

    def _summarise(bucket, hour):
        # One output row for a run of input rows. numeric_only=True skips the
        # text charttime column, which is filled in explicitly afterwards.
        summary = bucket.mean(axis=0, numeric_only=True)
        summary.loc['charttime'] = hour.strftime(time_format)
        summary.loc['is_dead_in_n_hours'] = tffe.maximum(
            bucket['is_dead_in_n_hours'].values)
        return summary

    hourly_rows = []
    bucket_start = 0  # position of the first row of the current run
    curr_icu_stay_id = df.iloc[0].loc['icustay_id']
    curr_hour = datetime.strptime(str(df.iloc[0].loc['charttime']),
                                  time_format)

    for index in range(len(df.index)):
        print("index: ", index)
        row = df.iloc[index]
        row_icu_stay_id = row.loc['icustay_id']
        row_hour = datetime.strptime(str(row.loc['charttime']), time_format)

        if (row_icu_stay_id == curr_icu_stay_id
                and is_same_hour(curr_hour, row_hour)):
            print(row.loc['charttime'])
        else:
            # The row opens a new run: close the previous one first.
            hourly_rows.append(
                _summarise(df.iloc[bucket_start:index], curr_hour))
            bucket_start = index
            curr_hour = row_hour
            curr_icu_stay_id = row_icu_stay_id
            print("completed hour")

    # Close the run that was still open when the input ended.
    hourly_rows.append(_summarise(df.iloc[bucket_start:], curr_hour))

    return pd.DataFrame(hourly_rows, columns=col_names)
def extract_features(data):
    """Compute a fixed-order list of 21 features for ``data``.

    ``data`` is passed as-is to most calculators of the ``feature`` module;
    a few receive ``numpy.array(data.value)`` instead. NaN/inf entries in the
    assembled vector are replaced through ``numpy.nan_to_num``.

    :param data: object exposing a ``value`` attribute (presumably a pandas
        DataFrame with a ``value`` column -- TODO confirm against the caller)
    :return: list with the features in the order they are assembled below
    """
    # 24 * 60 is used as the lag for the autocorrelation-style features;
    # presumably minutes per day -- TODO confirm the sampling interval.
    day_lag = 24 * 60
    values = numpy.array(data.value)

    symmetry = feature.symmetry_looking(data, [{'r': 0.3}])[0][1]
    variance_gt_std = feature.variance_larger_than_standard_deviation(
        data).bool()
    beyond_two_sigma = feature.ratio_beyond_r_sigma(data, 2)
    duplicate_max = feature.has_duplicate_max(data)
    duplicate_min = feature.has_duplicate_min(data)
    duplicate_any = feature.has_duplicate(data)
    mean_autocorr = feature.agg_autocorrelation(
        values, [{'f_agg': 'mean', 'maxlag': day_lag}])[0][1]
    partial_autocorr = feature.partial_autocorrelation(
        data, [{'lag': day_lag}])[0][1]
    energy = feature.abs_energy(values)
    mean_change = feature.mean_change(data)
    second_derivative = feature.mean_second_derivative_central(data)
    median_value = feature.median(data)
    mean_value = float(feature.mean(data))
    std_value = float(feature.standard_deviation(data))
    strike_below = float(feature.longest_strike_below_mean(data))
    strike_above = float(feature.longest_strike_above_mean(data))
    peak_count = int(feature.number_peaks(data, 10))
    trend_rvalue = feature.linear_trend(values, [{'attr': 'rvalue'}])[0][1]
    c3_value = feature.c3(data, day_lag)
    highest = float(feature.maximum(data))
    lowest = float(feature.minimum(data))

    vector = numpy.array([
        symmetry,
        variance_gt_std,
        beyond_two_sigma,
        duplicate_max,
        duplicate_min,
        duplicate_any,
        mean_autocorr,
        partial_autocorr,
        energy,
        mean_change,
        second_derivative,
        median_value,
        mean_value,
        std_value,
        strike_below,
        strike_above,
        peak_count,
        trend_rvalue,
        c3_value,
        highest,
        lowest,
    ])
    return list(numpy.nan_to_num(vector))
def get_maximum(arr):
    """Return ``maximum(arr)`` as a one-element numpy array.

    The value is wrapped in an array and passed through ``np.nan_to_num``.

    :param arr: the data handed to ``maximum``
    :return: numpy array of length 1
    """
    return np.nan_to_num(np.array([maximum(arr)]))