def vect_extract(arr_in):
    """Compute a fixed set of ``extract`` features for a single input.

    Every entry is produced by calling the same-named function of the
    ``extract`` module with ``arr_in``. The calls run in the order in which
    the keys appear below, and that order is also the row order of the
    returned frame.

    Args:
        arr_in: Input handed unchanged to every extractor (presumably a 1-D
            numeric array or pandas Series -- TODO confirm against callers).

    Returns:
        A ``(frame, features)`` tuple. ``features`` is the dict mapping
        feature name to extracted value; ``frame`` holds the same data as a
        DataFrame indexed by feature name with a single ``"values"`` column.
    """
    # NOTE(review): the ``[0][1]`` / ``[1][1]`` selectors suggest those
    # extractors return a sequence of (label, value) pairs -- confirm against
    # the ``extract`` module before relying on it.
    features = {
        "abs_energy": extract.abs_energy(arr_in),
        "mean_abs_change": extract.mean_abs_change(arr_in),
        "mean_second_derivative_central": (
            extract.mean_second_derivative_central(arr_in)
        ),
        "partial_autocorrelation": (
            extract.partial_autocorrelation(arr_in)[0][1]
        ),
        "augmented_dickey_fuller": (
            extract.augmented_dickey_fuller(arr_in)[0][1]
        ),
        "gskew": extract.gskew(arr_in),
        "stetson_mean": extract.stetson_mean(arr_in),
        "count_above_mean": extract.count_above_mean(arr_in),
        "longest_strike_below_mean": (
            extract.longest_strike_below_mean(arr_in)
        ),
        "wozniak": extract.wozniak(arr_in)[0][1],
        "fft_coefficient": extract.fft_coefficient(arr_in)[0][1],
        "ar_coefficient": extract.ar_coefficient(arr_in)[0][1],
        "index_mass_quantile": extract.index_mass_quantile(arr_in)[0][1],
        "number_cwt_peaks": extract.number_cwt_peaks(arr_in)[0][1],
        "spkt_welch_density": extract.spkt_welch_density(arr_in),
        "c3": extract.c3(arr_in),
        "binned_entropy": extract.binned_entropy(arr_in),
        # disabled: "svd_entropy" -> extract.svd_entropy(arr_in)[0][1]
        "hjorth_complexity": extract.hjorth_complexity(arr_in),
        "max_langevin_fixed_point": (
            extract.max_langevin_fixed_point(arr_in)
        ),
        "percent_amplitude": extract.percent_amplitude(arr_in)[0][1],
        # cad_prob is the only feature read from the second entry ([1][1]).
        "cad_prob": extract.cad_prob(arr_in)[1][1],
        "zero_crossing_derivative": (
            extract.zero_crossing_derivative(arr_in)[0][1]
        ),
        "detrended_fluctuation_analysis": (
            extract.detrended_fluctuation_analysis(arr_in)
        ),
        # disabled: "fisher_information"
        #     -> extract.fisher_information(arr_in)[0][1]
        "higuchi_fractal_dimension": (
            extract.higuchi_fractal_dimension(arr_in)[0][1]
        ),
        "hurst_exponent": extract.hurst_exponent(arr_in),
        # disabled: "largest_lyauponov_exponent"
        #     -> extract.largest_lyauponov_exponent(arr_in)[0][1]
        "whelch_method": extract.whelch_method(arr_in)[0][1],
        "find_freq": extract.find_freq(arr_in)[0][1],
        # disabled: "flux_perc"
        #     -> extract.flux_perc(arr_in)['FluxPercentileRatioMid20']
        "range_cum_s": extract.range_cum_s(arr_in)["Rcs"],
        "kurtosis": extract.kurtosis(arr_in),
        "stetson_k": extract.stetson_k(arr_in),
    }

    frame = pd.DataFrame.from_dict(
        features, orient="index", columns=["values"]
    )
    return frame, features
def test_prepro_all():
    """Smoke-test the transform, interact, mapper and extract functions.

    Each function is simply called on the sample data returned by
    ``test_get_sampledata()``; no return value is inspected and there are no
    assertions, so the test passes as long as nothing raises. The statement
    order is significant because some callees modify the frame they receive.
    """
    df = test_get_sampledata()
    df.head()

    # **Transformation**
    transform.robust_scaler(df, drop=["Close_1"])
    transform.standard_scaler(df, drop=["Close"])
    transform.fast_fracdiff(df, ["Close", "Open"], 0.5)
    # transform.windsorization(df, "Close", para, strategy='both')
    transform.operations(df, ["Close"])
    transform.triple_exponential_smoothing(
        df, ["Close"], 12, .2, .2, .2, 0
    )
    # naive_dec changes the frame it is given, which makes the calls that
    # follow crash; handing it a deep copy keeps ``df`` intact.
    transform.naive_dec(copy.deepcopy(df), ["Close", "Open"])
    transform.bkb(df, ["Close"])
    transform.butter_lowpass_filter(df, ["Close"], 4)
    transform.instantaneous_phases(df, ["Close"])
    transform.kalman_feat(df, ["Close"])
    transform.perd_feat(df, ["Close"])
    transform.fft_feat(df, ["Close"])
    transform.harmonicradar_cw(df, ["Close"], 0.3, 0.2)
    transform.saw(df, ["Close", "Open"])
    transform.modify(df, ["Close"])
    transform.multiple_rolling(df, columns=["Close"])
    transform.multiple_lags(df, start=1, end=3, columns=["Close"])
    transform.prophet_feat(df.reset_index(), ["Close", "Open"], "Date", "D")

    # **Interaction**
    # These functions also change the frame they receive, so each one is
    # given its own deep copy.
    interact.lowess(
        copy.deepcopy(df), ["Open", "Volume"], df["Close"], f=0.25, iter=3
    )
    interact.autoregression(copy.deepcopy(df))
    interact.muldiv(copy.deepcopy(df), ["Close", "Open"])
    interact.decision_tree_disc(copy.deepcopy(df), ["Close"])
    interact.quantile_normalize(copy.deepcopy(df), drop=["Close"])
    interact.tech(copy.deepcopy(df))
    interact.genetic_feat(copy.deepcopy(df))

    # **Mapping**
    mapper.pca_feature(
        df, variance_or_components=0.80, drop_cols=["Close_1"]
    )
    mapper.cross_lag(df)
    # Regarding https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test:
    # the chi-square test assumes a distribution of frequencies, and a
    # frequency cannot be negative. Since this is only a test, the data is
    # min-max scaled into [0, 1] first: (df - df.min()) / (df.max() - df.min()).
    mapper.a_chi((df - df.min()) / (df.max() - df.min()))
    mapper.encoder_dataset(df, ["Close_1"], 15)
    mapper.lle_feat(df, ["Close_1"], 4)
    mapper.feature_agg(df, ["Close_1"], 4)
    mapper.neigh_feat(df, ["Close_1"], 4)

    # **Extraction**
    extract.abs_energy(df["Close"])
    extract.cid_ce(df["Close"], True)
    # Extractors that take only the "Close" column, in their original order.
    for feature_name in (
        "mean_abs_change",
        "mean_second_derivative_central",
        "variance_larger_than_standard_deviation",
        # extract.var_index(df["Close"].values, var_index_param)
        "symmetry_looking",
        "has_duplicate_max",
        "partial_autocorrelation",
        "augmented_dickey_fuller",
        "gskew",
        "stetson_mean",
        "length",
        "count_above_mean",
        "longest_strike_below_mean",
        "wozniak",
        "last_location_of_maximum",
        "fft_coefficient",
        "ar_coefficient",
        "index_mass_quantile",
        "number_cwt_peaks",
        "spkt_welch_density",
        "linear_trend_timewise",
        "c3",
        "binned_entropy",
    ):
        getattr(extract, feature_name)(df["Close"])

    # svd_entropy is the one extractor fed the raw ``.values`` array.
    extract.svd_entropy(df["Close"].values)

    for feature_name in (
        "hjorth_complexity",
        "max_langevin_fixed_point",
        "percent_amplitude",
        "cad_prob",
        "zero_crossing_derivative",
        "detrended_fluctuation_analysis",
        "fisher_information",
        "higuchi_fractal_dimension",
        "petrosian_fractal_dimension",
        "hurst_exponent",
        "largest_lyauponov_exponent",
        "whelch_method",
        "find_freq",
        "flux_perc",
        "range_cum_s",
    ):
        getattr(extract, feature_name)(df["Close"])

    # Per the example at https://github.com/firmai/deltapy#extraction, the
    # second argument of structure_func is a dict of the "Volume" and "Open"
    # value arrays.
    struct_param = {"Volume": df["Volume"].values, "Open": df["Open"].values}
    extract.structure_func(df["Close"], struct_param)
    extract.kurtosis(df["Close"])
    extract.stetson_k(df["Close"])
def test_prepro_all():
    """Smoke-test the deltapy transform, interact, mapper and extract APIs.

    Each function is called once on the frame returned by ``data_copy()``.
    Nothing is asserted about the results; the test passes when no call
    raises.

    Fixes relative to the earlier version of this test:
      * ``transform.naive_dec`` and the ``interact`` functions modify the
        frame they receive, which made later calls crash, so each now gets
        its own ``df.copy()``.
      * ``mapper.a_chi`` is given min-max scaled data, because a chi-squared
        test assumes non-negative frequencies.
      * ``extract.structure_func`` is given the ``struct_param`` dict it
        expects as its second argument.
    """
    from deltapy import transform, interact, mapper, extract

    df = data_copy()
    df.head()

    # **Transformation**
    transform.robust_scaler(df, drop=["Close_1"])
    transform.standard_scaler(df, drop=["Close"])
    transform.fast_fracdiff(df, ["Close", "Open"], 0.5)
    # transform.windsorization(df, "Close", para, strategy='both')
    transform.operations(df, ["Close"])
    transform.triple_exponential_smoothing(df, ["Close"], 12, .2, .2, .2, 0)
    # naive_dec changes its input frame; pass a copy so ``df`` stays usable.
    transform.naive_dec(df.copy(), ["Close", "Open"])
    transform.bkb(df, ["Close"])
    transform.butter_lowpass_filter(df, ["Close"], 4)
    transform.instantaneous_phases(df, ["Close"])
    transform.kalman_feat(df, ["Close"])
    transform.perd_feat(df, ["Close"])
    transform.fft_feat(df, ["Close"])
    transform.harmonicradar_cw(df, ["Close"], 0.3, 0.2)
    transform.saw(df, ["Close", "Open"])
    transform.modify(df, ["Close"])
    transform.multiple_rolling(df, columns=["Close"])
    transform.multiple_lags(df, start=1, end=3, columns=["Close"])
    transform.prophet_feat(df.reset_index(), ["Close", "Open"], "Date", "D")

    # **Interaction**
    # Every interact function receives a fresh copy for the same reason as
    # naive_dec above: they modify the frame passed in.
    interact.lowess(
        df.copy(), ["Open", "Volume"], df["Close"], f=0.25, iter=3
    )
    interact.autoregression(df.copy())
    interact.muldiv(df.copy(), ["Close", "Open"])
    interact.decision_tree_disc(df.copy(), ["Close"])
    interact.quantile_normalize(df.copy(), drop=["Close"])
    interact.tech(df.copy())
    interact.genetic_feat(df.copy())

    # **Mapping**
    mapper.pca_feature(df, variance_or_components=0.80, drop_cols=["Close_1"])
    mapper.cross_lag(df)
    # Chi-squared assumes non-negative frequencies, so scale every column
    # into [0, 1] before handing the data to a_chi.
    # NOTE(review): assumes all columns are numeric and non-constant
    # (max != min); confirm against the sample data.
    non_negative = (df - df.min()) / (df.max() - df.min())
    mapper.a_chi(non_negative)
    mapper.encoder_dataset(df, ["Close_1"], 15)
    mapper.lle_feat(df, ["Close_1"], 4)
    mapper.feature_agg(df, ["Close_1"], 4)
    mapper.neigh_feat(df, ["Close_1"], 4)

    # **Extraction**
    extract.abs_energy(df["Close"])
    extract.cid_ce(df["Close"], True)
    extract.mean_abs_change(df["Close"])
    extract.mean_second_derivative_central(df["Close"])
    extract.variance_larger_than_standard_deviation(df["Close"])
    # extract.var_index(df["Close"].values, var_index_param)
    extract.symmetry_looking(df["Close"])
    extract.has_duplicate_max(df["Close"])
    extract.partial_autocorrelation(df["Close"])
    extract.augmented_dickey_fuller(df["Close"])
    extract.gskew(df["Close"])
    extract.stetson_mean(df["Close"])
    extract.length(df["Close"])
    extract.count_above_mean(df["Close"])
    extract.longest_strike_below_mean(df["Close"])
    extract.wozniak(df["Close"])
    extract.last_location_of_maximum(df["Close"])
    extract.fft_coefficient(df["Close"])
    extract.ar_coefficient(df["Close"])
    extract.index_mass_quantile(df["Close"])
    extract.number_cwt_peaks(df["Close"])
    extract.spkt_welch_density(df["Close"])
    extract.linear_trend_timewise(df["Close"])
    extract.c3(df["Close"])
    extract.binned_entropy(df["Close"])
    # svd_entropy is the one extractor fed the raw ``.values`` array.
    extract.svd_entropy(df["Close"].values)
    extract.hjorth_complexity(df["Close"])
    extract.max_langevin_fixed_point(df["Close"])
    extract.percent_amplitude(df["Close"])
    extract.cad_prob(df["Close"])
    extract.zero_crossing_derivative(df["Close"])
    extract.detrended_fluctuation_analysis(df["Close"])
    extract.fisher_information(df["Close"])
    extract.higuchi_fractal_dimension(df["Close"])
    extract.petrosian_fractal_dimension(df["Close"])
    extract.hurst_exponent(df["Close"])
    extract.largest_lyauponov_exponent(df["Close"])
    extract.whelch_method(df["Close"])
    extract.find_freq(df["Close"])
    extract.flux_perc(df["Close"])
    extract.range_cum_s(df["Close"])
    # structure_func takes a second argument: a dict of the "Volume" and
    # "Open" value arrays.
    struct_param = {"Volume": df["Volume"].values, "Open": df["Open"].values}
    extract.structure_func(df["Close"], struct_param)
    extract.kurtosis(df["Close"])
    extract.stetson_k(df["Close"])