def get_lt(arr):
    """Return four linear-trend attributes of ``arr`` as a NaN-free array.

    ``linear_trend`` is asked for ``pvalue``, ``rvalue``, ``slope`` and
    ``stderr`` (in that order). The second element of every item it returns
    is collected into a numpy array, which is then passed through
    ``np.nan_to_num``.
    """
    attr_names = ('pvalue', 'rvalue', 'slope', 'stderr')
    params = [{'attr': attr_name} for attr_name in attr_names]
    trend_values = [entry[1] for entry in linear_trend(arr, params)]
    return np.nan_to_num(np.array(trend_values))
示例#2
0
def extract_features(data):
    """Compute a fixed list of 21 summary features for ``data``.

    Most calculators from ``feature`` receive ``data`` itself; three of them
    (``agg_autocorrelation``, ``abs_energy`` and ``linear_trend``) receive
    ``numpy.array(data.value)`` instead. The collected values are packed into
    a numpy array, passed through ``numpy.nan_to_num`` and returned as a
    plain list.
    """
    # 24 * 60 is used as the lag for the autocorrelation, partial
    # autocorrelation and c3 features -- presumably the number of minutes in
    # a day, i.e. one sample per minute; TODO confirm against the caller.
    day_lag = 24 * 60

    collected = []
    add = collected.append

    add(feature.symmetry_looking(data, [{'r': 0.3}])[0][1])
    add(feature.variance_larger_than_standard_deviation(data).bool())
    add(feature.ratio_beyond_r_sigma(data, 2))
    add(feature.has_duplicate_max(data))
    add(feature.has_duplicate_min(data))
    add(feature.has_duplicate(data))

    agg_params = [{'f_agg': 'mean', 'maxlag': day_lag}]
    add(feature.agg_autocorrelation(numpy.array(data.value),
                                    agg_params)[0][1])
    add(feature.partial_autocorrelation(data, [{'lag': day_lag}])[0][1])
    add(feature.abs_energy(numpy.array(data.value)))
    add(feature.mean_change(data))
    add(feature.mean_second_derivative_central(data))
    add(feature.median(data))

    # These calculators are explicitly coerced to plain float / int.
    add(float(feature.mean(data)))
    add(float(feature.standard_deviation(data)))
    add(float(feature.longest_strike_below_mean(data)))
    add(float(feature.longest_strike_above_mean(data)))
    add(int(feature.number_peaks(data, 10)))

    trend_params = [{'attr': 'rvalue'}]
    add(feature.linear_trend(numpy.array(data.value), trend_params)[0][1])
    add(feature.c3(data, day_lag))
    add(float(feature.maximum(data)))
    add(float(feature.minimum(data)))

    cleaned = numpy.nan_to_num(numpy.array(collected))
    return list(cleaned)
示例#3
0
 def function(x):
     """Return one linear-trend attribute of ``x``.

     The attribute name is read from ``self.attr``; ``self`` comes from the
     enclosing scope, which is not part of this excerpt. The result is the
     second element of the first item produced by ``linear_trend``.
     """
     trend_params = [{'attr': self.attr}]
     trend_items = list(linear_trend(x, trend_params))
     first_item = trend_items[0]
     return first_item[1]
示例#4
0
    def features(self, x, prefix):
        """Build a flat dict of summary features for the series ``x``.

        Every key has the form ``prefix + '_' + <feature name>``. The values
        come from numpy, ``sp.stats`` and ``feature_calculators`` (tsfresh),
        each called on ``x`` unchanged.

        Keys, their insertion order and the arguments passed to every
        calculator are the same as in the previous hand-written version.
        The only difference is that ``time_rev_asym_stat_10`` and
        ``time_rev_asym_stat_100`` are now computed once instead of twice.

        Args:
            x: The series to summarise; passed as-is to every calculator.
            prefix: String prepended, followed by ``'_'``, to every key.

        Returns:
            dict mapping the prefixed feature name to the computed value.
        """
        fc = feature_calculators
        feature_dict = dict()

        def put(name, value):
            # Single place where the key naming scheme is applied.
            feature_dict[prefix + '_' + name] = value

        # numpy
        basic_stats = (('mean', np.mean), ('max', np.max), ('min', np.min),
                       ('std', np.std), ('var', np.var), ('ptp', np.ptp))
        for name, func in basic_stats:
            put(name, func(x))
        for pct in range(10, 100, 10):
            put('percentile_{0}'.format(pct), np.percentile(x, pct))

        # scipy
        put('skew', sp.stats.skew(x))
        put('kurtosis', sp.stats.kurtosis(x))
        for order in range(1, 5):
            put('kstat_{0}'.format(order), sp.stats.kstat(x, order))
        for order in range(1, 5):
            put('moment_{0}'.format(order), sp.stats.moment(x, order))

        # tsfresh
        put('abs_energy', fc.abs_energy(x))
        put('abs_sum_of_changes', fc.absolute_sum_of_changes(x))
        put('count_above_mean', fc.count_above_mean(x))
        put('count_below_mean', fc.count_below_mean(x))
        put('mean_abs_change', fc.mean_abs_change(x))
        put('mean_change', fc.mean_change(x))
        put('var_larger_than_std_dev',
            fc.variance_larger_than_standard_deviation(x))

        # Value counts per bucket; consecutive edges form one bucket each.
        range_edges = (('minf', -np.inf), ('m4000', -4000), ('m3000', -3000),
                       ('m2000', -2000), ('m1000', -1000), ('0', 0),
                       ('p1000', 1000), ('p2000', 2000), ('p3000', 3000),
                       ('p4000', 4000), ('pinf', np.inf))
        for (lo_tag, lo), (hi_tag, hi) in zip(range_edges, range_edges[1:]):
            put('range_{0}_{1}'.format(lo_tag, hi_tag),
                fc.range_count(x, lo, hi))

        put('ratio_unique_values',
            fc.ratio_value_number_to_time_series_length(x))
        put('first_loc_min', fc.first_location_of_minimum(x))
        put('first_loc_max', fc.first_location_of_maximum(x))
        put('last_loc_min', fc.last_location_of_minimum(x))
        put('last_loc_max', fc.last_location_of_maximum(x))

        # Lags 10/100/1000 are emitted here and lags 1-4 further down, so
        # that the key order of the original implementation is preserved.
        for lag in (10, 100, 1000):
            put('time_rev_asym_stat_{0}'.format(lag),
                fc.time_reversal_asymmetry_statistic(x, lag))

        for lag in list(range(1, 11)) + [50, 100, 1000]:
            put('autocorrelation_{0}'.format(lag), fc.autocorrelation(x, lag))

        for lag in (1, 2, 3, 4, 5, 10, 100):
            put('c3_{0}'.format(lag), fc.c3(x, lag))

        # Key suffix -> tsfresh attribute name ('ang' maps to 'angle').
        fft_parts = (('real', 'real'), ('imag', 'imag'), ('ang', 'angle'))
        for c in range(1, 34):
            for tag, attr in fft_parts:
                put('fft_{0}_{1}'.format(c, tag),
                    list(fc.fft_coefficient(x, [{'coeff': c,
                                                 'attr': attr}]))[0][1])

        put('long_strk_above_mean', fc.longest_strike_above_mean(x))
        put('long_strk_below_mean', fc.longest_strike_below_mean(x))

        for normalize in (0, 1):
            put('cid_ce_{0}'.format(normalize), fc.cid_ce(x, normalize))

        for bins in (5, 10, 20, 50, 80, 100):
            put('binned_entropy_{0}'.format(bins), fc.binned_entropy(x, bins))

        put('num_crossing_0', fc.number_crossing_m(x, 0))
        for support in (1, 3, 5, 10, 50, 100, 500):
            put('num_peaks_{0}'.format(support), fc.number_peaks(x, support))

        for coeff in (1, 2, 5, 8, 10, 50, 100):
            put('spkt_welch_density_{0}'.format(coeff),
                list(fc.spkt_welch_density(x, [{'coeff': coeff}]))[0][1])

        # Lags 10 and 100 were already stored above with identical arguments,
        # so only the remaining lags are computed here.
        for lag in (1, 2, 3, 4):
            put('time_rev_asym_stat_{0}'.format(lag),
                fc.time_reversal_asymmetry_statistic(x, lag))

        for r in range(20):
            put('symmetry_looking_' + str(r),
                fc.symmetry_looking(x, [{'r': r * 0.05}])[0][1])

        for r in range(1, 20):
            put('large_standard_deviation_' + str(r),
                fc.large_standard_deviation(x, r * 0.05))

        for r in range(1, 10):
            put('quantile_' + str(r), fc.quantile(x, r * 0.1))

        for agg in ('mean', 'median', 'var'):
            put('agg_autocorr_' + agg,
                fc.agg_autocorrelation(x, [{'f_agg': agg,
                                            'maxlag': 40}])[0][-1])

        # NOTE(review): 'q' is passed as the integers 1..9, whereas the
        # quantile() loop above uses r * 0.1. Presumably r * 0.1 was intended
        # here as well -- confirm before changing, because doing so alters
        # the values of these features.
        for r in range(1, 10):
            put('index_mass_quantile_' + str(r),
                fc.index_mass_quantile(x, [{'q': r}])[0][1])

        put('max_langevin_fixed_point', fc.max_langevin_fixed_point(x, 3, 30))

        for attr in ('pvalue', 'rvalue', 'intercept', 'slope', 'stderr'):
            put('linear_trend_' + str(attr),
                fc.linear_trend(x, [{'attr': attr}])[0][1])

        for attr in ('pvalue', 'teststat', 'usedlag'):
            put('augmented_dickey_fuller_' + attr,
                fc.augmented_dickey_fuller(x, [{'attr': attr}])[0][1])

        for r in (0.5, 1, 1.5, 2, 2.5, 3, 5, 6, 7, 10):
            put('ratio_beyond_r_sigma_' + str(r),
                fc.ratio_beyond_r_sigma(x, r))

        # Feature families that were present only as commented-out code in
        # the original and remain disabled: number_cwt_peaks,
        # change_quantiles, approximate_entropy, linear_trend_timewise,
        # agg_linear_trend, cwt_coefficients, partial_autocorrelation,
        # ar_coefficient, fft_aggregated, value_count,
        # friedrich_coefficients and energy_ratio_by_chunks.
        return feature_dict
def lin_reg(x):
    """Return the slope and intercept reported by ``ts.linear_trend`` for ``x``.

    The result is a dict with the keys ``'lr_slope'`` and ``'lr_intercept'``,
    filled from the second element of the first and second returned item
    respectively.
    """
    requested = [{'attr': 'slope'}, {'attr': 'intercept'}]
    trend = ts.linear_trend(x, param=requested)

    result = {}
    result['lr_slope'] = trend[0][1]
    result['lr_intercept'] = trend[1][1]
    return result
def generate_time_series_feats(x_dataset, dataset_name="raw", test=False):
    """Compute per-row time-series features of ``x_dataset`` and save them.

    Three groups are produced:

    * single-valued features (one value per row), saved under
      ``tsfeats/<dataset_name>_<feature>``; a feature is skipped when
      ``features_exists`` reports that it is already there;
    * parameterised features (a dict of values per row), handled the same
      way after the per-row dicts are turned into a table with
      ``pd.DataFrame.from_dict``;
    * the auto-correlation and ARIMA features, which are always recomputed
      and saved as ``<dataset_name>_auto_correlation_all`` and
      ``<dataset_name>_arima_5_5_1`` (without the ``tsfeats/`` prefix).

    The progress output and the iteration over the feature tables avoid the
    Python 2-only ``print`` statement and ``dict.iteritems()``, so the
    function runs on both Python 2 and Python 3 with identical output.

    Args:
        x_dataset: Table holding one series per row. It must provide
            ``shape``, ``values`` and ``apply(func, axis=1, raw=True)`` --
            presumably a pandas DataFrame; confirm against the callers.
        dataset_name: Label embedded in every saved feature name.
        test: Forwarded unchanged to ``features_exists`` and
            ``save_features``.
    """
    import sys  # local import: only used for the progress output below

    def announce(message):
        # Dot-padded message followed by one space and no newline, so that
        # the "Done" / "Already generated" status ends up on the same line.
        # This matches the output of the former ``print message,`` statement.
        sys.stdout.write("{:.<70s}".format(message) + " ")

    make_dir_if_not_exists(os.path.join(FEATURES_PATH, 'tsfeats'))
    time_length = x_dataset.shape[1]

    # Features that yield a single value per row.
    features_function_dict = {
        "mean": mean,
        "median": median,
        "length": length,
        "minimum": minimum,
        "maximum": maximum,
        "variance": variance,
        "skewness": skewness,
        "kurtosis": kurtosis,
        "sum_values": sum_values,
        "abs_energy": abs_energy,
        "mean_change": mean_change,
        "mean_abs_change": mean_abs_change,
        "count_below_mean": count_below_mean,
        "count_above_mean": count_above_mean,
        "has_duplicate_min": has_duplicate_min,
        "has_duplicate_max": has_duplicate_max,
        "standard_deviation": standard_deviation,
        "absolute_sum_of_changes": absolute_sum_of_changes,
        "last_location_of_minimum": last_location_of_minimum,
        "last_location_of_maximum": last_location_of_maximum,
        "first_location_of_maximum": first_location_of_maximum,
        "longest_strike_below_mean": longest_strike_below_mean,
        "longest_strike_above_mean": longest_strike_above_mean,
        "sum_of_reoccurring_values": sum_of_reoccurring_values,
        "first_location_of_minimum": first_location_of_minimum,
        "sum_of_reoccurring_data_points": sum_of_reoccurring_data_points,
        "variance_larger_than_standard_deviation":
        variance_larger_than_standard_deviation,
        "ratio_value_number_to_time_series_length":
        ratio_value_number_to_time_series_length,
        "percentage_of_reoccurring_values_to_all_values":
        percentage_of_reoccurring_values_to_all_values,
        "binned_entropy_max300": lambda x: binned_entropy(x, 300),
        "binned_entropy_max400": lambda x: binned_entropy(x, 400),
        "cid_ce_true": lambda x: cid_ce(x, True),
        "cid_ce_false": lambda x: cid_ce(x, False),
        "percentage_of_reoccurring_datapoints_to_all_datapoints":
        percentage_of_reoccurring_datapoints_to_all_datapoints,
    }

    for feature_name, function_call in features_function_dict.items():
        announce("- Processing feature: %s" % feature_name)
        feature_name = 'tsfeats/%s_%s' % (dataset_name, feature_name)
        if not features_exists(feature_name, test):
            feats = x_dataset.apply(function_call, axis=1, raw=True).values
            save_features(feats, feature_name, test)
            print("Done")
        else:
            print("Already generated")

    # Parameter sets for the features that yield several values per row.
    trend_attrs = ("pvalue", "rvalue", "intercept", "slope", "stderr")
    ar_param_k100 = [{"coeff": i, "k": 100} for i in range(100 + 1)]
    ar_param_k500 = [{"coeff": i, "k": 500} for i in range(500 + 1)]
    agg50_mean_linear_trend = [{
        "attr": val,
        "chunk_len": 50,
        "f_agg": "mean"
    } for val in trend_attrs]
    aug_dickey_fuler_params = [{"attr": "teststat"},
                               {"attr": "pvalue"},
                               {"attr": "usedlag"}]
    energy_ratio_num10_focus5 = [{"num_segments": 10, "segment_focus": 5}]
    fft_aggr_spectrum = [{"aggtype": "centroid"},
                         {"aggtype": "variance"},
                         {"aggtype": "skew"},
                         {"aggtype": "kurtosis"}]
    # Coefficient indices 0 .. (time_length + 1) // 2 - 1 are requested.
    n_fft_coeffs = (time_length + 1) // 2
    fft_coefficient_real = [{"coeff": i, "attr": "real"}
                            for i in range(n_fft_coeffs)]
    fft_coefficient_imag = [{"coeff": i, "attr": "imag"}
                            for i in range(n_fft_coeffs)]
    fft_coefficient_abs = [{"coeff": i, "attr": "abs"}
                           for i in range(n_fft_coeffs)]
    fft_coefficient_angle = [{"coeff": i, "attr": "angle"}
                             for i in range(n_fft_coeffs)]
    linear_trend_params = [{"attr": val} for val in trend_attrs]

    # Each callable turns the calculator's (name, value) pairs into a dict,
    # so every row yields a mapping that becomes one table row further down.
    other_feats_dict = {
        "ar_coeff100":
        lambda x: dict(ar_coefficient(x, ar_param_k100)),
        "ar_coeff500":
        lambda x: dict(ar_coefficient(x, ar_param_k500)),
        "agg50_mean_lin_trend":
        lambda x: dict(agg_linear_trend(x, agg50_mean_linear_trend)),
        "aug_dickey_fuler":
        lambda x: dict(augmented_dickey_fuller(x, aug_dickey_fuler_params)),
        "energy_ratio_num10_focus5":
        lambda x: dict(energy_ratio_by_chunks(x, energy_ratio_num10_focus5)),
        "fft_aggr_spectrum":
        lambda x: dict(fft_aggregated(x, fft_aggr_spectrum)),
        "fft_coeff_real":
        lambda x: dict(fft_coefficient(x, fft_coefficient_real)),
        "fft_coeff_imag":
        lambda x: dict(fft_coefficient(x, fft_coefficient_imag)),
        "fft_coeff_abs":
        lambda x: dict(fft_coefficient(x, fft_coefficient_abs)),
        "fft_coeff_angle":
        lambda x: dict(fft_coefficient(x, fft_coefficient_angle)),
        "linear_trend":
        lambda x: dict(linear_trend(x, linear_trend_params)),
    }

    for feature_name, function_call in other_feats_dict.items():
        announce("- Processing features: %s" % feature_name)
        feature_name = 'tsfeats/%s_%s' % (dataset_name, feature_name)
        if not features_exists(feature_name, test):
            feats_dict = x_dataset.apply(function_call, axis=1,
                                         raw=True).values.tolist()
            feats = pd.DataFrame.from_dict(feats_dict)
            save_features(feats.values, feature_name, test)
            print("Done")
        else:
            print("Already generated")

    # Auto-correlations as features.
    # NOTE(review): unlike the loops above, this and the ARIMA step skip the
    # features_exists check and save without the 'tsfeats/' prefix; kept
    # as-is because changing the names would change where results are stored.
    print("- Processing Auto-correlation features...")
    corr_dataset = x_dataset.apply(autocorrelation_all, axis=1, raw=True)
    save_features(corr_dataset.values,
                  '%s_auto_correlation_all' % dataset_name, test)

    print("- Processing ARIMA(5,5,1) Features...")
    arima_features = parallelize_row(x_dataset.values,
                                     generate_arima_feats,
                                     n_jobs=2)
    # Sanity check: one row of ARIMA features per input row.
    assert arima_features.shape[0] == x_dataset.shape[0]
    save_features(arima_features, '%s_arima_5_5_1' % dataset_name, test)
示例#7
0
文件: main.py 项目: Ry0/PythonStudy
}])
# Fourier transform

number_peaks = feature_calculators.number_peaks(data[:1000], 50)
# Number of peaks (computed on data[:1000], with 50 as the second argument)

index_mass_quantile = feature_calculators.index_mass_quantile(
    data[:1000], [{
        'q': 0.5
    }, {
        'q': 0.1
    }])
# Percentile processing (requested for q = 0.5 and q = 0.1 on data[:1000])

linear_trend = feature_calculators.linear_trend(range_data,
                                                [{
                                                    'attr': "slope"
                                                }, {
                                                    'attr': 'intercept'
                                                }, {
                                                    'attr': 'rvalue'
                                                }])
# Simple trend analysis. For the meaning of each 'attr' value, see:
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.linregress.html

autocorrelation = feature_calculators.autocorrelation(data, 100)
# Autocorrelation computation (second argument: 100)

# Plot fft_aggregated (not assigned in the visible part of this script) and
# display the figure.
plt.plot(fft_aggregated)
plt.show()