Example #1
    def test_from_columns(self):
        tsn = "TEST_TIME_SERIES"

        fset = ComprehensiveFCParameters()
        self.assertRaises(TypeError, from_columns, 42)
        self.assertRaises(TypeError, from_columns, 42)
        self.assertRaises(ValueError, from_columns, ["This is not a column name"])
        self.assertRaises(ValueError, from_columns, ["This__neither"])
        self.assertRaises(ValueError, from_columns, ["This__also__not"])

        # Aggregate functions
        feature_names = [tsn + '__sum_values', tsn + "__median", tsn + "__length", tsn + "__sample_entropy"]

        # Aggregate functions with params
        feature_names += [tsn + '__quantile__q_10', tsn + '__quantile__q_70', tsn + '__number_peaks__n_30',
                          tsn + '__value_count__value_inf', tsn + '__value_count__value_-inf',
                          tsn + '__value_count__value_nan']

        # Apply functions
        feature_names += [tsn + '__ar_coefficient__k_20__coeff_4', tsn + '__ar_coefficient__coeff_10__k_-1']

        kind_to_fc_parameters = from_columns(feature_names)

        six.assertCountEqual(self, list(kind_to_fc_parameters[tsn].keys()),
                             ["sum_values", "median", "length", "sample_entropy", "quantile", "number_peaks",
                              "ar_coefficient", "value_count"])

        self.assertEqual(kind_to_fc_parameters[tsn]["sum_values"], None)
        self.assertEqual(kind_to_fc_parameters[tsn]["ar_coefficient"],
                         [{"k": 20, "coeff": 4}, {"k": -1, "coeff": 10}])

        self.assertEqual(kind_to_fc_parameters[tsn]["value_count"],
                         [{"value": np.PINF}, {"value": np.NINF}, {"value": np.NaN}])

        # test that it passes for all functions
        fset = ComprehensiveFCParameters()
        X_org = extract_features(pd.DataFrame({"value": [1, 2, 3], "id": [1, 1, 1]}),
                                 default_fc_parameters=fset,
                                 column_id="id", column_value="value",
                                 n_jobs=0)

        inferred_fset = from_columns(X_org)

        X_new = extract_features(pd.DataFrame({"value": [1, 2, 3], "id": [1, 1, 1]}),
                                 kind_to_fc_parameters=inferred_fset,
                                 column_id="id", column_value="value",
                                 n_jobs=0)

        assert_frame_equal(X_org.sort_index(), X_new.sort_index())
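As a quick illustration of the column-name convention this test exercises ("<kind>__<calculator>__<param>_<value>"), a minimal sketch; the expected output follows from the assertions above:

from tsfresh.feature_extraction.settings import from_columns

mapping = from_columns(["T__quantile__q_10", "T__median"])
print(mapping)  # per the assertions above: {'T': {'quantile': [{'q': 10}], 'median': None}}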
Example #2
def get_ts_features(X: Union[np.ndarray, torch.Tensor],
                    y: Union[None, np.ndarray, torch.Tensor] = None,
                    features: Union[str, dict] = 'min',
                    n_jobs: Optional[int] = None,
                    **kwargs):
    """
    Args:
        X: np.array or torch.Tesnor of shape [samples, dimensions, timesteps].
        y: Not required for unlabeled data. Otherwise, you need to pass it.
        features: 'min', 'efficient', 'all', or a dictionary. Be aware that 'efficient' and 'all' may required substantial memory and time.
    """
    df = to_tsfresh_df(X)
    n_jobs = ifnone(n_jobs, defaults.cpus)
    if 'default_fc_parameters' in kwargs:
        # pop so the parameters are not passed twice to extract_features below
        default_fc_parameters = kwargs.pop('default_fc_parameters')
    elif features == 'min':
        default_fc_parameters = MinimalFCParameters()
    elif features == 'efficient':
        default_fc_parameters = EfficientFCParameters()
    elif features == 'all':
        default_fc_parameters = ComprehensiveFCParameters()
    else:
        default_fc_parameters = None
    df = tsfresh.extract_features(df,
                                  column_id="id",
                                  n_jobs=n_jobs,
                                  default_fc_parameters=default_fc_parameters,
                                  **kwargs)
    if y is not None:
        if y.ndim == 1: y = y.reshape(-1, 1)
        for i in range(y.shape[-1]):
            df['target' if y.shape[-1] == 1 else f'target_{i}'] = y[:, i]
    return df
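A minimal usage sketch, assuming the helper imports used above (to_tsfresh_df, ifnone, defaults) resolve in this library; the data shapes here are made up:

import numpy as np

X = np.random.randn(8, 2, 50)   # [samples, dimensions, timesteps]
y = np.random.randint(0, 2, 8)  # one label per sample
df_feats = get_ts_features(X, y, features='min')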
Example #3
    def __init__(self, **kwargs):
        accepted_types = [
            pd.Series
        ]

        default_fc_parameters = ComprehensiveFCParameters()
        extraction_function = partial(_do_extraction_on_chunk,
                                      default_fc_parameters=default_fc_parameters,
                                      kind_to_fc_parameters=None)

        def series_transform(series):
            series_name = series.name
            if series_name is None:
                series_name = self.name

            input_series = (
                1, series_name, series
            )
            extracted_data = extraction_function(input_series)
            extracted_data_flat = {
                x['variable']: x['value']
                for x in extracted_data
            }
            return extracted_data_flat

        super(TsFreshSeriesTransformer, self).__init__(data_types=accepted_types,
                                                       columns=None,
                                                       transform_function=series_transform)
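For comparison, the same single-Series extraction can be done through the public tsfresh API; a minimal sketch:

import pandas as pd
from tsfresh import extract_features
from tsfresh.feature_extraction import MinimalFCParameters

s = pd.Series([1.0, 2.0, 3.0, 4.0], name="signal")
df = pd.DataFrame({"id": 0, "value": s.values})
feats = extract_features(df, column_id="id", column_value="value",
                         default_fc_parameters=MinimalFCParameters(), n_jobs=0)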
Example #4
def _feature_extraction_on_chunk_helper(df, column_id, column_kind,
                                        column_sort, column_value,
                                        default_fc_parameters,
                                        kind_to_fc_parameters):
    """
    Helper function wrapped around _do_extraction_on_chunk to use the correct format
    of the "chunk" and output a pandas dataframe.
    Is used e.g. in the convenience functions for dask and spark.

    For the definitions of the parameters, please see these convenience functions.
    """
    if default_fc_parameters is None and kind_to_fc_parameters is None:
        default_fc_parameters = ComprehensiveFCParameters()
    elif default_fc_parameters is None and kind_to_fc_parameters is not None:
        default_fc_parameters = {}

    if column_sort is not None:
        df = df.sort_values(column_sort)

    chunk = df[column_id].iloc[0], df[column_kind].iloc[0], df[column_value]
    features = _do_extraction_on_chunk(
        chunk,
        default_fc_parameters=default_fc_parameters,
        kind_to_fc_parameters=kind_to_fc_parameters)
    features = pd.DataFrame(features, columns=[column_id, "variable", "value"])
    features["value"] = features["value"].astype("double")

    return features[[column_id, "variable", "value"]]
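This helper backs tsfresh's dask convenience binding; a sketch of the public entry point, following the tsfresh documentation (assumes dask is installed):

import dask.dataframe as dd
import pandas as pd
from tsfresh.convenience.bindings import dask_feature_extraction_on_chunk
from tsfresh.feature_extraction.settings import MinimalFCParameters

pdf = pd.DataFrame({"id": [1, 1, 1, 1], "kind": ["a"] * 4,
                    "sort": [0, 1, 2, 3], "value": [1.0, 2.0, 3.0, 4.0]})
ddf = dd.from_pandas(pdf, npartitions=1)
features = dask_feature_extraction_on_chunk(
    ddf.groupby(["id", "kind"]),
    column_id="id", column_kind="kind",
    column_sort="sort", column_value="value",
    default_fc_parameters=MinimalFCParameters())
print(features.compute().head())  # long format: id, variable, value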
Example #5
    def _get_extraction_params(self):
        """Helper function to set default parameters from tsfresh"""
        # make n_jobs compatible with scikit-learn
        self.n_jobs = check_n_jobs(self.n_jobs)

        # lazy imports to avoid hard dependency
        from tsfresh.defaults import CHUNKSIZE
        from tsfresh.defaults import DISABLE_PROGRESSBAR
        from tsfresh.utilities.dataframe_functions import impute
        from tsfresh.defaults import N_PROCESSES
        from tsfresh.defaults import PROFILING
        from tsfresh.defaults import PROFILING_FILENAME
        from tsfresh.defaults import PROFILING_SORTING
        from tsfresh.defaults import SHOW_WARNINGS
        from tsfresh.feature_extraction.settings import ComprehensiveFCParameters
        from tsfresh.feature_extraction.settings import EfficientFCParameters
        from tsfresh.feature_extraction.settings import MinimalFCParameters

        # Set defaults from tsfresh
        extraction_params = {
            "kind_to_fc_parameters": self.kind_to_fc_parameters,
            "n_jobs": N_PROCESSES,
            "chunksize": CHUNKSIZE,
            "show_warnings": SHOW_WARNINGS,
            "disable_progressbar": DISABLE_PROGRESSBAR,
            "impute_function": impute,
            "profiling_sorting": PROFILING_SORTING,
            "profiling_filename": PROFILING_FILENAME,
            "profile": PROFILING,
        }

        # Replace defaults with user defined parameters
        for name in extraction_params.keys():
            if hasattr(self, name):
                value = getattr(self, name)
                if value is not None:
                    extraction_params[name] = value

        # Convert convenience string arguments to tsfresh parameters classes
        fc_param_lookup = {
            "minimal": MinimalFCParameters(),
            "efficient": EfficientFCParameters(),
            "comprehensive": ComprehensiveFCParameters(),
        }
        if isinstance(self.default_fc_parameters, str):
            if self.default_fc_parameters not in fc_param_lookup:
                raise ValueError(
                    f"If `default_fc_parameters` is passed as a string, "
                    f"it must be one of {list(fc_param_lookup.keys())}, "
                    f"but found: {self.default_fc_parameters}"
                )
            else:
                fc_parameters = fc_param_lookup[self.default_fc_parameters]
        else:
            fc_parameters = self.default_fc_parameters
        extraction_params["default_fc_parameters"] = fc_parameters

        return extraction_params
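Note that the objects behind the convenience strings are plain dicts mapping calculator names to parameter lists (or None for parameterless calculators); a quick sketch:

from tsfresh.feature_extraction.settings import MinimalFCParameters

fc = MinimalFCParameters()
print(fc["mean"])      # None -> the calculator takes no parameters
print(sorted(fc)[:5])  # a few of the enabled calculator names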
Example #6
    def test_extract_features_for_one_time_series(self):
        # todo: implement more methods and test more aspects
        df = self.create_test_data_sample()
        settings = ComprehensiveFCParameters()

        extracted_features = extract_features(df, default_fc_parameters=settings,
                                              column_value="val", column_id="id",
                                              column_kind="kind", column_sort="sort")

        self.assertIsInstance(extracted_features, pd.DataFrame)
        self.assertTrue(np.all(extracted_features.b__sum_values == np.array([757, 695])))
        self.assertTrue(np.all(extracted_features.b__minimum == np.array([3, 1])))
        self.assertTrue(np.all(extracted_features.b__abs_energy == np.array([36619, 35483])))
        self.assertTrue(np.all(extracted_features.b__mean == np.array([37.85, 34.75])))
        self.assertTrue(np.all(extracted_features.b__median == np.array([39.5, 28.0])))

        df_sts = self.create_one_valued_time_series()
        extracted_features_sts = extract_features(df_sts, default_fc_parameters=settings,
                                                  column_value="val", column_id="id",
                                                  column_kind="kind", column_sort="sort")

        self.assertIsInstance(extracted_features_sts, pd.DataFrame)
        self.assertTrue(np.all(extracted_features_sts.a__maximum == np.array([1.0, 6.0])))
        self.assertTrue(np.all(extracted_features_sts.a__sum_values == np.array([1.0, 11.0])))
        self.assertTrue(np.all(extracted_features_sts.a__count_above_mean == np.array([0, 1])))
Example #7
def extractFeatures(dataSetToExtractFrom, feature_settings="minimal"):
    """ Extracts features of the given dataset and returns a new dataset of features only.

    Keyword arguments:
    dataSetToExtractFrom     -- Dataset (type: pandas.core.frame.DataFrame)
    feature_settings         -- Feature extraction parameter (type: string, options: 'minimal','maximal', 'findBest')

    Returns:
        pandas.core.frame.DataFrame
    """

    dataset_for_extraction = dataSetToExtractFrom.drop(
        columns=['label', 'hand', 'annotator'])

    if feature_settings == "minimal":
        extractedFeatures = MinimalFCParameters()
    elif feature_settings == "maximal":
        extractedFeatures = ComprehensiveFCParameters()
    elif feature_settings == "findBest":
        extractedFeatures = EfficientFCParameters()
    else:
        extractedFeatures = MinimalFCParameters()
        print('Given value for feature_settings is not valid! The minimal feature set is used instead.')

    extracted_featureset = extract_features(dataset_for_extraction, column_id="punch_id",
                                            column_sort="timestamp", impute_function=impute, default_fc_parameters=extractedFeatures)
    return extracted_featureset
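A hypothetical call, with column names matching what the function drops ('label', 'hand', 'annotator') and groups by ('punch_id', 'timestamp'); the sensor column is made up:

import pandas as pd

df = pd.DataFrame({
    "punch_id": [1, 1, 1], "timestamp": [0, 1, 2],
    "accel_x": [0.1, 0.5, 0.2],
    "label": ["jab"] * 3, "hand": ["left"] * 3, "annotator": ["a"] * 3,
})
features = extractFeatures(df, feature_settings="minimal")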
Example #8
    def test_from_columns(self):
        tsn = "TEST_TIME_SERIES"

        fset = ComprehensiveFCParameters()
        self.assertRaises(TypeError, from_columns, 42)
        self.assertRaises(TypeError, from_columns, 42)
        self.assertRaises(ValueError, from_columns,
                          ["This is not a column name"])
        self.assertRaises(ValueError, from_columns, ["This__neither"])
        self.assertRaises(ValueError, from_columns, ["This__also__not"])

        # Aggregate functions
        feature_names = [
            tsn + '__sum_values', tsn + "__median", tsn + "__length",
            tsn + "__sample_entropy"
        ]

        # Aggregate functions with params
        feature_names += [
            tsn + '__quantile__q_10', tsn + '__quantile__q_70',
            tsn + '__number_peaks__n_30', tsn + '__value_count__value_inf',
            tsn + '__value_count__value_-inf', tsn + '__value_count__value_nan'
        ]

        # Apply functions
        feature_names += [
            tsn + '__ar_coefficient__k_20__coeff_4',
            tsn + '__ar_coefficient__coeff_10__k_-1'
        ]

        kind_to_fc_parameters = from_columns(feature_names)

        six.assertCountEqual(self, list(kind_to_fc_parameters[tsn].keys()), [
            "sum_values", "median", "length", "sample_entropy", "quantile",
            "number_peaks", "ar_coefficient", "value_count"
        ])

        self.assertEqual(kind_to_fc_parameters[tsn]["sum_values"], None)
        self.assertEqual(kind_to_fc_parameters[tsn]["ar_coefficient"],
                         [{"k": 20, "coeff": 4}, {"k": -1, "coeff": 10}])

        self.assertEqual(kind_to_fc_parameters[tsn]["value_count"],
                         [{"value": np.PINF}, {"value": np.NINF}, {"value": np.NaN}])
Example #9
 def test_from_column_correct_for_comprehensive_fc_parameters(self):
     fset = ComprehensiveFCParameters()
     X_org = extract_features(pd.DataFrame({"value": [1, 2, 3], "id": [1, 1, 1]}),
                              default_fc_parameters=fset,
                              column_id="id", column_value="value",
                              n_jobs=0)
     inferred_fset = from_columns(X_org)
     X_new = extract_features(pd.DataFrame({"value": [1, 2, 3], "id": [1, 1, 1]}),
                              kind_to_fc_parameters=inferred_fset,
                              column_id="id", column_value="value",
                              n_jobs=0)
     assert_frame_equal(X_org.sort_index(), X_new.sort_index())
Example #10
    def test_default_calculates_all_features(self):
        """
        Test that by default a ComprehensiveFCParameters object should be set up to calculate all features defined
        in tsfresh.feature_extraction.feature_calculators
        """
        settings = ComprehensiveFCParameters()
        all_feature_calculators = [name for name, func in feature_calculators.__dict__.items()
                                   if hasattr(func, "fctype")]

        for calculator in all_feature_calculators:
            self.assertIn(calculator, settings,
                          msg='Default ComprehensiveFCParameters object does not setup calculation of {}'
                          .format(calculator))
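The fctype attribute checked here is set by tsfresh's decorators on every calculator, for example:

from tsfresh.feature_extraction import feature_calculators

print(feature_calculators.mean.fctype)             # 'simple'
print(feature_calculators.fft_coefficient.fctype)  # 'combiner'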
Example #11
def comprehensive_fc_parameters():
    """A wrapper around the tsfresh function :class:`ComperehensiveFCParameters` to filter out unsupported parameter settings.

    Returns:
        parameters (dict) : a dictionary list of parameters

    Docstring source:
    https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.settings.ComprehensiveFCParameters
    """
    parameters = ComprehensiveFCParameters()

    # when a partial autocorrelation has a lag of zero, an error is raised
    # from `tsfresh.feature_extraction.extract_features`, so filter those
    # settings out (a rebuild avoids the skipped-element bug of deleting
    # from a list while iterating over it)
    parameters['partial_autocorrelation'] = [
        values for values in parameters['partial_autocorrelation']
        if values['lag'] != 0
    ]

    return parameters
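A quick check of the wrapper's effect: the lag-0 settings are gone and everything else is untouched:

params = comprehensive_fc_parameters()
assert all(values["lag"] != 0 for values in params["partial_autocorrelation"])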
Example #12
    def _extract_features(self, devices, trial_id):

        if self.motion == True:
            pickle_path = FEATURE_CACHE + 'X{}-{}-{}.pickle'.format(
                trial_id, self.window_size, self.feature_type)
        elif self.motion == 'only':
            pickle_path = FEATURE_CACHE + 'X{}-{}-{}-motion-only.pickle'.format(
                trial_id, self.window_size, self.feature_type)
        else:
            pickle_path = FEATURE_CACHE + 'X{}-{}-{}-no-motion.pickle'.format(
                trial_id, self.window_size, self.feature_type)
        if os.path.isfile(pickle_path):
            return pickle.load(open(pickle_path, "rb"))
        else:

            wrist_device = devices[0]
            if self.motion == True:
                input_columns = ['red', 'ir', 'gyro', 'accel']
            elif self.motion == 'only':
                input_columns = ['gyro', 'accel']
            else:
                input_columns = ['red', 'ir']
            X_raw = wrist_device[input_columns]

            X_windowed = self._windowize_tsfresh(X_raw)

            if self.feature_type == 'efficient':
                features = EfficientFCParameters()
            elif self.feature_type == 'comprehensive':
                features = ComprehensiveFCParameters()
            elif self.feature_type == 'minimal':
                features = MinimalFCParameters()
            else:
                raise RuntimeError("Invalid feature type")
            print("Extracting features for trial " + str(trial_id))
            X = extract_features(X_windowed,
                                 column_id='id',
                                 column_sort='time',
                                 n_jobs=N_JOBS,
                                 default_fc_parameters=features)
            impute(X)
            pickle.dump(X, open(pickle_path, "wb"))
            return X
Example #13
def extract_features(timeseries_container, default_fc_parameters=None,
                     kind_to_fc_parameters=None,
                     column_id=None, column_sort=None, column_kind=None, column_value=None,
                     chunksize=defaults.CHUNKSIZE,
                     n_jobs=defaults.N_PROCESSES, show_warnings=defaults.SHOW_WARNINGS,
                     disable_progressbar=defaults.DISABLE_PROGRESSBAR,
                     impute_function=defaults.IMPUTE_FUNCTION,
                     profile=defaults.PROFILING,
                     profiling_filename=defaults.PROFILING_FILENAME,
                     profiling_sorting=defaults.PROFILING_SORTING,
                     distributor=None):
    """
    Extract features from

    * a :class:`pandas.DataFrame` containing the different time series

    or

    * a dictionary of :class:`pandas.DataFrame` each containing one type of time series

    In both cases a :class:`pandas.DataFrame` with the calculated features will be returned.

    For a list of all the calculated time series features, please see the
    :class:`~tsfresh.feature_extraction.settings.ComprehensiveFCParameters` class,
    which is used to control which features with which parameters are calculated.

    For a detailed explanation of the different parameters and data formats please see :ref:`data-formats-label`.

    Examples
    ========

    >>> from tsfresh.examples import load_robot_execution_failures
    >>> from tsfresh import extract_features
    >>> df, _ = load_robot_execution_failures()
    >>> X = extract_features(df, column_id='id', column_sort='time')

    :param timeseries_container: The pandas.DataFrame with the time series to compute the features for, or a
            dictionary of pandas.DataFrames.
    :type timeseries_container: pandas.DataFrame or dict

    :param default_fc_parameters: mapping from feature calculator names to parameters. Only those names
           which are keys in this dict will be calculated. See :class:`ComprehensiveFCParameters` for
           more information.
    :type default_fc_parameters: dict

    :param kind_to_fc_parameters: mapping from kind names to objects of the same type as the ones for
            default_fc_parameters. If you put a kind as a key here, the fc_parameters
            object (which is the value) will be used instead of the default_fc_parameters. This means that kinds
            for which kind_to_fc_parameters does not have an entry will be ignored by the feature extraction.
    :type kind_to_fc_parameters: dict

    :param column_id: The name of the id column to group by.
    :type column_id: str

    :param column_sort: The name of the sort column.
    :type column_sort: str

    :param column_kind: The name of the column keeping record on the kind of the value.
    :type column_kind: str

    :param column_value: The name for the column keeping the value itself.
    :type column_value: str

    :param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used.
    :type n_jobs: int

    :param chunksize: The size of one chunk that is submitted to the worker
        process for the parallelisation, where one chunk is defined as a
        singular time series for one id and one kind. If you set the chunksize
        to 10, it means that one task is to calculate all features for 10
        time series. If it is set to None, heuristics are used, depending on
        the distributor, to find the optimal chunksize. If you get out-of-memory
        exceptions, you can try it with the dask distributor and a
        smaller chunksize.
    :type chunksize: None or int

    :param show_warnings: Show warnings during the feature extraction (needed for debugging of calculators).
    :type show_warnings: bool

    :param disable_progressbar: Do not show a progressbar while doing the calculation.
    :type disable_progressbar: bool

    :param impute_function: None, if no imputing should happen or the function to call for imputing.
    :type impute_function: None or callable

    :param profile: Turn on profiling during feature extraction
    :type profile: bool

    :param profiling_sorting: How to sort the profiling results (see the documentation of the profiling package for
           more information)
    :type profiling_sorting: basestring

    :param profiling_filename: Where to save the profiling results.
    :type profiling_filename: basestring

    :param distributor: Advanced parameter: set this to a class name that you want to use as a
             distributor. See the utilities/distribution.py for more information. Leave to None, if you want
             TSFresh to choose the best distributor.
    :type distributor: class

    :return: The (maybe imputed) DataFrame containing extracted features.
    :rtype: pandas.DataFrame
    """

    # Always use the standardized way of storing the data.
    # See the function normalize_input_to_internal_representation for more information.
    df_melt, column_id, column_kind, column_value = \
        dataframe_functions._normalize_input_to_internal_representation(
            timeseries_container=timeseries_container,
            column_id=column_id, column_kind=column_kind,
            column_sort=column_sort,
            column_value=column_value)
    # Use the standard settings if the user did not supply any.
    if default_fc_parameters is None and kind_to_fc_parameters is None:
        default_fc_parameters = ComprehensiveFCParameters()
    elif default_fc_parameters is None and kind_to_fc_parameters is not None:
        default_fc_parameters = {}

    # If requested, do profiling (advanced feature)
    if profile:
        profiler = profiling.start_profiling()

    with warnings.catch_warnings():
        if not show_warnings:
            warnings.simplefilter("ignore")
        else:
            warnings.simplefilter("default")

        result = _do_extraction(df=df_melt,
                                column_id=column_id, column_value=column_value,
                                column_kind=column_kind,
                                n_jobs=n_jobs, chunk_size=chunksize,
                                disable_progressbar=disable_progressbar,
                                default_fc_parameters=default_fc_parameters,
                                kind_to_fc_parameters=kind_to_fc_parameters,
                                distributor=distributor)

        # Impute the result if requested
        if impute_function is not None:
            impute_function(result)

    # Turn off profiling if it was turned on
    if profile:
        profiling.end_profiling(profiler, filename=profiling_filename,
                                sorting=profiling_sorting)

    return result
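To make the default_fc_parameters / kind_to_fc_parameters distinction concrete, a minimal sketch using the wide dataframe format, where every non-id/sort column is one kind:

import pandas as pd
from tsfresh import extract_features

df = pd.DataFrame({"id": [1, 1, 2, 2], "time": [0, 1, 0, 1],
                   "temperature": [1.0, 2.0, 3.0, 4.0],
                   "pressure": [9.0, 8.0, 7.0, 6.0]})
X = extract_features(df, column_id="id", column_sort="time",
                     kind_to_fc_parameters={"temperature": {"maximum": None},
                                            "pressure": {"mean": None}},
                     n_jobs=0)
# -> exactly two columns: temperature__maximum and pressure__mean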
Example #14
 def __init__(
         self, has_duplicate_max, binned_entropy, last_location_of_maximum,
         abs_energy, c3, value_count, mean_second_derivative_central,
         first_location_of_minimum, standard_deviation, length,
         mean_abs_change, has_duplicate_min, mean_change, sum_values,
         percentage_of_reoccurring_datapoints_to_all_datapoints,
         range_count, absolute_sum_of_changes, energy_ratio_by_chunks,
         last_location_of_minimum, linear_trend,
         variance_larger_than_standard_deviation, spkt_welch_density,
         cid_ce, symmetry_looking, has_duplicate, skewness,
         count_above_mean, longest_strike_below_mean, mean,
         agg_autocorrelation, ratio_value_number_to_time_series_length,
         fft_aggregated, first_location_of_maximum, partial_autocorrelation,
         sum_of_reoccurring_data_points, count_below_mean, variance,
         longest_strike_above_mean, median, kurtosis, minimum,
         time_reversal_asymmetry_statistic, number_crossing_m,
         sum_of_reoccurring_values, maximum, approximate_entropy,
         number_cwt_peaks, augmented_dickey_fuller, quantile,
         agg_linear_trend, max_langevin_fixed_point, friedrich_coefficients,
         fft_coefficient, large_standard_deviation, autocorrelation,
         cwt_coefficients, percentage_of_reoccurring_values_to_all_values,
         ar_coefficient, ratio_beyond_r_sigma, number_peaks, sample_entropy,
         change_quantiles):
     # All constructor arguments are boolean flags named after the feature
     # calculators they control; collect them once so the comprehensive set
     # can be trimmed in a single loop instead of sixty if-statements.
     flags = {name: value for name, value in locals().items()
              if name != "self"}
     initial_map = ComprehensiveFCParameters()
     initial_map.pop("linear_trend_timewise")  # broken
     for name, enabled in flags.items():
         if not enabled:
             initial_map.pop(name)
     super().__init__(initial_map)
Example #15
 def __init__(self):
     initial_map = ComprehensiveFCParameters()
     initial_map.pop("sample_entropy")
     initial_map.pop("change_quantiles")
     initial_map.pop("linear_trend_timewise")  # broken
     super().__init__(initial_map)
Example #16
 def __init__(self):
     initial_map = ComprehensiveFCParameters()
     initial_map.pop("sample_entropy")
     initial_map.pop("change_quantiles")
     initial_map.pop("approximate_entropy")
     initial_map.pop("number_cwt_peaks")
     initial_map.pop("augmented_dickey_fuller")
     initial_map.pop("quantile")
     initial_map.pop("agg_linear_trend")
     initial_map.pop("max_langevin_fixed_point")
     initial_map.pop("friedrich_coefficients")
     initial_map.pop("fft_coefficient")
     initial_map.pop("large_standard_deviation")
     initial_map.pop("autocorrelation")
     initial_map.pop("cwt_coefficients")
     initial_map.pop("percentage_of_reoccurring_values_to_all_values")
     initial_map.pop("ar_coefficient")
     initial_map.pop("ratio_beyond_r_sigma")
     initial_map.pop("number_peaks")
     initial_map.pop("linear_trend_timewise")  #broken
     super().__init__(initial_map)
Example #17
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

def extract_product_features(df, fc_parameter, destination):
  features_product = []
  extraction_method = fc_parameter.__class__.__name__
  for p in df.sitc_id.unique():
    product = df[df.sitc_id == p]
    p_features = extract_features(
      product[["export_val", "year", "country"]],
      column_id="country",
      column_sort="year",
      column_value=None, column_kind=None,
      chunksize=None,
      default_fc_parameters=fc_parameter
      )
    features_product.append(p_features)
    # write each product's features into the requested destination folder
    p_features.to_csv(f"{destination}/{p}_{extraction_method}_expval.csv")
    print(f'Extracted features for {p}: \n {p_features}')
  product_features = pd.concat(features_product)
  return product_features

destination_1 = f'{PATH}/efficient_parameters'
destination_2 = f'{PATH}/comprehensive_parameters'
fc_parameters = [EfficientFCParameters(), ComprehensiveFCParameters()]
extract_product_features(trade_dframe, fc_parameters[0], destination_1)
extract_product_features(trade_dframe, fc_parameters[1], destination_2)
Example #18
def extract_features(timeseries_container, default_fc_parameters=None,
                     kind_to_fc_parameters=None,
                     column_id=None, column_sort=None, column_kind=None, column_value=None,
                     parallelization=None, chunksize=defaults.CHUNKSIZE,
                     n_processes=defaults.N_PROCESSES, show_warnings=defaults.SHOW_WARNINGS,
                     disable_progressbar=defaults.DISABLE_PROGRESSBAR,
                     impute_function=defaults.IMPUTE_FUNCTION,
                     profile=defaults.PROFILING,
                     profiling_filename=defaults.PROFILING_FILENAME,
                     profiling_sorting=defaults.PROFILING_SORTING):
    """
    Extract features from

    * a :class:`pandas.DataFrame` containing the different time series

    or

    * a dictionary of :class:`pandas.DataFrame` each containing one type of time series

    In both cases a :class:`pandas.DataFrame` with the calculated features will be returned.

    For a list of all the calculated time series features, please see the
    :class:`~tsfresh.feature_extraction.settings.ComprehensiveFCParameters` class,
    which is used to control which features with which parameters are calculated.

    For a detailed explanation of the different parameters and data formats please see :ref:`data-formats-label`.

    Examples
    ========

    >>> from tsfresh.examples import load_robot_execution_failures
    >>> from tsfresh import extract_features
    >>> df, _ = load_robot_execution_failures()
    >>> X = extract_features(df, column_id='id', column_sort='time')

    :param timeseries_container: The pandas.DataFrame with the time series to compute the features for, or a
            dictionary of pandas.DataFrames.
    :type timeseries_container: pandas.DataFrame or dict

    :param default_fc_parameters: mapping from feature calculator names to parameters. Only those names
           which are keys in this dict will be calculated. See :class:`ComprehensiveFCParameters` for
           more information.
    :type default_fc_parameters: dict

    :param kind_to_fc_parameters: mapping from kind names to objects of the same type as the ones for
            default_fc_parameters. If you put a kind as a key here, the fc_parameters
            object (which is the value), will be used instead of the default_fc_parameters.
    :type kind_to_fc_parameters: dict

    :param column_id: The name of the id column to group by.
    :type column_id: str

    :param column_sort: The name of the sort column.
    :type column_sort: str

    :param column_kind: The name of the column keeping record on the kind of the value.
    :type column_kind: str

    :param column_value: The name for the column keeping the value itself.
    :type column_value: str

    :param parallelization: Either ``'per_sample'``, ``'per_kind'`` or ``'serial'``, see
                            :func:`~tsfresh.feature_extraction.extraction._extract_features_parallel_per_sample`,
                            :func:`~tsfresh.feature_extraction.extraction._extract_features_parallel_per_kind` and
                            :ref:`parallelization-label` for details.
                            Choosing None makes the algorithm look for the best parallelization technique by applying
                            some general assumptions.
    :type parallelization: str

    :param chunksize: The size of one chunk for the parallelisation
    :type chunksize: None or int

    :param n_processes: The number of processes to use for parallelisation.
    :type n_processes: int

    :param show_warnings: Show warnings during the feature extraction (needed for debugging of calculators).
    :type show_warnings: bool

    :param disable_progressbar: Do not show a progressbar while doing the calculation.
    :type disable_progressbar: bool

    :param impute_function: None, if no imputing should happen or the function to call for imputing.
    :type impute_function: None or function

    :param profile: Turn on profiling during feature extraction
    :type profile: bool

    :param profiling_sorting: How to sort the profiling results (see the documentation of the profiling package for
           more information)
    :type profiling_sorting: basestring

    :param profiling_filename: Where to save the profiling results.
    :type profiling_filename: basestring

    :return: The (maybe imputed) DataFrame containing extracted features.
    :rtype: pandas.DataFrame
    """
    import logging
    logging.basicConfig()

    # Always use the standardized way of storing the data.
    # See the function normalize_input_to_internal_representation for more information.
    kind_to_df_map, column_id, column_value = \
        dataframe_functions.normalize_input_to_internal_representation(df_or_dict=timeseries_container,
                                                                       column_id=column_id,
                                                                       column_sort=column_sort,
                                                                       column_kind=column_kind,
                                                                       column_value=column_value)

    # Use the standard settings if the user did not supply any.
    if default_fc_parameters is None:
        default_fc_parameters = ComprehensiveFCParameters()

    # Choose the parallelization according to a rule-of-thumb
    if parallelization is None:
        parallelization = 'per_sample' if n_processes / 2 > len(kind_to_df_map) else 'per_kind'

    _logger.info('Parallelizing feature calculation {}'.format(parallelization))

    # If requested, do profiling (advanced feature)
    if profile:
        profiler = profiling.start_profiling()

    # Calculate the result
    if parallelization == 'per_kind':
        calculation_function = _extract_features_per_kind
    elif parallelization == 'per_sample':
        calculation_function = _extract_features_parallel_per_sample
    elif parallelization == 'serial':
        calculation_function = partial(_extract_features_per_kind, serial=True)
    else:
        raise ValueError("Argument parallelization must be one of: 'per_kind', 'per_sample'")

    result = calculation_function(kind_to_df_map,
                                  default_fc_parameters=default_fc_parameters,
                                  kind_to_fc_parameters=kind_to_fc_parameters,
                                  column_id=column_id,
                                  column_value=column_value,
                                  chunksize=chunksize,
                                  n_processes=n_processes,
                                  show_warnings=show_warnings,
                                  disable_progressbar=disable_progressbar,
                                  impute_function=impute_function
                                  )

    # Turn off profiling if it was turned on
    if profile:
        profiling.end_profiling(profiler, filename=profiling_filename,
                                sorting=profiling_sorting)

    return result
Example #19
    train_X_array = train_X.values
    train_X_reshape = train_X_array.reshape(792*2,3000)
    train_X_ts = pd.DataFrame(train_X_reshape)
    ids = np.empty([len(train_X_ts),2])
    for i in range(1,len(train_X_ts)+1):
        ids[i-1][0] = i//3+1
        ids[i-1][1] = i % 3
    train_X_ts['id'] = ids[:,0]
    train_X_ts['time'] = ids[:,1]
    train_X_ts.head()
    y = traindata['label']

    from tsfresh import extract_features, select_features
    from tsfresh.utilities.dataframe_functions import impute
    from tsfresh.feature_extraction.settings import ComprehensiveFCParameters
    extract_settings = ComprehensiveFCParameters()
    extracted_features = extract_features(train_X_ts, column_id="id", column_sort="time",
                                          default_fc_parameters=extract_settings, impute_function=impute)
    extracted_features.head()
    # from tsfresh import select_features
    # from tsfresh.utilities.dataframe_functions import impute
    # impute(extracted_features)
    # filtered_features = select_features(extracted_features, y)
    # filtered_features.head()
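    # A sketch of the selection step outlined in the comments above; it assumes
    # y is a pandas Series aligned with the index of extracted_features
    # (select_features and impute are imported above):
    impute(extracted_features)
    filtered_features = select_features(extracted_features, y)
    filtered_features.head()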

    # Split into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(train_X_feature[var], train_y, random_state=1, train_size=0.8)
    print("Size of training set:{} size of testing set:{}".format(x_train.shape[0],x_test.shape[0]))

    # Train the SVM model
    clf = svm.SVC(C=1, kernel='rbf', gamma=0.01, decision_function_shape='ovr')
    # Train
Example #20
Extract features:
'''
# Create wavelet features for each projected dimension,
# and stack both dimensions horizontally:
WavFeatures = WavTransform.WavTransform()
lab_ver_features = WavFeatures.createWavFeatures(lab_ver_denoised)
lab_hor_features = WavFeatures.createWavFeatures(lab_hor_denoised)
features_data = np.column_stack((lab_ver_features, lab_hor_features))

# Create TSFresh features for each projected dimension,
# and stack both dimensions horizontally:
lab_ver_for_tsf = ts_fresh.convert_signals_for_ts_fresh(
    sub_lab_ver_proj, "ver")
lab_ver_tsf_features = extract_features(
    lab_ver_for_tsf,
    default_fc_parameters=ComprehensiveFCParameters(),
    column_id="signal_id",
    column_sort="time")
lab_hor_for_tsf = ts_fresh.convert_signals_for_ts_fresh(
    sub_lab_hor_proj, "hor")
lab_hor_tsf_features = extract_features(
    lab_hor_for_tsf,
    default_fc_parameters=EfficientFCParameters(),
    column_id="signal_id",
    column_sort="time")
features_data = pd.concat([
    lab_ver_tsf_features, lab_hor_tsf_features,
    pd.DataFrame(lab_ver_features),
    pd.DataFrame(lab_hor_features)
], axis=1)
Example #21
 def test_range_count_correctly_configured(self):
     fset = ComprehensiveFCParameters()
     params_range_count = fset["range_count"]
     for param in params_range_count:
         assert param["min"] < param["max"]
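For reference, the parameter list being validated is an ordinary list of {'min': ..., 'max': ...} dicts; a sketch:

from tsfresh.feature_extraction.settings import ComprehensiveFCParameters

for p in ComprehensiveFCParameters()["range_count"]:
    print(p["min"], p["max"])  # every configured interval satisfies min < max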