def test_from_columns(self): tsn = "TEST_TIME_SERIES" fset = FeatureExtractionSettings() self.assertRaises(TypeError, fset.from_columns, 42) self.assertRaises(TypeError, fset.from_columns, 42) self.assertRaises(ValueError, fset.from_columns, ["This is not a column name"]) self.assertRaises(ValueError, fset.from_columns, ["This__neither"]) self.assertRaises(ValueError, fset.from_columns, ["This__also__not"]) # Aggregate functions feature_names = [tsn + '__sum_values', tsn + "__median", tsn + "__length", tsn + "__sample_entropy"] # Aggregate functions with params feature_names += [tsn + '__quantile__q_10', tsn + '__quantile__q_70', tsn + '__number_peaks__n_30', tsn + '__value_count__value_inf', tsn + '__value_count__value_-inf', tsn + '__value_count__value_nan'] # Apply functions feature_names += [tsn + '__ar_coefficient__k_20__coeff_4', tsn + '__ar_coefficient__coeff_10__k_-1'] cset = fset.from_columns(feature_names) six.assertCountEqual(self, list(cset.kind_to_calculation_settings_mapping[tsn].keys()), ["sum_values", "median", "length", "sample_entropy", "quantile", "number_peaks", "ar_coefficient", "value_count"]) self.assertEqual(cset.kind_to_calculation_settings_mapping[tsn]["sum_values"], None) self.assertEqual(cset.kind_to_calculation_settings_mapping[tsn]["ar_coefficient"], [{"k": 20, "coeff": 4}, {"k": -1, "coeff": 10}]) self.assertEqual(cset.kind_to_calculation_settings_mapping[tsn]["value_count"], [{"value": np.PINF}, {"value": np.NINF}, {"value": np.NaN}])
def transform(self, X): """ After the fit step, it is known which features are relevant. Only extract those from the time series handed in with the function :func:`~set_timeseries_container`. If evaluate_only_added_features is False, also delete the irrelevant, already present features in the data frame. :param X: the data sample to add the relevant (and delete the irrelevant) features to. :type X: pandas.DataFrame or numpy.array :return: a data sample with the same information as X, but with added relevant time series features and deleted irrelevant information (only if evaluate_only_added_features is False). :rtype: pandas.DataFrame """ if self.feature_selector.relevant_features is None: raise RuntimeError("You have to call fit before.") if self.timeseries_container is None: raise RuntimeError( "You have to provide a time series using the set_timeseries_container function before." ) self.feature_extractor.set_timeseries_container( self.timeseries_container) relevant_time_series_features = set( self.feature_selector.relevant_features) - set( pd.DataFrame(X).columns) relevant_extraction_settings = FeatureExtractionSettings.from_columns( relevant_time_series_features) relevant_extraction_settings.set_default = False # Set imputing strategy if self.feature_extractor.settings.IMPUTE is impute_dataframe_range: relevant_extraction_settings.IMPUTE = partial( impute_dataframe_range, col_to_max=self.col_to_max, col_to_min=self.col_to_min, col_to_median=self.col_to_median) else: relevant_extraction_settings.IMPUTE = self.feature_extractor.settings.IMPUTE relevant_feature_extractor = FeatureAugmenter( settings=relevant_extraction_settings, column_id=self.feature_extractor.column_id, column_sort=self.feature_extractor.column_sort, column_kind=self.feature_extractor.column_kind, column_value=self.feature_extractor.column_value) relevant_feature_extractor.set_timeseries_container( self.feature_extractor.timeseries_container) X_augmented = relevant_feature_extractor.transform(X) return X_augmented.copy().loc[:, self.feature_selector.relevant_features]
def transform(self, X): """ After the fit step, it is known which features are relevant. Only extract those from the time series handed in with the function :func:`~set_timeseries_container`. If evaluate_only_added_features is False, also delete the irrelevant, already present features in the data frame. :param X: the data sample to add the relevant (and delete the irrelevant) features to. :type X: pandas.DataFrame or numpy.array :return: a data sample with the same information as X, but with added relevant time series features and deleted irrelevant information (only if evaluate_only_added_features is False). :rtype: pandas.DataFrame """ if self.feature_selector.relevant_features is None: raise RuntimeError("You have to call fit before.") if self.timeseries_container is None: raise RuntimeError( "You have to provide a time series using the set_timeseries_container function before." ) self.feature_extractor.set_timeseries_container( self.timeseries_container) # We can only extract features that originate from time series relevant_extraction_settings = FeatureExtractionSettings.from_columns( list( set(self.feature_selector.relevant_features) - set(pd.DataFrame(X).columns))) relevant_extraction_settings.set_default = False relevant_extraction_settings.IMPUTE = self.feature_extractor.settings.IMPUTE feature_augmenter_restricted = FeatureAugmenter( settings=relevant_extraction_settings, column_id=self.feature_extractor.column_id, column_sort=self.feature_extractor.column_sort, column_kind=self.feature_extractor.column_kind, column_value=self.feature_extractor.column_value) feature_augmenter_restricted.set_timeseries_container( self.feature_extractor.timeseries_container) if self.evaluate_only_added_features: X_tsfresh = feature_augmenter_restricted.transform( X).loc[:, self.feature_selector.relevant_features] return pd.concat([X_tsfresh, X], axis=1) else: X_tsfresh = feature_augmenter_restricted.transform(X) return X_tsfresh.loc[:, self.feature_selector.relevant_features]