def gen_global_feature(self, settings="comprehensive", full_settings=None, n_jobs=1):
    '''
    Generate per-time-series feature for each time series.
    This method will be implemented by tsfresh.
    Make sure that the specified column name does not contain '__'.

    TODO: relationship with scale should be figured out.

    :param settings: str or dict. If a string is set, then it must be one of "comprehensive"
           "minimal" and "efficient". If a dict is set, then it should follow the instruction
           for default_fc_parameters in tsfresh. The value is defaulted to "comprehensive".
           It is not consulted when full_settings is given.
    :param full_settings: dict. It should follow the instruction for kind_to_fc_parameters in
           tsfresh. The value is defaulted to None. When set, it takes precedence
           over settings.
    :param n_jobs: int. The number of processes to use for parallelization.

    :return: the tsdataset instance.
    '''
    assert not self._has_generate_agg_feature, \
        "Only one of gen_global_feature and gen_rolling_feature should be called."

    if full_settings is not None:
        # Per-kind configuration wins; `settings` is ignored on this path.
        fc_kwargs = {"kind_to_fc_parameters": full_settings}
    else:
        if isinstance(settings, str):
            assert settings in ("comprehensive", "minimal", "efficient"), (
                'settings str should be one of "comprehensive", "minimal", "efficient"'
                f', but found {settings}.'
            )
            settings = DEFAULT_PARAMS[settings]
        fc_kwargs = {"default_fc_parameters": settings}

    self.df, additional_feature = generate_global_features(
        input_df=self.df,
        column_id=self.id_col,
        column_sort=self.dt_col,
        n_jobs=n_jobs,
        **fc_kwargs,
    )
    self.feature_col += additional_feature
    # Record the generation on every successful path (including full_settings),
    # so the guard at the top of this method is armed consistently.
    self._has_generate_agg_feature = True
    return self
def gen_global_feature(self, settings="comprehensive", full_settings=None):
    '''
    Generate per-time-series feature for each time series.
    This method will be implemented by tsfresh.

    :param settings: str or dict. If a string is set, then it must be one of "comprehensive"
           "minimal" and "efficient". If a dict is set then it should follow the instruction
           for default_fc_parameters in tsfresh. The value is defaulted to "comprehensive".
           It is not consulted when full_settings is given.
    :param full_settings: dict. It should follow the instruction for kind_to_fc_parameters in
           tsfresh. The value is defaulted to None. When set, it takes precedence
           over settings.

    :return: the tsdataset instance.
    '''
    if full_settings is not None:
        # Per-kind configuration wins; `settings` is ignored on this path.
        fc_kwargs = {"kind_to_fc_parameters": full_settings}
    else:
        if isinstance(settings, str):
            # Validate before importing tsfresh so a bad name fails fast.
            assert settings in ("comprehensive", "minimal", "efficient"), (
                'settings str should be one of "comprehensive", "minimal", "efficient"'
                f', but found {settings}.'
            )
            from tsfresh.feature_extraction import ComprehensiveFCParameters,\
                MinimalFCParameters, EfficientFCParameters
            preset_classes = {
                "comprehensive": ComprehensiveFCParameters,
                "minimal": MinimalFCParameters,
                "efficient": EfficientFCParameters,
            }
            # Instantiate only the preset that was actually requested.
            settings = preset_classes[settings]()
        fc_kwargs = {"default_fc_parameters": settings}

    # generate_global_features is unpacked as (dataframe, new feature names);
    # both parts must be consumed on every path, including full_settings.
    self.df, additional_feature = generate_global_features(
        input_df=self.df,
        column_id=self.id_col,
        column_sort=self.dt_col,
        **fc_kwargs,
    )
    self.feature_col += additional_feature
    return self
def test_gen_global_feature_multi_id(self):
    '''
    Check generate_global_features on a frame holding two ids ("00" and "01").

    The original columns must survive in the output, and every generated
    column must hold exactly one distinct, non-NaN value within each id.
    '''
    from tsfresh.feature_extraction import MinimalFCParameters

    original_columns = ["datetime", "values", "A", "B", "id"]
    series_ids = ("00", "01")

    timestamps = pd.date_range('1/1/2019', periods=8)
    samples = np.random.randn(8, 3)
    df = pd.DataFrame({
        "datetime": timestamps,
        "values": samples[:, 0],
        "A": samples[:, 1],
        "B": samples[:, 2],
        "id": ["00"] * 4 + ["01"] * 4,
    })

    for fc_params in (MinimalFCParameters(),):
        output_df, _ = generate_global_features(input_df=df,
                                                column_id="id",
                                                column_sort="datetime",
                                                default_fc_parameters=fc_params)

        # The input columns are still present after feature generation.
        for name in original_columns:
            assert name in output_df.columns

        generated = [name for name in output_df.columns
                     if name not in original_columns]
        for name in generated:
            per_id = [output_df[output_df["id"] == sid][name] for sid in series_ids]
            # A global feature is a single value repeated across the id's rows.
            for column in per_id:
                assert len(set(column)) == 1
            for column in per_id:
                assert column.isna().sum() == 0