def test_dataframe_model(self, mimic_explainer, sample_cnt_per_grain, grains_dict):
    """Run the mimic explainer on a Pipeline wrapping a DataFrameTestModel.

    :param mimic_explainer: Callable invoked with the model, the data, the
        explainable model class and the explainer keyword arguments.
    :param sample_cnt_per_grain: Forwarded to ``create_timeseries_data``.
    :param grains_dict: Forwarded to ``create_timeseries_data``; when truthy,
        'fruit' is additionally declared as a categorical feature.
    """
    data, _ = create_timeseries_data(sample_cnt_per_grain, 'time', 'y', grains_dict)

    # The test model receives its own copy of the data and is then wrapped
    # in a single-step pipeline before being explained.
    pipeline = Pipeline([('test', DataFrameTestModel(data.copy()))])

    # Feature names are the column names followed by the index level names.
    # NOTE(review): presumably needed because reset_index=True turns the
    # index levels into regular columns - confirm against the explainer.
    feature_names = [*data.columns.values, *data.index.names]

    explainer_kwargs = {
        'explainable_model_args': {'n_jobs': 1},
        'augment_data': False,
        'reset_index': True,
    }
    if grains_dict:
        # NOTE(review): assumes grains_dict yields a 'fruit' grain column
        # - verify against create_timeseries_data.
        explainer_kwargs['categorical_features'] = ['fruit']

    mimic_explainer(
        pipeline,
        data,
        LGBMExplainableModel,
        features=feature_names,
        model_task=ModelTask.Unknown,
        **explainer_kwargs
    )
def test_timestamp_featurization(self, sample_cnt_per_grain, grains_dict):
    """Check the output of CustomTimestampFeaturizer on timeseries data.

    Verifies that the transformed data contains no datetime columns and
    that it has six more columns than the input.

    :param sample_cnt_per_grain: Forwarded to ``create_timeseries_data``.
    :param grains_dict: Forwarded to ``create_timeseries_data``.
    """
    # Build the timeseries input and remember its column names.
    data, _ = create_timeseries_data(sample_cnt_per_grain, 'time', 'y', grains_dict)
    column_names = list(data.columns.values)

    # Fit the featurizer on the data, then transform the same data.
    transformed = CustomTimestampFeaturizer(column_names).fit(data).transform(data)

    # Wrap the result in a temporary dataframe so each column can be inspected.
    frame = pd.DataFrame(transformed)

    # No datetime-typed column may remain after featurization.
    datetime_columns = [name for name in frame.columns if is_datetime(frame[name])]
    assert datetime_columns == []

    # Expected width: 1 time column * 6 featurized columns, plus the originals.
    expected_width = len(column_names) + 6
    assert transformed.shape[1] == expected_width