def get_target_transformations():
    """Return the list of transformations applied to the prediction target.

    The target column is passed through unchanged via an identity transform.
    """
    return [
        Feature(
            input='total_cases',
            transformer=IdentityTransformer(),
        ),
    ]
def test_bad_feature_input(self):
    """A non-string feature input should fail the input-type check."""
    # 'input' is expected to name a column; an int is invalid.
    bad_feature = Feature(
        input=3,
        transformer=sklearn.preprocessing.Imputer(),
    )
    ok, failed_checks = FeatureValidator(self.X, self.y).validate(bad_feature)
    self.assertFalse(ok)
    self.assertIn('has_correct_input_type', failed_checks)
def test_good_feature(self):
    """A well-formed feature passes validation with no recorded failures."""
    good_feature = Feature(
        input='size',
        transformer=sklearn.preprocessing.Imputer(),
    )
    ok, failed_checks = FeatureValidator(self.X, self.y).validate(good_feature)
    self.assertTrue(ok)
    self.assertEqual(len(failed_checks), 0)
def test_bad_feature_transform_errors(self):
    """A transformer that raises during transform fails 'can_transform'."""
    # FragileTransformer is configured to raise RuntimeError on any input.
    fragile = Feature(
        input='size',
        transformer=FragileTransformer(
            (lambda x: True, ),
            (RuntimeError, ),
        ),
    )
    ok, failed_checks = FeatureValidator(self.X, self.y).validate(fragile)
    self.assertFalse(ok)
    self.assertIn('can_transform', failed_checks)
def test_bad_feature_deepcopy_fails(self):
    """A transformer that cannot be deep-copied fails 'can_deepcopy'."""
    class _CopyFailsTransformer(IdentityTransformer):
        # BUG FIX: copy.deepcopy invokes __deepcopy__(self, memo). The
        # original one-argument signature made deepcopy raise TypeError
        # (wrong arity) rather than the RuntimeError the test means to
        # simulate. Either exception fails the check, but this raises the
        # intended one.
        def __deepcopy__(self, memo):
            raise RuntimeError

    feature = Feature(
        input='size',
        transformer=_CopyFailsTransformer(),
    )
    validator = FeatureValidator(self.X, self.y)
    result, failures = validator.validate(feature)
    self.assertFalse(result)
    self.assertIn('can_deepcopy', failures)
def test_bad_feature_wrong_transform_length(self):
    """A transformer whose output row count differs from its input fails
    'has_correct_output_dimensions'.
    """
    class _WrongLengthTransformer(
            BaseEstimator, NoFitMixin, TransformerMixin):
        def transform(self, X, **transform_kwargs):
            # Emit an array with one extra row so the output length can
            # never match the input length.
            padded_shape = list(X.shape)
            padded_shape[0] += 1
            return np.arange(np.prod(padded_shape)).reshape(padded_shape)

    feature = Feature(
        input='size',
        transformer=_WrongLengthTransformer(),
    )
    ok, failed_checks = FeatureValidator(self.X, self.y).validate(feature)
    self.assertFalse(ok)
    self.assertIn('has_correct_output_dimensions', failed_checks)
import sklearn.preprocessing

from fhub_core.feature import Feature
from fhub_transformers.missing import LagImputer

# Contributed feature: impute gaps in the 'ndvi_nw' series (lag fill per
# city, then mean imputation) and standardize the result.
features = [
    Feature(
        input='ndvi_nw',
        transformer=[
            LagImputer(groupby_kwargs={'level': 'city'}),
            sklearn.preprocessing.Imputer(),
            sklearn.preprocessing.StandardScaler(),
        ],
    ),
]
def get_feature_transformations():
    """Build the full list of feature transformations for the model.

    Returns:
        list of Feature: one entry per input column (or column group),
        each pairing the input with its transformer pipeline, with any
        contributed features appended at the end.
    """
    features = []

    # Vegetation-index series: lag-fill per city, impute, standardize.
    features.append(
        Feature(input='ndvi_se', transformer=[
            LagImputer(groupby_kwargs={'level': 'city'}),
            sklearn.preprocessing.Imputer(),
            sklearn.preprocessing.StandardScaler(),
        ]))
    features.append(
        Feature(input='ndvi_sw', transformer=[
            LagImputer(groupby_kwargs={'level': 'city'}),
            sklearn.preprocessing.Imputer(),
            sklearn.preprocessing.StandardScaler(),
        ]))

    # Precipitation: impute, then log1p to compress the heavy right tail.
    features.append(
        Feature(input='precipitation_amt_mm', transformer=[
            LagImputer(groupby_kwargs={'level': 'city'}),
            sklearn.preprocessing.Imputer(),
            SimpleFunctionTransformer(np.log1p)
        ]))
    # Separate missingness indicator for the same column.
    features.append(
        Feature(input='precipitation_amt_mm', transformer=[
            NullIndicator(),
        ]))

    # Same basic cleaning of time series features.
    for input_ in [
        'reanalysis_air_temp_k',
        'reanalysis_avg_temp_k',
        'reanalysis_dew_point_temp_k',
        'reanalysis_max_air_temp_k',
        'reanalysis_min_air_temp_k',
        'reanalysis_precip_amt_kg_per_m2',
        'reanalysis_relative_humidity_percent',
        'reanalysis_specific_humidity_g_per_kg',
        'reanalysis_tdtr_k',
        'station_avg_temp_c',
        'station_diur_temp_rng_c',
        'station_max_temp_c',
        'station_min_temp_c',
        'station_precip_mm',
    ]:
        features.append(
            Feature(input=input_, transformer=[
                LagImputer(groupby_kwargs={'level': 'city'}),
                NullFiller(replacement=0.0),
                sklearn.preprocessing.StandardScaler(),
            ]))

    features.append(
        Feature(input='reanalysis_sat_precip_amt_mm', transformer=[
            SingleLagger(1, groupby_kwargs={'level': 'city'}),
            LagImputer(groupby_kwargs={'level': 'city'}),
            NullFiller(replacement=0.0),
        ]))

    # Correlated moisture columns: reduce to two principal components.
    features.append(
        Feature(input=[
            'reanalysis_sat_precip_amt_mm',
            'reanalysis_relative_humidity_percent',
            'reanalysis_specific_humidity_g_per_kg',
            'reanalysis_precip_amt_kg_per_m2',
            'precipitation_amt_mm',
            'station_precip_mm',
        ], transformer=[
            LagImputer(groupby_kwargs={'level': 'city'}),
            sklearn.preprocessing.Imputer(),
            sklearn.decomposition.PCA(n_components=2),
        ]))

    # add contributed features
    contrib_features = get_contrib_features()
    features.extend(contrib_features)

    # BUG FIX: the original ended with a bare `return`, stranding the
    # expression `features` on the following line — the function returned
    # None and the list was discarded. Return the assembled list.
    return features