Example #1
from fhub_core.feature import Feature
from fhub_transformers import IdentityTransformer  # module path assumed


def get_target_transformations():
    # the prediction target passes through unchanged
    transformations = []
    transformations.append(
        Feature(
            input='total_cases',
            transformer=IdentityTransformer(),
        ))
    return transformations
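A minimal usage sketch, assuming training data X and y are in scope and that FeatureValidator, shown in the later examples, is importable from fhub_core:

from fhub_core.feature import FeatureValidator  # module path assumed

validator = FeatureValidator(X, y)  # X, y: training data placeholders
for feature in get_target_transformations():
    result, failures = validator.validate(feature)
    print(feature.input, result, failures)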
Example #2
def test_bad_feature_input(self):
    # bad input: Feature input should be a string or list of strings,
    # not an int
    feature = Feature(
        input=3,
        transformer=sklearn.preprocessing.Imputer(),
    )
    validator = FeatureValidator(self.X, self.y)
    result, failures = validator.validate(feature)
    self.assertFalse(result)
    self.assertIn('has_correct_input_type', failures)
Example #3
def test_good_feature(self):
    feature = Feature(
        input='size',
        transformer=sklearn.preprocessing.Imputer(),
    )

    validator = FeatureValidator(self.X, self.y)
    result, failures = validator.validate(feature)
    self.assertTrue(result)
    self.assertEqual(len(failures), 0)
Example #4
def test_bad_feature_transform_errors(self):
    # transformer raises an error on every transform call
    feature = Feature(
        input='size',
        transformer=FragileTransformer(
            (lambda x: True, ), (RuntimeError, ))
    )
    validator = FeatureValidator(self.X, self.y)
    result, failures = validator.validate(feature)
    self.assertFalse(result)
    self.assertIn('can_transform', failures)
Example #5
def test_bad_feature_deepcopy_fails(self):
    class _CopyFailsTransformer(IdentityTransformer):
        # the deepcopy hook receives a memo dict of already-copied objects
        def __deepcopy__(self, memo):
            raise RuntimeError
    feature = Feature(
        input='size',
        transformer=_CopyFailsTransformer(),
    )
    validator = FeatureValidator(self.X, self.y)
    result, failures = validator.validate(feature)
    self.assertFalse(result)
    self.assertIn('can_deepcopy', failures)
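A standalone illustration of the hook this test exercises, assuming _CopyFailsTransformer is lifted to module scope: copy.deepcopy dispatches to __deepcopy__, so the override above makes any deep copy fail.

import copy

transformer = _CopyFailsTransformer()
try:
    copy.deepcopy(transformer)
except RuntimeError:
    print('deepcopy failed, which the validator reports as can_deepcopy')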
Example #6
def test_bad_feature_wrong_transform_length(self):
    class _WrongLengthTransformer(
            BaseEstimator, NoFitMixin, TransformerMixin):
        def transform(self, X, **transform_kwargs):
            # produce one more row than the input has
            new_shape = list(X.shape)
            new_shape[0] += 1
            output = np.arange(np.prod(new_shape)).reshape(new_shape)
            return output

    # doesn't return correct length
    feature = Feature(
        input='size',
        transformer=_WrongLengthTransformer(),
    )
    validator = FeatureValidator(self.X, self.y)
    result, failures = validator.validate(feature)
    self.assertFalse(result)
    self.assertIn('has_correct_output_dimensions', failures)
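For intuition, a hypothetical snippet (assuming _WrongLengthTransformer is available at module scope): the output gains a row, so its length no longer matches the input, which is exactly what the dimension check catches.

import numpy as np

X = np.arange(10).reshape(5, 2)               # 5 input rows
out = _WrongLengthTransformer().transform(X)  # returns 6 rows
assert out.shape[0] == X.shape[0] + 1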
Example #7
import sklearn.preprocessing
from fhub_core.feature import Feature
from fhub_transformers.missing import LagImputer

features = [
    Feature(
        input='ndvi_nw',
        transformer=[
            LagImputer(groupby_kwargs={'level': 'city'}),
            sklearn.preprocessing.Imputer(),
            sklearn.preprocessing.StandardScaler(),
        ],
    ),
]
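If, as these examples suggest, a transformer list is applied in order, a rough scikit-learn analogue would be a Pipeline over the same steps. This is only a sketch of the semantics, not fhub_core's actual implementation:

from sklearn.pipeline import Pipeline

equivalent = Pipeline([
    ('lag_imputer', LagImputer(groupby_kwargs={'level': 'city'})),
    ('imputer', sklearn.preprocessing.Imputer()),
    ('scaler', sklearn.preprocessing.StandardScaler()),
])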
Example #8
import numpy as np
import sklearn.decomposition
import sklearn.preprocessing

from fhub_core.feature import Feature
from fhub_transformers.missing import LagImputer

# NullFiller, NullIndicator, SimpleFunctionTransformer, SingleLagger, and
# get_contrib_features are assumed to be importable from the surrounding
# project; their module paths are not shown in these examples.


def get_feature_transformations():
    features = []

    features.append(
        Feature(input='ndvi_se',
                transformer=[
                    LagImputer(groupby_kwargs={'level': 'city'}),
                    sklearn.preprocessing.Imputer(),
                    sklearn.preprocessing.StandardScaler(),
                ]))

    features.append(
        Feature(input='ndvi_sw',
                transformer=[
                    LagImputer(groupby_kwargs={'level': 'city'}),
                    sklearn.preprocessing.Imputer(),
                    sklearn.preprocessing.StandardScaler(),
                ]))

    features.append(
        Feature(input='precipitation_amt_mm',
                transformer=[
                    LagImputer(groupby_kwargs={'level': 'city'}),
                    sklearn.preprocessing.Imputer(),
                    SimpleFunctionTransformer(np.log1p)
                ]))

    features.append(
        Feature(input='precipitation_amt_mm', transformer=[
            NullIndicator(),
        ]))

    # Same basic cleaning of time series features.
    for input_ in [
            'reanalysis_air_temp_k',
            'reanalysis_avg_temp_k',
            'reanalysis_dew_point_temp_k',
            'reanalysis_max_air_temp_k',
            'reanalysis_min_air_temp_k',
            'reanalysis_precip_amt_kg_per_m2',
            'reanalysis_relative_humidity_percent',
            'reanalysis_specific_humidity_g_per_kg',
            'reanalysis_tdtr_k',
            'station_avg_temp_c',
            'station_diur_temp_rng_c',
            'station_max_temp_c',
            'station_min_temp_c',
            'station_precip_mm',
    ]:
        features.append(
            Feature(input=input_,
                    transformer=[
                        LagImputer(groupby_kwargs={'level': 'city'}),
                        NullFiller(replacement=0.0),
                        sklearn.preprocessing.StandardScaler(),
                    ]))

    features.append(
        Feature(input='reanalysis_sat_precip_amt_mm',
                transformer=[
                    SingleLagger(1, groupby_kwargs={'level': 'city'}),
                    LagImputer(groupby_kwargs={'level': 'city'}),
                    NullFiller(replacement=0.0),
                ]))

    features.append(
        Feature(input=[
            'reanalysis_sat_precip_amt_mm',
            'reanalysis_relative_humidity_percent',
            'reanalysis_specific_humidity_g_per_kg',
            'reanalysis_precip_amt_kg_per_m2',
            'precipitation_amt_mm',
            'station_precip_mm',
        ],
                transformer=[
                    LagImputer(groupby_kwargs={'level': 'city'}),
                    sklearn.preprocessing.Imputer(),
                    sklearn.decomposition.PCA(n_components=2),
                ]))

    # add contributed features
    contrib_features = get_contrib_features()
    features.extend(contrib_features)

    return features
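A hedged sketch tying this module back to Examples #2 through #6: each assembled feature can be validated before use. X and y stand in for the training data, and FeatureValidator is assumed importable as in the earlier tests.

validator = FeatureValidator(X, y)  # X, y: training data placeholders
for feature in get_feature_transformations():
    result, failures = validator.validate(feature)
    if not result:
        print('feature on {} failed: {}'.format(feature.input, failures))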