def test_imputes_categorical_features_using_training_set(self):
    """Check that a ``None`` categorical value is filled from training data.

    Both training rows carry ``'eyes': 'blue'``; the single row under test
    has ``'eyes': None``. After ``Imputer.impute_categorical`` the first
    returned row is expected to have ``'eyes'`` set to ``'blue'`` and
    ``'hair'`` left as ``'brown'``.
    """
    training_rows = [
        {'hair': 'brown', 'eyes': 'blue'},
        {'hair': 'black', 'eyes': 'blue'},
    ]
    rows_with_gap = [
        {'hair': 'brown', 'eyes': None},
    ]

    result = Imputer().impute_categorical(training_rows, rows_with_gap)

    expected_row = {'hair': 'brown', 'eyes': 'blue'}
    self.assertEqual(result[0], expected_row)
def __init__(self, training_features_to_targets, features_to_targets):
    """Populate ids, targets, training features and vectorized features.

    Rows of both arguments are indexed positionally. For
    ``features_to_targets``, ``row[0]`` is converted with ``int`` into an
    event id, ``row[1]`` is indexed with ``'categorical'`` and
    ``'continuous'``, and ``row[2]`` is stored as the target. For
    ``training_features_to_targets`` only ``row[1]`` is read.

    The training feature lists are kept on the instance and passed to
    ``Imputer`` as the first argument when imputing the non-training
    features. The imputed results are then passed to ``self.vectorize``
    and stored as ``self.features``.
    """

    def feature_group(rows, group):
        # Pull one feature group ('categorical' / 'continuous') per row.
        return [row[1][group] for row in rows]

    self.event_ids = [int(row[0]) for row in features_to_targets]
    self.targets = [row[2] for row in features_to_targets]

    self.training_categorical_features = feature_group(
        training_features_to_targets, 'categorical')
    self.training_continuous_features = feature_group(
        training_features_to_targets, 'continuous')

    imputer = Imputer()

    continuous_raw = feature_group(features_to_targets, 'continuous')
    continuous_imputed = imputer.impute_continuous(
        self.training_continuous_features, continuous_raw)
    # NOTE(review): a self.scale(...) step on the continuous features was
    # commented out here; the imputed values are used unscaled.

    categorical_raw = feature_group(features_to_targets, 'categorical')
    categorical_imputed = imputer.impute_categorical(
        self.training_categorical_features, categorical_raw)

    self.features = self.vectorize(categorical_imputed, continuous_imputed)