    def test_add_constraint(self):
        init = self.sk_pca.hyperparam_schema()
        expected = {
            'allOf': [{
                'type': 'object',
                'properties': {}
            }, {
                'anyOf': [{
                    'type': 'object',
                    'properties': {
                        'n_components': {
                            'not': {'enum': ['mle']},
                        }
                    },
                }, {
                    'type': 'object',
                    'properties': {
                        'svd_solver': {'enum': ['full', 'auto']},
                    }
                }]
            }]
        }
        foo = self.sk_pca.customize_schema(constraint=schemas.AnyOf([
            schemas.Object(
                {'n_components': schemas.Not(schemas.Enum(['mle']))}),
            schemas.Object({'svd_solver': schemas.Enum(['full', 'auto'])})
        ]))
        self.assertEqual(foo.hyperparam_schema(), expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.sk_pca.hyperparam_schema(), init)
    def test_override_null_param(self):
        init = self.ll_pca.hyperparam_schema('n_components')
        expected = {'enum': [None]}
        foo = self.ll_pca.customize_schema(n_components=schemas.Null())
        self.assertEqual(foo.hyperparam_schema('n_components'), expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.ll_pca.hyperparam_schema('n_components'), init)
    def test_override_output2(self):
        init_output_schema = self.sk_pca.get_schema('output')
        pca_output = schemas.AnyOf([
            schemas.Array(schemas.Array(schemas.Float())),
            schemas.Array(schemas.Float())
        ])
        expected = {
            'anyOf': [{
                'type': 'array',
                'items': {
                    'type': 'array',
                    'items': {'type': 'number'}
                }
            }, {
                'type': 'array',
                'items': {'type': 'number'}
            }]
        }
        foo = self.sk_pca.customize_schema(output=pca_output)
        self.assertEqual(foo.get_schema('output'), expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.sk_pca.get_schema('output'), init_output_schema)
    def test_override_bool_param_ll(self):
        init = self.ll_pca.hyperparam_schema('whiten')
        expected = {'default': True, 'type': 'boolean'}
        foo = self.ll_pca.customize_schema(whiten=schemas.Bool(default=True))
        self.assertEqual(foo.hyperparam_schema('whiten'), expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.ll_pca.hyperparam_schema('whiten'), init)
        self.assertRaises(Exception, self.ll_pca.customize_schema, whitenX={})
    def test_override_enum_param(self):
        init = self.ll_pca.hyperparam_schema('svd_solver')
        expected = {'default': 'full', 'enum': ['auto', 'full']}
        foo = self.ll_pca.customize_schema(
            svd_solver=schemas.Enum(default='full', values=['auto', 'full']))
        self.assertEqual(foo.hyperparam_schema('svd_solver'), expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.ll_pca.hyperparam_schema('svd_solver'), init)
    def test_override_output(self):
        init_output_schema = self.sk_pca.get_schema('output')
        pca_output = self.ll_pca.get_schema('output')
        foo = self.sk_pca.customize_schema(output=schemas.JSON(pca_output))
        self.assertEqual(foo.get_schema('output'), pca_output)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.sk_pca.get_schema('output'), init_output_schema)
        self.assertRaises(Exception, self.sk_pca.customize_schema, output={})
        self.assertRaises(Exception, self.sk_pca.customize_schema,
                          output_foo=pca_output)
    def test_override_json_param(self):
        init = self.ll_pca.hyperparam_schema('tol')
        expected = {
            'description': 'Tol',
            'type': 'number',
            'minimum': 0.2,
            'default': 1.0
        }
        foo = self.ll_pca.customize_schema(tol=schemas.JSON(expected))
        self.assertEqual(foo.hyperparam_schema('tol'), expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.ll_pca.hyperparam_schema('tol'), init)
    def test_override_tags(self):
        init = self.ll_pca._schemas['tags']
        tags = {
            'pre': ['~categoricals'],
            'op': ['estimator', 'classifier', 'interpretable'],
            'post': ['probabilities']
        }
        foo = self.ll_pca.customize_schema(tags=tags)
        self.assertEqual(foo._schemas['tags'], tags)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.ll_pca._schemas['tags'], init)
        self.assertRaises(Exception, self.sk_pca.customize_schema, tags=42)
    def test_override_int_param(self):
        init = self.ll_pca.hyperparam_schema('iterated_power')
        expected = {
            'default': 1,
            'type': 'integer',
            'minimum': -10,
            'maximum': 10,
            'exclusiveMaximum': True,
            'exclusiveMinimum': False
        }
        foo = self.ll_pca.customize_schema(iterated_power=schemas.Int(
            default=1, min=-10, max=10, exclusiveMax=True,
            exclusiveMin=False))
        self.assertEqual(foo.hyperparam_schema('iterated_power'), expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.ll_pca.hyperparam_schema('iterated_power'), init)
    def test_override_array_param(self):
        init = self.sk_pca.hyperparam_schema('copy')
        expected = {
            'type': 'array',
            'minItems': 1,
            'maxItems': 20,
            'items': {'type': 'integer'}
        }
        foo = self.sk_pca.customize_schema(
            copy=schemas.Array(minItems=1, maxItems=20, items=schemas.Int()))
        self.assertEqual(foo.hyperparam_schema('copy'), expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.sk_pca.hyperparam_schema('copy'), init)
    def test_override_relevant(self):
        init = self.ll_pca.hyperparam_schema()['allOf'][0]['relevantToOptimizer']
        expected = ['svd_solver']
        foo = self.ll_pca.customize_schema(relevantToOptimizer=['svd_solver'])
        self.assertEqual(
            foo.hyperparam_schema()['allOf'][0]['relevantToOptimizer'],
            expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(
            self.ll_pca.hyperparam_schema()['allOf'][0]['relevantToOptimizer'],
            init)
        self.assertRaises(Exception, self.sk_pca.customize_schema,
                          relevantToOptimizer={})
    def test_override_any_param(self):
        init = self.ll_pca.hyperparam_schema('iterated_power')
        expected = {
            'anyOf': [{'type': 'integer'}, {'enum': ['auto', 'full']}],
            'default': 'auto'
        }
        foo = self.ll_pca.customize_schema(iterated_power=schemas.AnyOf(
            [schemas.Int(), schemas.Enum(['auto', 'full'])], default='auto'))
        self.assertEqual(foo.hyperparam_schema('iterated_power'), expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.ll_pca.hyperparam_schema('iterated_power'), init)
    def test_override_float_param(self):
        init = self.ll_pca.hyperparam_schema('tol')
        expected = {
            'default': 0.1,
            'type': 'number',
            'minimum': -10,
            'maximum': 10,
            'exclusiveMaximum': True,
            'exclusiveMinimum': False
        }
        foo = self.ll_pca.customize_schema(
            tol=schemas.Float(default=0.1, min=-10, max=10,
                              exclusiveMax=True, exclusiveMin=False))
        self.assertEqual(foo.hyperparam_schema('tol'), expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.ll_pca.hyperparam_schema('tol'), init)
    def test_override_object_param(self):
        init = self.sk_pca.get_schema('input_fit')
        expected = {
            'type': 'object',
            'required': ['X'],
            'additionalProperties': False,
            'properties': {
                'X': {
                    'type': 'array',
                    'items': {'type': 'number'}
                }
            }
        }
        foo = self.sk_pca.customize_schema(
            input_fit=schemas.Object(required=['X'],
                                     additionalProperties=False,
                                     X=schemas.Array(schemas.Float())))
        self.assertEqual(foo.get_schema('input_fit'), expected)
        helpers.validate_is_schema(foo.get_schema('input_fit'))
        self.assertEqual(self.sk_pca.get_schema('input_fit'), init)
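    # A minimal sketch (not part of the original suite) that pulls together
    # the pattern of the override tests above: customize_schema returns a
    # fresh copy of the operator carrying the overridden schema, while the
    # original operator is left untouched.  The name `narrowed` and the
    # chosen override are illustrative only.
    def test_customize_schema_pattern_sketch(self):
        narrowed = self.ll_pca.customize_schema(
            svd_solver=schemas.Enum(default='full', values=['auto', 'full']))
        # the copy carries the override ...
        self.assertEqual(narrowed.hyperparam_schema('svd_solver'),
                         {'default': 'full', 'enum': ['auto', 'full']})
        # ... while the original operator's schema is unchanged
        self.assertNotEqual(self.ll_pca.hyperparam_schema('svd_solver'),
                            narrowed.hyperparam_schema('svd_solver'))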
    def test_feature_preprocessor(self):
        X_train, y_train = self.X_train, self.y_train
        X_test, y_test = self.X_test, self.y_test
        import importlib
        module_name = ".".join(fproc_name.split('.')[0:-1])
        class_name = fproc_name.split('.')[-1]
        module = importlib.import_module(module_name)
        class_ = getattr(module, class_name)
        fproc = class_()

        from lale.lib.sklearn.one_hot_encoder import OneHotEncoderImpl
        if isinstance(fproc._impl, OneHotEncoderImpl):
            # fproc = OneHotEncoder(handle_unknown='ignore')
            # remove the hack when this is fixed
            fproc = PCA()

        # test_schemas_are_schemas
        from lale.helpers import validate_is_schema
        validate_is_schema(fproc.input_schema_fit())
        validate_is_schema(fproc.input_schema_transform())
        validate_is_schema(fproc.output_schema())
        validate_is_schema(fproc.hyperparam_schema())

        # test_init_fit_transform
        trained = fproc.fit(self.X_train, self.y_train)
        predictions = trained.transform(self.X_test)

        # test_predict_on_trainable
        trained = fproc.fit(X_train, y_train)
        fproc.transform(X_train)

        # test_to_json
        fproc.to_json()

        # test_in_a_pipeline
        # This test assumes that the output of feature processing is
        # compatible with LogisticRegression.
        from lale.lib.sklearn import LogisticRegression
        pipeline = fproc >> LogisticRegression()
        trained = pipeline.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)

        # tune the pipeline with LR using HyperoptClassifier
        from lale.lib.lale import HyperoptClassifier
        hyperopt = HyperoptClassifier(model=pipeline, max_evals=1)
        trained = hyperopt.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)
    def test_encoder(self):
        import importlib
        module_name = ".".join(encoder_name.split('.')[0:-1])
        class_name = encoder_name.split('.')[-1]
        module = importlib.import_module(module_name)
        class_ = getattr(module, class_name)
        encoder = class_()

        # test_schemas_are_schemas
        from lale.helpers import validate_is_schema
        validate_is_schema(encoder.input_schema_fit())
        validate_is_schema(encoder.input_schema_predict())
        validate_is_schema(encoder.output_schema())
        validate_is_schema(encoder.hyperparam_schema())

        # test_init_fit_transform
        trained = encoder.fit(self.X_train, self.y_train)
        transformed = trained.transform(self.X_train)
    def test_regressor(self):
        X_train, y_train = self.X_train, self.y_train
        X_test, y_test = self.X_test, self.y_test
        import importlib
        module_name = ".".join(clf_name.split('.')[0:-1])
        class_name = clf_name.split('.')[-1]
        module = importlib.import_module(module_name)
        class_ = getattr(module, class_name)
        regr = class_()

        # test_schemas_are_schemas
        from lale.helpers import validate_is_schema
        validate_is_schema(regr.input_schema_fit())
        validate_is_schema(regr.input_schema_predict())
        validate_is_schema(regr.output_schema())
        validate_is_schema(regr.hyperparam_schema())

        # test_init_fit_predict
        trained = regr.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)

        # test_predict_on_trainable
        trained = regr.fit(X_train, y_train)
        regr.predict(X_train)

        # test_to_json
        regr.to_json()

        # test_in_a_pipeline
        pipeline = NoOp() >> regr
        trained = pipeline.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)

        # test_with_hyperopt
        from lale.lib.sklearn.ridge import RidgeImpl
        if not isinstance(regr._impl, RidgeImpl):
            from lale.lib.lale import HyperoptRegressor
            hyperopt = HyperoptRegressor(model=pipeline, max_evals=1)
            trained = hyperopt.fit(self.X_train, self.y_train)
            predictions = trained.predict(self.X_test)
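# Note that `fproc_name`, `encoder_name`, and `clf_name` above are free
# variables: each test body is presumably closed over a dotted operator name
# by a factory function and attached to a TestCase class elsewhere in the
# suite.  The sketch below shows that pattern under this assumption; the
# class name and operator list are illustrative only.
import unittest

class _TestRegressionSketch(unittest.TestCase):
    pass

def _create_function_test_regressor(clf_name):
    def test_regressor(self):
        pass  # body as in test_regressor above, closing over clf_name
    test_regressor.__name__ = 'test_%s' % clf_name.split('.')[-1]
    return test_regressor

for _clf_name in ['lale.lib.sklearn.LinearRegression']:
    setattr(_TestRegressionSketch,
            'test_%s' % _clf_name.split('.')[-1],
            _create_function_test_regressor(_clf_name))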
        'post': []
    },
    'properties': {
        'input_fit': _input_schema_fit,
        'input_predict': _input_schema_predict,
        'output': _output_schema,
        'hyperparams': _hyperparams_schema
    }
}

ResNet50 = make_operator(ResNet50Impl, _combined_schemas)

if __name__ == "__main__":
    import torchvision.datasets as datasets
    from lale.helpers import validate_is_schema
    validate_is_schema(_combined_schemas)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[n / 255. for n in [129.3, 124.1, 112.4]],
            std=[n / 255. for n in [68.2, 65.4, 70.4]])
    ])  # meanstd transformation

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[n / 255. for n in [129.3, 124.1, 112.4]],
            std=[n / 255. for n in [68.2, 65.4, 70.4]])
    ])
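    # Sketch of how these transforms might be consumed.  This is an
    # assumption: the channel statistics above match the commonly used
    # CIFAR-100 values, so a CIFAR-100 loader is shown; the root path and
    # download flag are illustrative only.
    trainset = datasets.CIFAR100(root='./data', train=True,
                                 download=True, transform=transform_train)
    testset = datasets.CIFAR100(root='./data', train=False,
                                download=True, transform=transform_test)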
    def test_add_constraint(self):
        init = self.sk_pca.hyperparam_schema()
        init_expected = {
            'allOf': [{
                'type': 'object',
                'properties': {
                    'n_components': {'default': None},
                    'copy': {'default': True},
                    'whiten': {'default': False},
                    'svd_solver': {'default': 'auto'},
                    'tol': {'default': 0.0},
                    'iterated_power': {'default': 'auto'},
                    'random_state': {'default': None}
                }
            }]
        }
        self.assertEqual(init, init_expected)
        expected = {
            'allOf': [
                init_expected['allOf'][0],
                {
                    'anyOf': [{
                        'type': 'object',
                        'properties': {
                            'n_components': {
                                'not': {'enum': ['mle']},
                            }
                        },
                    }, {
                        'type': 'object',
                        'properties': {
                            'svd_solver': {'enum': ['full', 'auto']},
                        }
                    }]
                }
            ]
        }
        foo = self.sk_pca.customize_schema(constraint=schemas.AnyOf([
            schemas.Object(n_components=schemas.Not(schemas.Enum(['mle']))),
            schemas.Object(svd_solver=schemas.Enum(['full', 'auto']))
        ]))
        self.assertEqual(foo.hyperparam_schema(), expected)
        helpers.validate_is_schema(foo._schemas)
        self.assertEqual(self.sk_pca.hyperparam_schema(), init)
    def test_classifier(self):
        X_train, y_train = self.X_train, self.y_train
        X_test, y_test = self.X_test, self.y_test
        import importlib
        module_name = ".".join(clf_name.split('.')[0:-1])
        class_name = clf_name.split('.')[-1]
        module = importlib.import_module(module_name)
        class_ = getattr(module, class_name)
        clf = class_()

        # test_schemas_are_schemas
        from lale.helpers import validate_is_schema
        validate_is_schema(clf.input_schema_fit())
        validate_is_schema(clf.input_schema_predict())
        validate_is_schema(clf.output_schema())
        validate_is_schema(clf.hyperparam_schema())

        # test_init_fit_predict
        trained = clf.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)

        # test_with_hyperopt
        from lale.lib.lale import HyperoptClassifier
        hyperopt = HyperoptClassifier(model=clf, max_evals=1)
        trained = hyperopt.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)

        # test_cross_validation
        from lale.helpers import cross_val_score
        cv_results = cross_val_score(clf, X_train, y_train, cv=2)
        self.assertEqual(len(cv_results), 2)

        # test_with_gridsearchcv_auto_wrapped
        from sklearn.metrics import accuracy_score, make_scorer
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            from lale.lib.sklearn.gradient_boosting_classifier import GradientBoostingClassifierImpl
            from lale.lib.sklearn.mlp_classifier import MLPClassifierImpl
            if isinstance(clf._impl, GradientBoostingClassifierImpl):
                # exponential loss does not work with the iris dataset,
                # because it is not a binary classification task
                import lale.schemas as schemas
                clf = clf.customize_schema(
                    loss=schemas.Enum(default='deviance',
                                      values=['deviance']))
            if not isinstance(clf._impl, MLPClassifierImpl):
                # MLP fails due to issue #164
                grid_search = LaleGridSearchCV(
                    clf, lale_num_samples=1, lale_num_grids=1, cv=2,
                    scoring=make_scorer(accuracy_score))
                grid_search.fit(X_train, y_train)

        # test_predict_on_trainable
        trained = clf.fit(X_train, y_train)
        clf.predict(X_train)

        # test_to_json
        clf.to_json()

        # test_in_a_pipeline
        pipeline = NoOp() >> clf
        trained = pipeline.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)
        }
    }]
}

_combined_schemas = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'description':
        'Combined schema for expected data and hyperparameters of a '
        'text data transformer based on a pre-trained BERT model '
        '(https://github.com/huggingface/pytorch-pretrained-BERT).',
    'type': 'object',
    'tags': {
        'pre': [],
        'op': [],
        'post': []
    },
    'properties': {
        'input_fit': _input_schema_fit,
        'input_predict': _input_schema_predict,
        'output': _output_schema,
        'hyperparams': _hyperparams_schema
    }
}

if __name__ == "__main__":
    helpers.validate_is_schema(_combined_schemas)

Batching = make_operator(BatchingImpl, _combined_schemas)