def test_find_best_regressor_with_eval(self):
    """Test find best in grid_search with custom eval function"""
    # Custom comparator: the model with the lower RMSE wins.
    rmse_is_lower = (lambda a, b:
                     getattr(a, "root_mean_squared_error") <
                     getattr(b, "root_mean_squared_error"))
    descriptors = [
        (self.context.models.regression.linear_regression,
         {"observation_columns": ["feat1", "feat2"],
          "label_column": "class",
          "max_iterations": grid_values(5, 50),
          "elastic_net_parameter": 0.001}),
        (self.context.models.regression.random_forest_regressor,
         {"observation_columns": ["feat1", "feat2"],
          "label_column": "class",
          "max_depth": grid_values(*xrange(2, 10)),
          "num_trees": 2})]
    grid_result = self.context.models.grid_search(
        self.regressor_frame, self.regressor_frame, descriptors,
        rmse_is_lower)

    best_model = grid_result.find_best()
    # The random forest regressor should achieve the lowest RMSE.
    self.assertEqual(
        best_model.descriptor.model_type.__name__,
        "sparktk.models.regression.random_forest_regressor")
    self.assertAlmostEqual(
        best_model.metrics.root_mean_squared_error, 0.37, delta=0.01)
def test_default_num_fold(self):
    """Test cross validate with default num_fold parameter"""
    result = self.context.models.cross_validate(
        self.classifier_frame,
        [(self.context.models.classification.svm,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(5, 100),
           "step_size": 0.01}),
         (self.context.models.classification.logistic_regression,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(2, 5, 15),
           "step_size": 0.001})],
        verbose=False)

    # validate number of models: default appears to be 3 folds, each fold
    # training 2 svm + 3 logistic regression grid points
    (svm_count, log_count, num_models) = self._get_model_counts(result, "svm")
    expected_num_models = 3 * (2 + 3)
    # assertEqual instead of the deprecated assertEquals alias
    self.assertEqual(num_models, expected_num_models)
    self.assertEqual(svm_count, 6)
    self.assertEqual(log_count, 9)
def test_incorect_hyper_parameter(self):
    """Test incorrect hyper parameter name for a model throws exception"""
    # "BAD" is not a valid training argument for either model type.
    train_descriptors = [
        (self.context.models.classification.svm,
         {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
          "BAD": "res",
          "num_iterations": grid_values(5, 100),
          "step_size": 0.01}),
        (self.context.models.classification.logistic_regression,
         {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
          "BAD": "res",
          "num_iterations": grid_values(2, 15),
          "step_size": 0.001})]
    with self.assertRaisesRegexp(Exception, "unknown args named: BAD"):
        self.context.models.grid_search(
            self.classifier_frame, self.classifier_frame,
            train_descriptors)
def test_find_best_classifier_default(self):
    """Test find best in grid_search with default eval function"""
    model_grid = [
        (self.context.models.classification.svm,
         {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
          "label_column": "res",
          "num_iterations": grid_values(5, 10),
          "step_size": 0.01}),
        (self.context.models.classification.logistic_regression,
         {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
          "label_column": "res",
          "num_iterations": grid_values(*xrange(2, 15)),
          "step_size": 0.001})]
    grid_result = self.context.models.grid_search(
        self.classifier_frame, self.classifier_frame, model_grid)

    winner = grid_result.find_best()
    # logistic regression is expected to win under the default metric
    self.assertEqual(winner.descriptor.model_type.__name__,
                     "sparktk.models.classification.logistic_regression")
    self.assertAlmostEqual(winner.metrics.accuracy, 0.87688, delta=0.01)
def test_two_folds(self):
    """Test cross validate with num_folds = 2"""
    result = self.context.models.cross_validate(
        self.regressor_frame,
        [(self.context.models.regression.linear_regression,
          {"observation_columns": ["feat1", "feat2"],
           "label_column": "class",
           "max_iterations": grid_values(5, 100),
           "reg_param": 0.0001}),
         (self.context.models.regression.random_forest_regressor,
          {"observation_columns": ["feat1", "feat2"],
           "label_column": "class",
           "num_trees": grid_values(2, 5, 8),
           "max_depth": 5})],
        verbose=False, num_folds=2)

    # validate number of models: 2 folds x (2 linreg + 3 rf grid points)
    (rf_count, linreg_count, num_models) = self._get_model_counts(
        result, "random")
    expected_num_models = 2 * (2 + 3)
    # assertEqual instead of the deprecated assertEquals alias
    self.assertEqual(num_models, expected_num_models)
    self.assertEqual(rf_count, 6)
    self.assertEqual(linreg_count, 4)
def test_invalid_num_fold(self):
    """Test cross validate with num_fold > number of data points"""
    with self.assertRaisesRegexp(Exception, "empty collection"):
        # the call is expected to raise, so the unused "result =" binding
        # was dropped
        self.context.models.cross_validate(
            self.classifier_frame,
            [(self.context.models.classification.svm,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "label_column": "res",
               "num_iterations": grid_values(5, 100),
               "step_size": 0.01}),
             (self.context.models.classification.logistic_regression,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "label_column": "res",
               "num_iterations": grid_values(2, 15),
               "step_size": 0.001})],
            num_folds=1000000, verbose=False)
def test_all_results_regressors(self):
    """Test number of regressors created given 5 folds"""
    result = self.context.models.cross_validate(
        self.regressor_frame,
        [(self.context.models.regression.linear_regression,
          {"observation_columns": ["feat1", "feat2"],
           "label_column": "class",
           "max_iterations": grid_values(*xrange(5, 10)),
           "elastic_net_parameter": 0.001}),
         (self.context.models.regression.random_forest_regressor,
          {"observation_columns": ["feat1", "feat2"],
           "label_column": "class",
           "num_trees": grid_values(2, 5, 15),
           "max_depth": 5})],
        num_folds=5, verbose=False)

    # validate number of models: 5 folds x (5 linreg + 3 rf grid points)
    (rf_count, linreg_count, num_models) = self._get_model_counts(
        result, "random_forest")
    expected_num_models = 5 * (5 + 3)
    # assertEqual instead of the deprecated assertEquals alias
    self.assertEqual(rf_count + linreg_count, expected_num_models)
    self.assertEqual(rf_count, 15)
    self.assertEqual(linreg_count, 25)
def test_find_best_classifier_default(self):
    """Test find best in grid_search with default eval function"""
    svm_params = {
        "observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
        "label_column": "res",
        "num_iterations": grid_values(5, 10),
        "step_size": 0.01}
    logreg_params = {
        "observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
        "label_column": "res",
        "num_iterations": grid_values(*xrange(2, 15)),
        "step_size": 0.001}
    grid_result = self.context.models.grid_search(
        self.classifier_frame, self.classifier_frame,
        [(self.context.models.classification.svm, svm_params),
         (self.context.models.classification.logistic_regression,
          logreg_params)])

    best_model = grid_result.find_best()
    # the default eval should pick logistic regression as most accurate
    self.assertEqual(
        best_model.descriptor.model_type.__name__,
        "sparktk.models.classification.logistic_regression")
    self.assertAlmostEqual(
        best_model.metrics.accuracy, 0.87688, delta=0.01)
def test_invalid_model(self):
    """Test cross validate with invalid model"""
    with self.assertRaisesRegexp(Exception, "no attribute \'BAD\'"):
        # accessing classification.BAD raises while the argument list is
        # being evaluated, before cross_validate itself runs
        self.context.models.cross_validate(
            self.classifier_frame,
            [(self.context.models.classification.BAD,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "label_column": "res",
               "num_iterations": grid_values(5, 100),
               "step_size": 0.01}),
             (self.context.models.classification.logistic_regression,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "label_column": "res",
               "num_iterations": grid_values(2, 15),
               "step_size": 0.001})],
            # was num_folds=2.5, a copy-paste leftover from the float
            # num_folds test; this test is about the bad model name only
            num_folds=2, verbose=False)
def test_averages_regressors(self):
    """Test output of cross validation averages for regressors"""
    linreg_args = {
        "observation_columns": ["feat1", "feat2"],
        "label_column": "class",
        "max_iterations": grid_values(*xrange(10, 20)),
        "reg_param": 0.001}
    rf_args = {
        "observation_columns": ["feat1", "feat2"],
        "label_column": "class",
        "num_trees": grid_values(*xrange(2, 5)),
        "max_depth": 4}
    result = self.context.models.cross_validate(
        self.regressor_frame,
        [(self.context.models.regression.linear_regression, linreg_args),
         (self.context.models.regression.random_forest_regressor, rf_args)],
        num_folds=3, verbose=False)
    avg_models = result.averages

    # 10 linear regression grid points + 3 random forest grid points
    self.assertEqual(len(avg_models.grid_points), 13)

    # random forest should average out as the best model by r2
    best_model = avg_models.find_best()
    self.assertEqual(best_model.descriptor.model_type.__name__,
                     "sparktk.models.regression.random_forest_regressor")
    self.assertAlmostEqual(best_model.metrics.r2, 0.415, delta=0.01)
def test_averages_classifiers(self):
    """Test output of cross validation averages for classifiers"""
    svm_args = {
        "observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
        "label_column": "res",
        "num_iterations": grid_values(5, 100),
        "step_size": 0.01}
    logreg_args = {
        "observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
        "label_column": "res",
        "num_iterations": grid_values(2, 15),
        "step_size": 0.001}
    result = self.context.models.cross_validate(
        self.classifier_frame,
        [(self.context.models.classification.svm, svm_args),
         (self.context.models.classification.logistic_regression,
          logreg_args)],
        num_folds=3, verbose=False)
    avg_models = result.averages

    # 2 svm grid points + 2 logistic regression grid points
    self.assertEqual(len(avg_models.grid_points), 4)

    # logistic regression should average out as the most accurate model
    best_model = avg_models.find_best()
    self.assertEqual(best_model.descriptor.model_type.__name__,
                     "sparktk.models.classification.logistic_regression")
    self.assertAlmostEqual(best_model.metrics.accuracy, .87, delta=0.01)
def test_float_num_fold(self):
    """Test cross validate with float num_fold"""
    with self.assertRaisesRegexp(Exception,
                                 "integer argument expected, got float"):
        # the call is expected to raise, so the unused "result =" binding
        # was dropped
        self.context.models.cross_validate(
            self.classifier_frame,
            [(self.context.models.classification.svm,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "label_column": "res",
               "num_iterations": grid_values(5, 100),
               "step_size": 0.01}),
             (self.context.models.classification.logistic_regression,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "label_column": "res",
               "num_iterations": grid_values(2, 15),
               "step_size": 0.001})],
            num_folds=2.5, verbose=False)
def test_invalid_eval_name(self):
    """Test find_best throws exception for a metric classifiers lack"""
    # NOTE: the original docstring said "invalid model name", but this test
    # actually exercises an eval function referencing a missing metric.
    grid_result = self.context.models.grid_search(
        self.classifier_frame, self.classifier_frame,
        [(self.context.models.classification.svm,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(5, 10),
           "step_size": 0.01}),
         (self.context.models.classification.logistic_regression,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(*xrange(2, 15)),
           "step_size": 0.001})],
        # classifier metrics have no root_mean_squared_error attribute
        lambda a, b: getattr(a, "root_mean_squared_error") < getattr(
            b, "root_mean_squared_error"))
    with self.assertRaisesRegexp(
            Exception, "no attribute \'root_mean_squared_error\'"):
        # unused "best_model =" binding removed; only the raise matters
        grid_result.find_best()
def test_grid_points(self):
    """Test output of grid search on svm and logistic regression"""
    grid_result = self.context.models.grid_search(
        self.frame, self.frame,
        [(self.context.models.classification.svm,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(5, 100),
           "step_size": 0.01}),
         (self.context.models.classification.logistic_regression,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(2, 15),
           "step_size": 0.001})])
    grid_points = grid_result.grid_points

    # validate number of items in grid (2 svm + 2 logistic regression)
    self.assertEqual(len(grid_points), 4)

    # validate one of the models' name
    self.assertEqual(
        grid_points[0].descriptor.model_type.__name__,
        "sparktk.models.classification.svm")

    # validate grid values of the first model
    svm_kwargs_0 = grid_points[0].descriptor.kwargs
    self.assertEqual(svm_kwargs_0['num_iterations'], 5)
    self.assertEqual(svm_kwargs_0['step_size'], 0.01)
    self.assertEqual(svm_kwargs_0['label_column'], "res")
    self.assertItemsEqual(svm_kwargs_0['observation_columns'],
                          ["vec0", "vec1", "vec2", "vec3", "vec4"])

    # validate grid values of the second model
    svm_kwargs_1 = grid_points[1].descriptor.kwargs
    self.assertEqual(svm_kwargs_1['num_iterations'], 100)
    self.assertEqual(svm_kwargs_1['step_size'], 0.01)
    self.assertEqual(svm_kwargs_1['label_column'], "res")
    self.assertItemsEqual(svm_kwargs_1['observation_columns'],
                          ["vec0", "vec1", "vec2", "vec3", "vec4"])

    # validate grid values of the third model
    lr_kwargs_0 = grid_points[2].descriptor.kwargs
    self.assertEqual(lr_kwargs_0['num_iterations'], 2)
    self.assertEqual(lr_kwargs_0['step_size'], 0.001)
    self.assertEqual(lr_kwargs_0['label_column'], "res")
    self.assertItemsEqual(lr_kwargs_0['observation_columns'],
                          ["vec0", "vec1", "vec2", "vec3", "vec4"])

    # validate grid values of the fourth model (comment was mislabeled
    # "third" in the original)
    lr_kwargs_1 = grid_points[3].descriptor.kwargs
    self.assertEqual(lr_kwargs_1['num_iterations'], 15)
    self.assertEqual(lr_kwargs_1['step_size'], 0.001)
    self.assertEqual(lr_kwargs_1['label_column'], "res")
    self.assertItemsEqual(lr_kwargs_1['observation_columns'],
                          ["vec0", "vec1", "vec2", "vec3", "vec4"])

    # validate accuracy metric of one of the models
    # (assertEqual instead of the deprecated assertEquals alias)
    self.assertEqual(grid_points[2].metrics.accuracy, 0.8745)
def test_grid_points_regressors(self):
    """Test output of grid search on regressors"""
    linreg_args = {
        "observation_columns": ["feat1", "feat2"],
        "label_column": "class",
        "max_iterations": grid_values(5, 50),
        "elastic_net_parameter": 0.001}
    rf_args = {
        "observation_columns": ["feat1", "feat2"],
        "label_column": "class",
        "num_trees": grid_values(2, 4),
        "max_depth": 5}
    grid_result = self.context.models.grid_search(
        self.regressor_frame, self.regressor_frame,
        [(self.context.models.regression.linear_regression, linreg_args),
         (self.context.models.regression.random_forest_regressor, rf_args)])
    grid_points = grid_result.grid_points

    # 2 linear regression grid points + 2 random forest grid points
    self.assertEqual(len(grid_points), 4)

    # the linear regression points come first
    self.assertEqual(
        grid_points[0].descriptor.model_type.__name__,
        "sparktk.models.regression.linear_regression")

    # validate grid values of the first model
    linreg_kwargs_0 = grid_points[0].descriptor.kwargs
    self.assertEqual(linreg_kwargs_0['max_iterations'], 5)
    self.assertEqual(linreg_kwargs_0['elastic_net_parameter'], 0.001)
    self.assertEqual(linreg_kwargs_0['label_column'], "class")
    self.assertItemsEqual(
        linreg_kwargs_0['observation_columns'], ["feat1", "feat2"])

    # validate grid values of the third model
    rf_kwargs_1 = grid_points[2].descriptor.kwargs
    self.assertEqual(rf_kwargs_1['num_trees'], 2)
    self.assertEqual(rf_kwargs_1['max_depth'], 5)
    self.assertEqual(rf_kwargs_1['label_column'], "class")
    self.assertItemsEqual(
        rf_kwargs_1['observation_columns'], ["feat1", "feat2"])

    # validate the r2 metric of one of the models
    self.assertAlmostEqual(
        grid_points[1].metrics.r2, 1.59183568639e-05, delta=1e-04)
def test_incorect_hyper_parameter(self):
    """Test incorrect hyper parameter name for a model throws exception"""
    # neither svm nor logistic regression accepts an argument named "BAD"
    with self.assertRaisesRegexp(Exception, "unknown args named: BAD"):
        self.context.models.grid_search(
            self.frame, self.frame,
            [(self.context.models.classification.svm,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "BAD": "res",
               "num_iterations": grid_values(5, 100),
               "step_size": 0.01}),
             (self.context.models.classification.logistic_regression,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "BAD": "res",
               "num_iterations": grid_values(2, 15),
               "step_size": 0.001})])
def test_grid_points_regressors(self):
    """Test output of grid search on regressors"""
    grid_result = self.context.models.grid_search(
        self.regressor_frame, self.regressor_frame,
        [(self.context.models.regression.linear_regression,
          {"observation_columns": ["feat1", "feat2"],
           "label_column": "class",
           "max_iterations": grid_values(5, 50),
           "elastic_net_parameter": 0.001}),
         (self.context.models.regression.random_forest_regressor,
          {"observation_columns": ["feat1", "feat2"],
           "label_column": "class",
           "num_trees": grid_values(2, 4),
           "max_depth": 5})])
    points = grid_result.grid_points

    # 2 linreg + 2 rf grid points in total
    self.assertEqual(len(points), 4)

    # the linear regression points come first
    self.assertEqual(points[0].descriptor.model_type.__name__,
                     "sparktk.models.regression.linear_regression")

    # kwargs of the first linear regression point
    first_linreg = points[0].descriptor.kwargs
    self.assertEqual(first_linreg['max_iterations'], 5)
    self.assertEqual(first_linreg['elastic_net_parameter'], 0.001)
    self.assertEqual(first_linreg['label_column'], "class")
    self.assertItemsEqual(first_linreg['observation_columns'],
                          ["feat1", "feat2"])

    # kwargs of the first random forest point (third point overall)
    first_rf = points[2].descriptor.kwargs
    self.assertEqual(first_rf['num_trees'], 2)
    self.assertEqual(first_rf['max_depth'], 5)
    self.assertEqual(first_rf['label_column'], "class")
    self.assertItemsEqual(first_rf['observation_columns'],
                          ["feat1", "feat2"])

    # r2 metric of the second linear regression point
    self.assertAlmostEqual(points[1].metrics.r2, 1.59183568639e-05,
                           delta=1e-04)
def test_single_fold(self):
    """Test cross validate with num_folds = 1; should throw exception"""
    linreg_descriptor = (
        self.context.models.regression.linear_regression,
        {"observation_columns": ["feat1", "feat2"],
         "label_column": "class",
         "max_iterations": grid_values(5, 100),
         "reg_param": 0.0001})
    rf_descriptor = (
        self.context.models.regression.random_forest_regressor,
        {"observation_columns": ["feat1", "feat2"],
         "label_column": "class",
         "num_trees": grid_values(2, 5, 8),
         "max_depth": 5})
    # a single fold leaves no train/test split; any exception is accepted
    with self.assertRaises(Exception):
        self.context.models.cross_validate(
            self.regressor_frame,
            [linreg_descriptor, rf_descriptor],
            verbose=False, num_folds=1)
def test_missing_test_frame(self):
    """Test grid search throws exception for missing test frame"""
    svm_descriptor = (
        self.context.models.classification.svm,
        {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
         "label_column": "res",
         "num_iterations": grid_values(1, 4),
         "step_size": 0.001})
    # only one frame given where grid_search requires train and test frames
    with self.assertRaisesRegexp(
            Exception, "takes at least 3 arguments"):
        self.context.models.grid_search(self.frame, [svm_descriptor])
def test_bad_model_name(self):
    """Test grid search throws exception for invalid model name"""
    with self.assertRaisesRegexp(
            Exception, "no attribute \'BAD\'"):
        # classification has no BAD member; the attribute lookup raises
        # while the arguments are being evaluated
        classification = self.context.models.classification
        self.context.models.grid_search(
            self.frame,
            [(classification.BAD,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "label_column": "res",
               "num_iterations": grid_values(1, 4),
               "step_size": 0.001})])
def test_bad_data_type_in_grid_values(self):
    """Test invalid parameter to grid_values throws exception"""
    # "one" is not a numeric value for num_iterations, so training fails
    with self.assertRaisesRegexp(
            Exception, "Method .* does not exist"):
        self.context.models.grid_search(
            self.frame, self.frame,
            [(self.context.models.classification.svm,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "label_column": "res",
               "num_iterations": grid_values("one"),
               "step_size": 0.001})])
def test_grid_values_with_xrange(self):
    """Test grid values with xrange"""
    grid_result = self.context.models.grid_search(
        self.frame, self.frame,
        [(self.context.models.classification.logistic_regression,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(*xrange(5, 10)),
           "step_size": 0.001})])

    # validate number of models in the grid: xrange(5, 10) has 5 values
    # (assertEqual instead of the deprecated assertEquals alias)
    self.assertEqual(len(grid_result.grid_points), 5)
def test_default_num_fold(self):
    """Test cross validate with default num_fold parameter"""
    result = self.context.models.cross_validate(
        self.frame,
        [(self.context.models.classification.svm,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(5, 100),
           "step_size": 0.01}),
         (self.context.models.classification.logistic_regression,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(2, 5, 15),
           "step_size": 0.001})],
        verbose=False)

    # validate number of models: 2 svm + 3 logistic regression grid points
    # per fold, over the default number of folds (3)
    all_models = result.all_results
    actual_num_models = 0
    svm_count = 0
    log_count = 0
    for fold in all_models:
        grid_points = fold.grid_points
        actual_num_models += len(grid_points)
        for grid_point in grid_points:
            if "svm" in grid_point.descriptor.model_type.__name__:
                svm_count += 1
            else:
                log_count += 1
    expected_num_models = 3 * (2 + 3)
    # assertEqual instead of the deprecated assertEquals alias
    self.assertEqual(actual_num_models, expected_num_models)
    self.assertEqual(svm_count, 6)
    self.assertEqual(log_count, 9)
def test_missing_test_frame(self):
    """Test grid search throws exception for missing test frame"""
    # grid_search needs both a train and a test frame; only one is passed
    with self.assertRaisesRegexp(Exception, "takes at least 3 arguments"):
        self.context.models.grid_search(
            self.frame,
            [(self.context.models.classification.svm,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "label_column": "res",
               "num_iterations": grid_values(1, 4),
               "step_size": 0.001})])
def test_bad_model_name(self):
    """Test grid search throws exception for invalid model name"""
    with self.assertRaisesRegexp(Exception, "no attribute \'BAD\'"):
        # the BAD attribute lookup itself raises, so it must stay inside
        # the assertRaises context
        models = self.context.models
        models.grid_search(
            self.frame,
            [(models.classification.BAD,
              {"observation_columns":
                   ["vec0", "vec1", "vec2", "vec3", "vec4"],
               "label_column": "res",
               "num_iterations": grid_values(1, 4),
               "step_size": 0.001})])
def test_grid_values_with_xrange(self):
    """Test grid values with xrange"""
    grid_result = self.context.models.grid_search(
        self.classifier_frame, self.classifier_frame,
        [(self.context.models.classification.logistic_regression,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(*xrange(5, 10)),
           "step_size": 0.001})])

    # validate number of models in the grid: xrange(5, 10) has 5 values
    # (assertEqual instead of the deprecated assertEquals alias)
    self.assertEqual(len(grid_result.grid_points), 5)
def test_invalid_eval_name(self):
    """Test find_best throws exception for a metric classifiers lack"""
    # NOTE: the original docstring said "invalid model name", but this test
    # actually exercises an eval function referencing a missing metric.
    grid_result = self.context.models.grid_search(
        self.classifier_frame, self.classifier_frame,
        [(self.context.models.classification.svm,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(5, 10),
           "step_size": 0.01}),
         (self.context.models.classification.logistic_regression,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(*xrange(2, 15)),
           "step_size": 0.001})],
        # classifier metrics have no root_mean_squared_error attribute
        lambda a, b: getattr(a, "root_mean_squared_error") < getattr(
            b, "root_mean_squared_error"))
    with self.assertRaisesRegexp(
            Exception, "no attribute \'root_mean_squared_error\'"):
        # unused "best_model =" binding removed; only the raise matters
        grid_result.find_best()
def test_find_best_regressor_with_eval(self):
    """Test find best in grid_search with custom eval function"""
    grid_result = self.context.models.grid_search(
        self.regressor_frame, self.regressor_frame,
        [(self.context.models.regression.linear_regression,
          {"observation_columns": ["feat1", "feat2"],
           "label_column": "class",
           # pass values directly; the *(5, 50) tuple unpacking was redundant
           "max_iterations": grid_values(5, 50),
           "elastic_net_parameter": 0.001}),
         (self.context.models.regression.random_forest_regressor,
          {"observation_columns": ["feat1", "feat2"],
           "label_column": "class",
           "max_depth": grid_values(*xrange(2, 10)),
           "num_trees": 2})],
        # custom eval: the model with the lower RMSE wins
        lambda a, b: getattr(a, "root_mean_squared_error") < getattr(
            b, "root_mean_squared_error"))

    best_model = grid_result.find_best()
    self.assertEqual(best_model.descriptor.model_type.__name__,
                     "sparktk.models.regression.random_forest_regressor")
    self.assertAlmostEqual(best_model.metrics.root_mean_squared_error,
                           0.37, delta=0.01)
def test_bad_data_type_in_grid_values(self):
    """Test invalid parameter to grid_values throws exception"""
    svm_args = {
        "observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
        "label_column": "res",
        # "one" is not a numeric grid value, so training must fail
        "num_iterations": grid_values("one"),
        "step_size": 0.001}
    with self.assertRaisesRegexp(Exception, "Method .* does not exist"):
        self.context.models.grid_search(
            self.classifier_frame, self.classifier_frame,
            [(self.context.models.classification.svm, svm_args)])
def test_grid_points_classifiers(self):
    """Test output of grid search on svm and logistic regression"""
    grid_result = self.context.models.grid_search(
        self.classifier_frame, self.classifier_frame,
        [(self.context.models.classification.svm,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(5, 100),
           "step_size": 0.01}),
         (self.context.models.classification.logistic_regression,
          {"observation_columns": ["vec0", "vec1", "vec2", "vec3", "vec4"],
           "label_column": "res",
           "num_iterations": grid_values(2, 15),
           "step_size": 0.001})])
    grid_points = grid_result.grid_points

    # validate number of items in grid (2 svm + 2 logistic regression)
    self.assertEqual(len(grid_points), 4)

    # validate one of the models' name
    self.assertEqual(grid_points[0].descriptor.model_type.__name__,
                     "sparktk.models.classification.svm")

    # validate grid values of the first model
    svm_kwargs_0 = grid_points[0].descriptor.kwargs
    self.assertEqual(svm_kwargs_0['num_iterations'], 5)
    self.assertEqual(svm_kwargs_0['step_size'], 0.01)
    self.assertEqual(svm_kwargs_0['label_column'], "res")
    self.assertItemsEqual(svm_kwargs_0['observation_columns'],
                          ["vec0", "vec1", "vec2", "vec3", "vec4"])

    # validate grid values of the second model
    svm_kwargs_1 = grid_points[1].descriptor.kwargs
    self.assertEqual(svm_kwargs_1['num_iterations'], 100)
    self.assertEqual(svm_kwargs_1['step_size'], 0.01)
    self.assertEqual(svm_kwargs_1['label_column'], "res")
    self.assertItemsEqual(svm_kwargs_1['observation_columns'],
                          ["vec0", "vec1", "vec2", "vec3", "vec4"])

    # validate grid values of the third model
    lr_kwargs_0 = grid_points[2].descriptor.kwargs
    self.assertEqual(lr_kwargs_0['num_iterations'], 2)
    self.assertEqual(lr_kwargs_0['step_size'], 0.001)
    self.assertEqual(lr_kwargs_0['label_column'], "res")
    self.assertItemsEqual(lr_kwargs_0['observation_columns'],
                          ["vec0", "vec1", "vec2", "vec3", "vec4"])

    # validate grid values of the fourth model
    lr_kwargs_1 = grid_points[3].descriptor.kwargs
    self.assertEqual(lr_kwargs_1['num_iterations'], 15)
    self.assertEqual(lr_kwargs_1['step_size'], 0.001)
    self.assertEqual(lr_kwargs_1['label_column'], "res")
    self.assertItemsEqual(lr_kwargs_1['observation_columns'],
                          ["vec0", "vec1", "vec2", "vec3", "vec4"])

    # validate accuracy metric of one of the models
    # (assertEqual instead of the deprecated assertEquals alias)
    self.assertEqual(grid_points[2].metrics.accuracy, 0.8745)