def test(a_node, pp, algos):
    """Smoke-test the collection endpoints, /ModelBuilders and /ModelMetrics.

    Args:
        a_node: client for the H2O node under test; this function calls its
            ``models``, ``frames``, ``model_builders`` and ``model_metrics``
            methods.
        pp: pretty printer (anything with ``pprint``) used for the
            extra-verbose dumps.
        algos: iterable of algorithm names that must be present in the
            ``'model_builders'`` entry of every ModelBuilders response.

    Raises:
        AssertionError: if an expected algo is missing from a response.
    """
    ##################################
    # Test object collection endpoints
    models = a_node.models()
    if h2o_test_utils.isVerboser():
        print('Models: ')
        pp.pprint(models)

    models = a_node.models(api_version=92)  # note: tests API version fallback
    if h2o_test_utils.isVerboser():
        print('ModelsV92: ')
        pp.pprint(models)

    frames = a_node.frames(row_count=5)
    if h2o_test_utils.isVerboser():
        print('Frames: ')
        pp.pprint(frames)

    # TODO: all other collections, including /Jobs and eventually /DKV

    # TODO: test /Cloud

    ####################################
    # test model_builders collection GET
    if h2o_test_utils.isVerbose():
        print('Testing /ModelBuilders. . .')
    model_builders = a_node.model_builders(timeoutSecs=240)

    if h2o_test_utils.isVerboser():
        print('ModelBuilders: ')
        pp.pprint(model_builders)

    for algo in algos:
        assert algo in model_builders['model_builders'], "FAIL: Failed to find algo: " + algo
        builder = model_builders['model_builders'][algo]
        h2o_test_utils.validate_builder(algo, builder)

    ####################################
    # test model_builders individual GET
    if h2o_test_utils.isVerbose():
        print('Testing /ModelBuilders/{algo}. . .')
    for algo in algos:
        model_builder = a_node.model_builders(algo=algo, timeoutSecs=240)
        assert algo in model_builder['model_builders'], "FAIL: Failed to find algo: " + algo
        # Validate the per-algo response itself, not the collection response
        # fetched above (which the previous loop already validated).
        builder = model_builder['model_builders'][algo]
        h2o_test_utils.validate_builder(algo, builder)

    ####################################
    # test model_metrics collection GET
    if h2o_test_utils.isVerbose():
        print('Testing /ModelMetrics. . .')
    model_metrics = a_node.model_metrics(timeoutSecs=240)

    if h2o_test_utils.isVerboser():
        print('ModelMetrics: ')
        pp.pprint(model_metrics)
def test(a_node, pp, algos):
    """Smoke-test the collection endpoints, /ModelBuilders and /ModelMetrics.

    Args:
        a_node: client for the H2O node under test; this function calls its
            ``models``, ``frames``, ``model_builders`` and ``model_metrics``
            methods.
        pp: pretty printer (anything with ``pprint``) used for the
            extra-verbose dumps.
        algos: iterable of algorithm names that must be present in the
            ``'model_builders'`` entry of every ModelBuilders response.

    Raises:
        AssertionError: if an expected algo is missing from a response.
    """
    ##################################
    # Test object collection endpoints
    models = a_node.models()
    if h2o_test_utils.isVerboser():
        print('Models: ')
        pp.pprint(models)

    models = a_node.models(api_version=92)  # note: tests API version fallback
    if h2o_test_utils.isVerboser():
        print('ModelsV92: ')
        pp.pprint(models)

    frames = a_node.frames(row_count=5)
    if h2o_test_utils.isVerboser():
        print('Frames: ')
        pp.pprint(frames)

    # TODO: all other collections, including /Jobs and eventually /DKV

    # TODO: test /Cloud

    ####################################
    # test model_builders collection GET
    if h2o_test_utils.isVerbose():
        print('Testing /ModelBuilders. . .')
    model_builders = a_node.model_builders(timeoutSecs=240)

    if h2o_test_utils.isVerboser():
        print('ModelBuilders: ')
        pp.pprint(model_builders)

    for algo in algos:
        assert algo in model_builders['model_builders'], "FAIL: Failed to find algo: " + algo
        builder = model_builders['model_builders'][algo]
        h2o_test_utils.validate_builder(algo, builder)

    ####################################
    # test model_builders individual GET
    if h2o_test_utils.isVerbose():
        print('Testing /ModelBuilders/{algo}. . .')
    for algo in algos:
        model_builder = a_node.model_builders(algo=algo, timeoutSecs=240)
        assert algo in model_builder['model_builders'], "FAIL: Failed to find algo: " + algo
        # Validate the per-algo response itself, not the collection response
        # fetched above (which the previous loop already validated).
        builder = model_builder['model_builders'][algo]
        h2o_test_utils.validate_builder(algo, builder)

    ####################################
    # test model_metrics collection GET
    if h2o_test_utils.isVerbose():
        print('Testing /ModelMetrics. . .')
    model_metrics = a_node.model_metrics(timeoutSecs=240)

    if h2o_test_utils.isVerboser():
        print('ModelMetrics: ')
        pp.pprint(model_metrics)
def test(a_node, pp):
    """Check the shape of the /Metadata schemas and endpoints responses.

    Args:
        a_node: client for the H2O node under test; this function calls its
            ``schemas``, ``schema`` and ``endpoints`` methods.
        pp: pretty printer (anything with ``pprint``) used for the
            extra-verbose dumps.

    Raises:
        AssertionError: if a response lacks the expected field, the field has
            the wrong container type, or it has an unexpected length.
    """
    ####################################
    # test schemas collection GET
    if h2o_test_utils.isVerbose():
        print('Testing /Metadata/schemas. . .')
    schemas = a_node.schemas(timeoutSecs=240)
    assert 'schemas' in schemas, "FAIL: failed to find 'schemas' field in output of /Metadata/schemas: " + repr(schemas)
    assert isinstance(schemas['schemas'], list), "'schemas' field in output of /Metadata/schemas is not a list: " + repr(schemas)
    assert len(schemas['schemas']) > 0, "'schemas' field in output of /Metadata/schemas is empty: " + repr(schemas)

    if h2o_test_utils.isVerboser():
        print('Schemas: ')
        pp.pprint(schemas)

    ####################################
    # test schemas individual GET
    if h2o_test_utils.isVerbose():
        print('Testing /Metadata/schemas/FrameV3. . .')
    schemas = a_node.schema(schemaname='FrameV3', timeoutSecs=240)
    assert 'schemas' in schemas, "FAIL: failed to find 'schemas' field in output of /Metadata/schemas/FrameV3: " + repr(schemas)
    assert isinstance(schemas['schemas'], list), "'schemas' field in output of /Metadata/schemas/FrameV3 is not a list: " + repr(schemas)
    # A single-schema GET must return exactly one entry.
    assert len(schemas['schemas']) == 1, "'schemas' field in output of /Metadata/schemas/FrameV3 has an unexpected length: " + repr(schemas)

    if h2o_test_utils.isVerboser():
        print('Schemas: ')
        pp.pprint(schemas)

    #########################
    # test Metadata/endpoints
    if h2o_test_utils.isVerbose():
        print('Testing /Metadata/endpoints. . .')
    endpoints = a_node.endpoints()
    assert 'routes' in endpoints, "FAIL: failed to find routes in the endpoints result."
    assert isinstance(endpoints['routes'], list), "FAIL: routes in the endpoints result is not a list."
    assert len(endpoints['routes']) > 0, "FAIL: routes list in the endpoints result is empty."
    assert isinstance(endpoints['routes'][0], dict), "FAIL: routes[0] in the endpoints result is not a dict."
    assert 'input_schema' in endpoints['routes'][0], "FAIL: routes[0] in the endpoints result does not have an 'input_schema' field."

    #########################
    # test Metadata/schemas
    if h2o_test_utils.isVerbose():
        print('Testing /Metadata/schemas. . .')
    schemas = a_node.schemas()
    assert 'schemas' in schemas, "FAIL: failed to find schemas in the schemas result."
    assert isinstance(schemas['schemas'], list), "FAIL: schemas in the schemas result is not a list."
    assert len(schemas['schemas']) > 0, "FAIL: schemas list in the schemas result is empty."
    assert isinstance(schemas['schemas'][0], dict), "FAIL: schemas[0] in the schemas result is not a dict."
    assert 'fields' in schemas['schemas'][0], "FAIL: schemas[0] in the schemas result does not have an 'fields' field."
# Command-line setup: parse the options, configure verbosity and work out
# which host/port the tests attach to.
parser = argparse.ArgumentParser(
    description='Run basic H2O REST API tests.',
)

parser.add_argument('--verbose', '-v', help='verbose output', action='count')
parser.add_argument('--usecloud', help='ip:port to attach to', default='')
parser.add_argument('--host', help='hostname to attach to', default='localhost')
parser.add_argument('--port', help='port to attach to', type=int, default=54321)
args = parser.parse_args()

h2o_test_utils.setVerbosity(args.verbose)
h2o.H2O.verbose = h2o_test_utils.isVerboser()

# --usecloud, when given, overrides --host/--port; it must be "ip:port".
if args.usecloud:
    arr = args.usecloud.split(":")
    args.host = arr[0]
    args.port = int(arr[1])

host = args.host
port = args.port

h2o.H2O.verboseprint("host: " + str(host))
h2o.H2O.verboseprint("port: " + str(port))

pp = pprint.PrettyPrinter(indent=4)  # pretty printer for debugging

################
def build_and_test(a_node, pp, datasets, algos, algo_additional_default_params):
    """Build models and grids, then exercise parameter validation and predict.

    Steps, in order: build and validate one model per ``ModelSpec``; build and
    validate each ``GridSpec`` and fetch every model it produced; check the
    grid search limits and grid sorting; validate the default parameters of
    every model builder the server reports; run good/bad DeepLearning
    parameter validation; attempt a DeepLearning build with bad parameters;
    and finally score one model with ``predict``.

    Args:
        a_node: client for the H2O node under test.
        pp: pretty printer (anything with ``pprint``) used to dump validation
            results when an unexpected error count is seen.
        datasets: mapping indexed by dataset key; this function reads
            'prostate_clustering', 'prostate_regression', 'prostate_binomial',
            'airlines_binomial' and 'iris_multinomial'.
        algos: algorithm names known to the test; every algo reported by the
            server must be in this collection.
        algo_additional_default_params: mapping of algo name to extra
            parameters merged over that algo's server-reported defaults.

    Raises:
        AssertionError: if any of the checks below fails.
    """
    ####################################################################################################
    # Build and do basic validation checks on models
    ####################################################################################################
    models_to_build = [
        ModelSpec.for_dataset('kmeans_prostate', 'kmeans', datasets['prostate_clustering'], {'k': 2}),

        ModelSpec.for_dataset('glm_prostate_regression', 'glm', datasets['prostate_regression'], {'family': 'gaussian'}),

        ModelSpec.for_dataset('glm_prostate_binomial', 'glm', datasets['prostate_binomial'], {'family': 'binomial'}),
        ModelSpec.for_dataset('glm_airlines_binomial', 'glm', datasets['airlines_binomial'], {'response_column': 'IsDepDelayed', 'family': 'binomial'}),
        ModelSpec.for_dataset('glm_iris_multinomial', 'glm', datasets['iris_multinomial'], {'response_column': 'class', 'family': 'multinomial'}),

        ModelSpec.for_dataset('deeplearning_prostate_regression', 'deeplearning', datasets['prostate_regression'], {'epochs': 1, 'loss': 'Quadratic'}),
        ModelSpec.for_dataset('deeplearning_prostate_binomial', 'deeplearning', datasets['prostate_binomial'], {'epochs': 1, 'hidden': [20, 20], 'loss': 'CrossEntropy'}),
        ModelSpec.for_dataset('deeplearning_airlines_binomial', 'deeplearning', datasets['airlines_binomial'], {'epochs': 1, 'hidden': [10, 10], 'loss': 'CrossEntropy'}),
        ModelSpec.for_dataset('deeplearning_iris_multinomial', 'deeplearning', datasets['iris_multinomial'], {'epochs': 1, 'loss': 'CrossEntropy'}),

        ModelSpec.for_dataset('gbm_prostate_regression', 'gbm', datasets['prostate_regression'], {'ntrees': 5, 'distribution': 'gaussian'}),
        ModelSpec.for_dataset('gbm_prostate_binomial', 'gbm', datasets['prostate_binomial'], {'ntrees': 5, 'distribution': 'multinomial'}),
        ModelSpec.for_dataset('gbm_airlines_binomial', 'gbm', datasets['airlines_binomial'], {'ntrees': 5, 'distribution': 'multinomial'}),
        ModelSpec.for_dataset('gbm_iris_multinomial', 'gbm', datasets['iris_multinomial'], {'ntrees': 5, 'distribution': 'multinomial'}),
    ]

    # For grid testing, don't build any non-grid models:
    # models_to_build = []

    built_models = {}
    for model_spec in models_to_build:
        model = model_spec.build_and_validate_model(a_node)
        built_models[model_spec['dest_key']] = model

    grids_to_build = [
        GridSpec.for_dataset('kmeans_prostate_grid', 'kmeans', datasets['prostate_clustering'], {}, {'k': [2, 3, 4]}),

        GridSpec.for_dataset('glm_prostate_regression_grid', 'glm', datasets['prostate_regression'], {'family': 'gaussian'}, {'lambda': [0.0001, 0.001, 0.01, 0.1]}),

        GridSpec.for_dataset('glm_prostate_binomial_grid', 'glm', datasets['prostate_binomial'], {'family': 'binomial'}, {'lambda': [0.0001, 0.001, 0.01, 0.1]}),
        GridSpec.for_dataset('glm_airlines_binomial_grid', 'glm', datasets['airlines_binomial'], {'response_column': 'IsDepDelayed', 'family': 'binomial'}, {'lambda': [0.0001, 0.001, 0.01, 0.025]}),
        GridSpec.for_dataset('glm_iris_multinomial_grid', 'glm', datasets['iris_multinomial'], {'response_column': 'class', 'family': 'multinomial'}, {'lambda': [0.0001, 0.001, 0.01, 0.025]}),

        GridSpec.for_dataset('deeplearning_prostate_regression_grid', 'deeplearning', datasets['prostate_regression'], {'loss': 'Quadratic'}, {'epochs': [0.1, 0.5, 1]}),
        GridSpec.for_dataset('deeplearning_prostate_binomial_grid', 'deeplearning', datasets['prostate_binomial'], {'hidden': [20, 20], 'loss': 'CrossEntropy'}, {'epochs': [0.1, 0.5, 1]}),
        GridSpec.for_dataset('deeplearning_airlines_binomial_grid', 'deeplearning', datasets['airlines_binomial'], {'hidden': [10, 10], 'loss': 'CrossEntropy'}, {'epochs': [0.1, 0.5, 1]}),
        GridSpec.for_dataset('deeplearning_iris_multinomial_grid', 'deeplearning', datasets['iris_multinomial'], {'loss': 'CrossEntropy'}, {'epochs': [0.1, 0.5, 1]}),

        GridSpec.for_dataset('gbm_prostate_regression_grid', 'gbm', datasets['prostate_regression'], {'max_depth': 3}, {'ntrees': [1, 5, 10], 'distribution': ["gaussian", "poisson", "gamma", "tweedie"]}),
        GridSpec.for_dataset('gbm_prostate_binomial_grid', 'gbm', datasets['prostate_binomial'], {}, {'ntrees': [5, 7], 'max_depth': [1, 3, 5]}),
        GridSpec.for_dataset('gbm_airlines_binomial_grid', 'gbm', datasets['airlines_binomial'], {'distribution': 'multinomial'}, {'ntrees': [1, 5, 10], 'max_depth': [1, 3, 5]}),
        GridSpec.for_dataset('gbm_iris_multinomial_grid', 'gbm', datasets['iris_multinomial'], {'distribution': 'multinomial'}, {'ntrees': [1, 5, 10], 'max_depth': [1, 3, 5]}),
        # TODO: this should trigger a parameter validation error, but instead the non-grid ntrees silently overrides the grid values: GridSpec.for_dataset('gbm_iris_multinomial_grid', 'gbm', datasets['iris_multinomial'], { 'ntrees': 5, 'distribution': 'multinomial' }, { 'ntrees': [1, 5, 10], 'max_depth': [1, 3, 5] } ),

        # Test stopping criteria:
        GridSpec.for_dataset('gbm_prostate_regression_grid_max_3', 'gbm', datasets['prostate_regression'], {'max_depth': 3}, {'ntrees': [1, 2, 4], 'distribution': ["gaussian", "poisson", "gamma", "tweedie"]}, {'strategy': "RandomDiscrete", 'max_models': 3}),
        GridSpec.for_dataset('gbm_prostate_regression_grid_max_20mS', 'gbm', datasets['prostate_regression'], {'max_depth': 3}, {'ntrees': [1, 2, 4], 'distribution': ["gaussian", "poisson", "gamma", "tweedie"]}, {'strategy': "RandomDiscrete", 'max_runtime_secs': 0.020}),
    ]

    for grid_spec in grids_to_build:
        grid = grid_spec.build_and_validate_grid(a_node)

        # Each entry of 'model_ids' is a key object; the model name is under 'name'.
        for model_id in grid['model_ids']:
            model_key = model_id['name']
            built_models[model_key] = a_node.models(key=model_key)

    # test search limits: max_models
    grid = a_node.grid(key='gbm_prostate_regression_grid_max_3')
    assert len(grid['model_ids']) == 3, "FAIL: using max_models, expected a max of 3 models, got: " + str(len(grid['model_ids']))

    # test search limits: max_runtime_secs
    # (3 ntrees values x 4 distributions = 12 combinations without a limit)
    grid = a_node.grid(key='gbm_prostate_regression_grid_max_20mS')
    assert len(grid['model_ids']) < 12, "FAIL: using max_runtime_secs, expected less than 12 models, got: " + str(len(grid['model_ids']))

    # grid = a_node.grid(key='kmeans_prostate_grid', sort_by='', decreasing=True)
    h2o_test_utils.fetch_and_validate_grid_sort(a_node, key='kmeans_prostate_grid', sort_by='totss', decreasing=True)
    h2o_test_utils.fetch_and_validate_grid_sort(a_node, key='kmeans_prostate_grid', sort_by='tot_withinss', decreasing=True)
    h2o_test_utils.fetch_and_validate_grid_sort(a_node, key='kmeans_prostate_grid', sort_by='betweenss', decreasing=True)

    h2o_test_utils.fetch_and_validate_grid_sort(a_node, key='kmeans_prostate_grid', sort_by='totss', decreasing=False)
    h2o_test_utils.fetch_and_validate_grid_sort(a_node, key='kmeans_prostate_grid', sort_by='tot_withinss', decreasing=False)
    h2o_test_utils.fetch_and_validate_grid_sort(a_node, key='kmeans_prostate_grid', sort_by='betweenss', decreasing=False)

    # import sys
    # sys.exit(0)

    #######################################
    # Test default parameters validation for each model builder
    #
    if h2o_test_utils.isVerbose():
        print('Testing ModelBuilder default parameters. . .')
    model_builders = a_node.model_builders(timeoutSecs=240)['model_builders']

    # Do we know about all of them?
    server_algos = model_builders.keys()
    assert len(set(server_algos) - set(algos)) == 0, "FAIL: Our set of algos doesn't match what the server knows about. Ours: " + repr(algos) + "; server's: " + repr(server_algos)

    for algo, model_builder in model_builders.items():
        parameters_list = model_builder['parameters']
        test_parameters = {value['name']: value['default_value'] for value in parameters_list}  # collect default parameters
        if algo in algo_additional_default_params:
            test_parameters.update(algo_additional_default_params[algo])

        if h2o_test_utils.isVerboser():
            print('Testing ' + algo + ' with params: ' + repr(test_parameters))

        parameters_validation = a_node.validate_model_parameters(algo=algo, training_frame=None, parameters=test_parameters, timeoutSecs=240)  # synchronous
        assert 'error_count' in parameters_validation, "FAIL: Failed to find error_count in good-parameters parameters validation result."
        h2o.H2O.verboseprint("Good params validation messages: ", repr(parameters_validation))

        expected_count = 0
        if expected_count != parameters_validation['error_count']:
            print("validation errors: ")
            pp.pprint(parameters_validation)
        assert expected_count == parameters_validation['error_count'], "FAIL: " + str(expected_count) + " != error_count in good-parameters parameters validation result."

    #######################################
    # Test DeepLearning parameters validation
    #
    # Default parameters:
    if h2o_test_utils.isVerbose():
        print('Testing DeepLearning default parameters. . .')
    model_builder = a_node.model_builders(algo='deeplearning', timeoutSecs=240)['model_builders']['deeplearning']
    dl_test_parameters_list = model_builder['parameters']
    dl_test_parameters = {value['name']: value['default_value'] for value in dl_test_parameters_list}

    parameters_validation = a_node.validate_model_parameters(algo='deeplearning', training_frame=None, parameters=dl_test_parameters, timeoutSecs=240)  # synchronous
    assert 'error_count' in parameters_validation, "FAIL: Failed to find error_count in good-parameters parameters validation result."
    h2o.H2O.verboseprint("Good params validation messages: ", repr(parameters_validation))
    if 0 != parameters_validation['error_count']:
        print("validation errors: ")
        pp.pprint(parameters_validation)
    assert 0 == parameters_validation['error_count'], "FAIL: 0 != error_count in good-parameters parameters validation result."

    # Good parameters (note: testing with null training_frame):
    if h2o_test_utils.isVerbose():
        print('Testing DeepLearning good parameters. . .')
    dl_test_parameters = {'response_column': 'CAPSULE', 'hidden': "[10, 20, 10]"}
    parameters_validation = a_node.validate_model_parameters(algo='deeplearning', training_frame=None, parameters=dl_test_parameters, timeoutSecs=240)  # synchronous
    assert 'error_count' in parameters_validation, "FAIL: Failed to find error_count in good-parameters parameters validation result."
    h2o.H2O.verboseprint("Good params validation messages: ", repr(parameters_validation))
    if 0 != parameters_validation['error_count']:
        print("validation errors: ")
        pp.pprint(parameters_validation)
    assert 0 == parameters_validation['error_count'], "FAIL: 0 != error_count in good-parameters parameters validation result."

    # Bad parameters (input_dropout_ratio is set to 27):
    # (note: testing with null training_frame)
    if h2o_test_utils.isVerbose():
        print('Testing DeepLearning bad parameters, null training_frame. . .')
    dl_test_parameters = {'response_column': 'CAPSULE', 'hidden': "[10, 20, 10]", 'input_dropout_ratio': 27}
    parameters_validation = a_node.validate_model_parameters(algo='deeplearning', training_frame=None, parameters=dl_test_parameters, timeoutSecs=240)  # synchronous
    assert 'error_count' in parameters_validation, "FAIL: Failed to find error_count in bad-parameters parameters validation result (input_dropout_ratio)."
    h2o.H2O.verboseprint("Bad params validation messages: ", repr(parameters_validation))
    assert 0 != parameters_validation['error_count'], "FAIL: 0 == error_count in bad-parameters parameters validation result: " + repr(parameters_validation)

    # At least one error-level message must name the offending field.
    found_expected_error = any(
        validation_message['message_type'] == 'ERRR' and validation_message['field_name'] == 'input_dropout_ratio'
        for validation_message in parameters_validation['messages']
    )
    assert found_expected_error, "FAIL: Failed to find error message about input_dropout_ratio in the validation messages."

    # Bad parameters (no response_column):
    if h2o_test_utils.isVerbose():
        print('Testing DeepLearning bad parameters, null response_column. . .')
    dl_test_parameters = {'hidden': "[10, 20, 10]"}
    parameters_validation = a_node.validate_model_parameters(algo='deeplearning', training_frame='prostate_binomial', parameters=dl_test_parameters, timeoutSecs=240)  # synchronous
    assert 'error_count' in parameters_validation, "FAIL: Failed to find error_count in bad-parameters parameters validation result (response_column)."
    h2o.H2O.verboseprint("Bad params validation messages: ", repr(parameters_validation))
    assert 0 != parameters_validation['error_count'], "FAIL: 0 == error_count in bad-parameters parameters validation result: " + repr(parameters_validation)

    #######################################
    # Try to build DeepLearning model for Prostate but with bad parameters; we should get a ModelParametersSchema with the error.
    if h2o_test_utils.isVerbose():
        print('About to try to build a DeepLearning model with bad parameters. . .')
    dl_prostate_bad_parameters = {'response_column': 'CAPSULE', 'hidden': "[10, 20, 10]", 'input_dropout_ratio': 27}
    parameters_validation = a_node.build_model(algo='deeplearning', model_id='deeplearning_prostate_binomial_bad', training_frame='prostate_binomial', parameters=dl_prostate_bad_parameters, timeoutSecs=240)  # synchronous
    h2o_test_utils.validate_validation_messages(parameters_validation, ['input_dropout_ratio'])
    assert parameters_validation['__http_response']['status_code'] == requests.codes.precondition_failed, "FAIL: expected 412 Precondition Failed from a bad build request, got: " + str(parameters_validation['__http_response']['status_code'])
    if h2o_test_utils.isVerbose():
        print('Done trying to build DeepLearning model with bad parameters.')

    #####################################
    # Early test of predict()
    # TODO: remove after we remove the early exit
    p = a_node.predict(model='deeplearning_airlines_binomial', frame='airlines_binomial', predictions_frame='deeplearning_airlines_binomial_predictions')
    h2o_test_utils.validate_predictions(a_node, p, 'deeplearning_airlines_binomial', 'airlines_binomial', 43978, predictions_frame='deeplearning_airlines_binomial_predictions')
    h2o_test_utils.validate_frame_exists(a_node, 'deeplearning_airlines_binomial_predictions')
    h2o.H2O.verboseprint("Predictions for scoring: ", 'deeplearning_airlines_binomial', " on: ", 'airlines_binomial', ": ", repr(p))
def test(a_node, pp, algos):
    """Smoke-test cluster status, collection endpoints, builders and metrics.

    Args:
        a_node: client for the H2O node under test; this function calls its
            ``cloud``, ``cloud_is_bad``, ``jobs``, ``models``, ``frames``,
            ``model_builders`` and ``model_metrics`` methods.
        pp: pretty printer (anything with ``pprint``) used for the
            extra-verbose dumps.
        algos: iterable of algorithm names that must be present in the
            ``'model_builders'`` entry of every ModelBuilders response.

    Raises:
        AssertionError: if the cloud reports a problem, /Jobs lacks a 'jobs'
            element, or an expected algo is missing from a response.
    """
    ##################################
    # Test cluster status
    cloud = a_node.cloud()
    if h2o_test_utils.isVerboser():
        print('Cloud: ')
        pp.pprint(cloud)

    # cloud_is_bad() result is falsy when healthy; when truthy it is appended
    # to the failure message as the reason.
    not_ok = a_node.cloud_is_bad()
    assert not not_ok, "FAIL: cloud status is not ok!  Reason: " + not_ok

    jobs = a_node.jobs()
    if h2o_test_utils.isVerboser():
        print('Jobs: ')
        pp.pprint(jobs)
    assert 'jobs' in jobs, "FAIL: 'jobs' element not found in the result of /Jobs"

    ##################################
    # Test object collection endpoints
    models = a_node.models()
    if h2o_test_utils.isVerboser():
        print('Models: ')
        pp.pprint(models)

    models = a_node.models(api_version=92)  # note: tests API version fallback
    if h2o_test_utils.isVerboser():
        print('ModelsV92: ')
        pp.pprint(models)

    frames = a_node.frames(row_count=5)
    if h2o_test_utils.isVerboser():
        print('Frames: ')
        pp.pprint(frames)

    ####################################
    # test model_builders collection GET
    if h2o_test_utils.isVerbose():
        print('Testing /ModelBuilders. . .')
    model_builders = a_node.model_builders(timeoutSecs=240)

    if h2o_test_utils.isVerboser():
        print('ModelBuilders: ')
        pp.pprint(model_builders)

    for algo in algos:
        assert algo in model_builders['model_builders'], "FAIL: Failed to find algo: " + algo
        builder = model_builders['model_builders'][algo]
        h2o_test_utils.validate_builder(algo, builder)

    ####################################
    # test model_builders individual GET
    if h2o_test_utils.isVerbose():
        print('Testing /ModelBuilders/{algo}. . .')
    for algo in algos:
        model_builder = a_node.model_builders(algo=algo, timeoutSecs=240)
        assert algo in model_builder['model_builders'], "FAIL: Failed to find algo: " + algo
        # Validate the per-algo response itself, not the collection response
        # fetched above (which the previous loop already validated).
        builder = model_builder['model_builders'][algo]
        h2o_test_utils.validate_builder(algo, builder)

    ####################################
    # test model_metrics collection GET
    if h2o_test_utils.isVerbose():
        print('Testing /ModelMetrics. . .')
    model_metrics = a_node.model_metrics(timeoutSecs=240)

    if h2o_test_utils.isVerboser():
        print('ModelMetrics[0]: ')
        pp.pprint(model_metrics['model_metrics'][0])

    ####################################
    # test model_metrics individual GET
    model_metrics = a_node.model_metrics(timeoutSecs=240, model="deeplearning_prostate_binomial", frame="prostate_binomial")
#################
# setup
#################
# Parse the command line, configure verbosity and work out which host/port
# the tests attach to.
parser = argparse.ArgumentParser(
    description='Run basic H2O REST API tests.',
)

parser.add_argument('--verbose', '-v', help='verbose output', action='count')
parser.add_argument('--usecloud', help='ip:port to attach to', default='')
parser.add_argument('--host', help='hostname to attach to', default='localhost')
parser.add_argument('--port', help='port to attach to', type=int, default=54321)
args = parser.parse_args()

h2o_test_utils.setVerbosity(args.verbose)
h2o.H2O.verbose = h2o_test_utils.isVerboser()

# --usecloud, when given, overrides --host/--port; it must be "ip:port".
if args.usecloud:
    arr = args.usecloud.split(":")
    args.host = arr[0]
    args.port = int(arr[1])

host = args.host
port = args.port

h2o.H2O.verboseprint("host: " + str(host))
h2o.H2O.verboseprint("port: " + str(port))

pp = pprint.PrettyPrinter(indent=4)  # pretty printer for debugging

################
def load_and_test(a_node, pp):
    """Create/import the test frames and check the /Frames and /SplitFrame APIs.

    Steps, in order: run CreateFrame with defaults and check the resulting
    frame; import every dataset in ``datasets_to_import`` (converting the
    response column to a factor for Binomial datasets); check the /Frames
    endpoints and the AGE column summary for 'prostate_binomial'; and run
    SplitFrame with named and with generated destination frames.

    Args:
        a_node: client for the H2O node under test.
        pp: pretty printer (anything with ``pprint``) used for the
            extra-verbose dumps.

    Returns:
        dict mapping each dataset's ``dest_key`` to its ``DatasetSpec``.

    Raises:
        AssertionError: if any of the checks below fails.
    """
    ##################
    # Test CreateFrame
    if h2o_test_utils.isVerbose():
        print('Testing CreateFrame. . .')
    created_job = a_node.create_frame(dest='created')  # call with defaults

    a_node.poll_job(job_key=created_job['key']['name'])  # wait until done and get CreateFrameV3 instance (aka the Job)

    frames = a_node.frames(key='created')['frames']
    assert len(frames) == 1, "FAIL: expected to find 1 frame called 'created', found: " + str(len(frames))
    assert frames[0]['frame_id']['name'] == 'created', "FAIL: expected to find 1 frame called 'created', found: " + repr(frames)

    created = frames[0]
    assert 'rows' in created, "FAIL: failed to find 'rows' field in CreateFrame result."
    assert created['rows'] == 10000, "FAIL: expected value of 'rows' field in CreateFrame result to be: " + str(10000) + ", found: " + str(created['rows'])
    assert 'columns' in created, "FAIL: failed to find 'columns' field in CreateFrame result."
    assert len(created['columns']) == 10, "FAIL: expected value of 'columns' field in CreateFrame result to be: " + str(10) + ", found: " + str(len(created['columns']))

    #########################################################
    # Import and test all the datasets we'll need for the subsequent tests:
    #########################################################

    # dest_key, path, expected_rows, model_category, response_column, ignored_columns
    datasets_to_import = [
        DatasetSpec('prostate_clustering', '../../../smalldata/logreg/prostate.csv', 380, 'Clustering', None, ['ID']),
        DatasetSpec('prostate_binomial', '../../../smalldata/logreg/prostate.csv', 380, 'Binomial', 'CAPSULE', ['ID']),
        DatasetSpec('prostate_regression', '../../../smalldata/logreg/prostate.csv', 380, 'Regression', 'AGE', ['ID']),

        DatasetSpec('airlines_binomial', '../../../smalldata/airlines/allyears2k_headers.zip', 43978, 'Binomial', 'IsDepDelayed', [
            'DayofMonth', 'DepTime', 'CRSDepTime', 'ArrTime', 'CRSArrTime',
            'TailNum', 'ActualElapsedTime', 'CRSElapsedTime', 'AirTime',
            'ArrDelay', 'DepDelay', 'TaxiIn', 'TaxiOut', 'Cancelled',
            'CancellationCode', 'Diverted', 'CarrierDelay', 'WeatherDelay',
            'NASDelay', 'SecurityDelay', 'LateAircraftDelay', 'IsArrDelayed',
        ]),

        DatasetSpec('iris_multinomial', '../../../smalldata/iris/iris_wheader.csv', 150, 'Multinomial', 'class', []),
    ]

    datasets = {}  # the dataset spec
    for dataset_spec in datasets_to_import:
        # The imported dataset is also stored in dataset_spec['dataset'], so
        # the return value is not needed here.
        dataset_spec.import_and_validate_dataset(a_node)
        if dataset_spec['model_category'] == 'Binomial':
            a_node.as_factor(dataset_spec['dest_key'], dataset_spec['response_column'])
        datasets[dataset_spec['dest_key']] = dataset_spec

    ################################################
    # Test /Frames for prostate.csv
    frames = a_node.frames(row_count=5)['frames']
    frames_dict = h2o_test_utils.list_to_dict(frames, 'frame_id/name')

    if h2o_test_utils.isVerboser():
        print("frames: ")
        pp.pprint(frames)

    if h2o_test_utils.isVerboser():
        print("frames_dict: ")
        pp.pprint(frames_dict)

    assert 'prostate_binomial' in frames_dict, "FAIL: Failed to find " + 'prostate_binomial' + " in Frames list."
    assert not frames_dict['prostate_binomial']['is_text'], "FAIL: Parsed Frame is is_text"

    # Test /Frames/{key} for prostate.csv
    frames = a_node.frames(key='prostate_binomial', row_count=5)['frames']
    frames_dict = h2o_test_utils.list_to_dict(frames, 'frame_id/name')
    assert 'prostate_binomial' in frames_dict, "FAIL: Failed to find prostate.hex in Frames list."
    columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label')
    assert 'CAPSULE' in columns_dict, "FAIL: Failed to find CAPSULE in Frames/prostate.hex."
    assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns."
    assert 'histogram_bins' in columns_dict['AGE'], "FAIL: Failed to find bins in Frames/prostate.hex/columns/AGE."
    h2o.H2O.verboseprint('bins: ' + repr(columns_dict['AGE']['histogram_bins']))
    assert columns_dict['AGE']['histogram_bins'] is None, "FAIL: Failed to clear bins field."  # should be cleared except for /summary

    # Test /Frames/{key}/columns for prostate.csv
    frames = a_node.columns(key='prostate_binomial')['frames']
    columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label')
    assert 'ID' in columns_dict, "FAIL: Failed to find ID in Frames/prostate.hex/columns."
    assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns."
    assert 'histogram_bins' in columns_dict['AGE'], "FAIL: Failed to find bins in Frames/prostate.hex/columns/AGE."
    h2o.H2O.verboseprint('bins: ' + repr(columns_dict['AGE']['histogram_bins']))
    assert columns_dict['AGE']['histogram_bins'] is None, "FAIL: Failed to clear bins field."  # should be cleared except for /summary

    # Test /Frames/{key}/columns/{label} for prostate.csv
    frames = a_node.column(key='prostate_binomial', column='AGE')['frames']
    columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label')
    assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns."
    assert 'histogram_bins' in columns_dict['AGE'], "FAIL: Failed to find bins in Frames/prostate.hex/columns/AGE."
    h2o.H2O.verboseprint('bins: ' + repr(columns_dict['AGE']['histogram_bins']))
    assert columns_dict['AGE']['histogram_bins'] is None, "FAIL: Failed to clear bins field."  # should be cleared except for /summary

    # Test /Frames/{key}/columns/{label}/summary for prostate.csv
    frames = a_node.summary(key='prostate_binomial', column='AGE')['frames']
    columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label')
    assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns/AGE/summary."
    col = columns_dict['AGE']
    h2o_test_utils.assertKeysExistAndNonNull(col, '', [
        'label', 'missing_count', 'zero_count', 'positive_infinity_count',
        'negative_infinity_count', 'mins', 'maxs', 'mean', 'sigma', 'type',
        'data', 'precision', 'histogram_bins', 'histogram_base',
        'histogram_stride', 'percentiles',
    ])
    h2o_test_utils.assertKeysExist(col, '', ['domain', 'string_data'])
    assert col['mins'][0] == 43, 'FAIL: Failed to find 43 as the first min for AGE.'
    assert col['maxs'][0] == 79, 'FAIL: Failed to find 79 as the first max for AGE.'
    assert abs(col['mean'] - 66.03947368421052) < 1e-8, 'FAIL: Failed to find 66.03947368421052 as the mean for AGE.'
    assert abs(col['sigma'] - 6.527071269173308) < 1e-8, 'FAIL: Failed to find 6.527071269173308 as the sigma for AGE.'
    assert col['type'] == 'int', 'FAIL: Failed to find int as the type for AGE.'
    assert col['data'][0] == 65, 'FAIL: Failed to find 65 as the first data for AGE.'
    assert col['precision'] == -1, 'FAIL: Failed to find -1 as the precision for AGE.'
    assert col['histogram_bins'][0] == 1, 'FAIL: Failed to find 1 as the first bin for AGE.'
    assert col['histogram_base'] == 43, 'FAIL: Failed to find 43 as the histogram_base for AGE.'
    assert col['histogram_stride'] == 1, 'FAIL: Failed to find 1 as the histogram_stride for AGE.'
    # The message now quotes the value that is actually asserted (44.516).
    assert col['percentiles'][0] == 44.516, 'FAIL: Failed to find 44.516 as the 0.1% percentile for AGE. ' + str(col['percentiles'][0])
    assert col['percentiles'][1] == 50.79, 'FAIL: Failed to find 50.79 as the 1.0% percentile for AGE. ' + str(col['percentiles'][1])
    assert col['percentiles'][15] == 78, 'FAIL: Failed to find 78 as the 99.0% percentile for AGE. ' + str(col['percentiles'][15])
    assert col['percentiles'][16] == 79, 'FAIL: Failed to find 79 as the 99.9% percentile for AGE. ' + str(col['percentiles'][16])
    # NB: col['percentiles'] corresponds to probs=[0.001,0.01,0.1,0.2,0.25,0.3,1.0/3.0,0.4,0.5,0.6,2.0/3.0,0.7,0.75,0.8,0.9,0.99,0.999]

    # Test /SplitFrame for prostate.csv
    if h2o_test_utils.isVerbose():
        print('Testing SplitFrame with named destination_frames. . .')
    splits = a_node.split_frame(dataset='prostate_binomial', ratios=[0.8], destination_frames=['bigger', 'smaller'])
    frames = a_node.frames()['frames']
    h2o_test_utils.validate_frame_exists(a_node, 'bigger', frames)
    h2o_test_utils.validate_frame_exists(a_node, 'smaller', frames)
    bigger = a_node.frames(key='bigger')['frames'][0]
    smaller = a_node.frames(key='smaller')['frames'][0]
    # str() is required: 'rows' is compared to ints above, and str + int
    # would raise TypeError instead of reporting the failed assertion.
    assert bigger['rows'] == 304, 'FAIL: 80/20 SplitFrame yielded the wrong number of rows.  Expected: 304; got: ' + str(bigger['rows'])
    assert smaller['rows'] == 76, 'FAIL: 80/20 SplitFrame yielded the wrong number of rows.  Expected: 76; got: ' + str(smaller['rows'])
    h2o_test_utils.validate_job_exists(a_node, splits['key']['name'])

    if h2o_test_utils.isVerbose():
        print('Testing SplitFrame with generated destination_frames. . .')
    splits = a_node.split_frame(dataset='prostate_binomial', ratios=[0.5])
    frames = a_node.frames()['frames']
    first_name = splits['destination_frames'][0]['name']
    second_name = splits['destination_frames'][1]['name']
    h2o_test_utils.validate_frame_exists(a_node, first_name, frames)
    h2o_test_utils.validate_frame_exists(a_node, second_name, frames)
    first = a_node.frames(key=first_name)['frames'][0]
    second = a_node.frames(key=second_name)['frames'][0]
    assert first['rows'] == 190, 'FAIL: 50/50 SplitFrame yielded the wrong number of rows.  Expected: 190; got: ' + str(first['rows'])
    assert second['rows'] == 190, 'FAIL: 50/50 SplitFrame yielded the wrong number of rows.  Expected: 190; got: ' + str(second['rows'])
    h2o_test_utils.validate_job_exists(a_node, splits['key']['name'])

    return datasets
def _assert_no_validation_errors(pp, parameters_validation):
    """Assert that a parameters-validation result reports zero errors, dumping it via pp if not."""
    assert 'error_count' in parameters_validation, "FAIL: Failed to find error_count in good-parameters parameters validation result."
    h2o.H2O.verboseprint("Good params validation messages: ", repr(parameters_validation))

    error_count = parameters_validation['error_count']
    if error_count != 0:
        # Print the full result first so the failure is diagnosable from the log.
        print("validation errors: ")
        pp.pprint(parameters_validation)
    assert error_count == 0, "FAIL: 0 != error_count in good-parameters parameters validation result."


def _assert_has_validation_errors(parameters_validation, what):
    """Assert that a parameters-validation result reports at least one error; `what` names the bad field."""
    assert 'error_count' in parameters_validation, "FAIL: Failed to find error_count in bad-parameters parameters validation result (" + what + ")."
    h2o.H2O.verboseprint("Bad params validation messages: ", repr(parameters_validation))
    assert 0 != parameters_validation['error_count'], "FAIL: 0 == error_count in bad-parameters parameters validation result: " + repr(parameters_validation)


def build_and_test(a_node, pp, datasets, algos, algo_additional_default_params):
    """Build models and grids on a_node and run basic validation checks against them.

    Parameters:
        a_node: node client object; its model, grid, validation and predict
            methods are called here.
        pp: pretty-printer used (via pp.pprint) to dump validation results.
        datasets: mapping from dataset name to the dataset spec passed on to
            ModelSpec.for_dataset / GridSpec.for_dataset.
        algos: collection of algorithm names this test knows about; every
            algorithm reported by the server must be in it.
        algo_additional_default_params: mapping from algorithm name to extra
            parameters merged over the server-reported defaults before the
            defaults are validated.

    Raises:
        AssertionError: if any of the checks fails.
    """
    ####################################################################################################
    # Build and do basic validation checks on models
    ####################################################################################################
    models_to_build = [
        ModelSpec.for_dataset('kmeans_prostate', 'kmeans', datasets['prostate_clustering'], {'k': 2}),

        ModelSpec.for_dataset('glm_prostate_regression', 'glm', datasets['prostate_regression'], {'family': 'gaussian'}),
        ModelSpec.for_dataset('glm_prostate_binomial', 'glm', datasets['prostate_binomial'], {'family': 'binomial'}),
        ModelSpec.for_dataset('glm_airlines_binomial', 'glm', datasets['airlines_binomial'], {'response_column': 'IsDepDelayed', 'family': 'binomial'}),
        ModelSpec.for_dataset('glm_iris_multinomial', 'glm', datasets['iris_multinomial'], {'response_column': 'class', 'family': 'multinomial'}),

        ModelSpec.for_dataset('deeplearning_prostate_regression', 'deeplearning', datasets['prostate_regression'], {'epochs': 1, 'loss': 'Quadratic'}),
        ModelSpec.for_dataset('deeplearning_prostate_binomial', 'deeplearning', datasets['prostate_binomial'], {'epochs': 1, 'hidden': [20, 20], 'loss': 'CrossEntropy'}),
        ModelSpec.for_dataset('deeplearning_airlines_binomial', 'deeplearning', datasets['airlines_binomial'], {'epochs': 1, 'hidden': [10, 10], 'loss': 'CrossEntropy'}),
        ModelSpec.for_dataset('deeplearning_iris_multinomial', 'deeplearning', datasets['iris_multinomial'], {'epochs': 1, 'loss': 'CrossEntropy'}),

        ModelSpec.for_dataset('gbm_prostate_regression', 'gbm', datasets['prostate_regression'], {'ntrees': 5, 'distribution': 'gaussian'}),
        ModelSpec.for_dataset('gbm_prostate_binomial', 'gbm', datasets['prostate_binomial'], {'ntrees': 5, 'distribution': 'multinomial'}),
        ModelSpec.for_dataset('gbm_airlines_binomial', 'gbm', datasets['airlines_binomial'], {'ntrees': 5, 'distribution': 'multinomial'}),
        ModelSpec.for_dataset('gbm_iris_multinomial', 'gbm', datasets['iris_multinomial'], {'ntrees': 5, 'distribution': 'multinomial'}),
    ]

    # For grid testing, don't build any non-grid models:
    # models_to_build = []

    built_models = {}
    for model_spec in models_to_build:
        model = model_spec.build_and_validate_model(a_node)
        built_models[model_spec['dest_key']] = model

    gbm_regression_distributions = ["gaussian", "poisson", "gamma", "tweedie"]

    grids_to_build = [
        # setting a hyperparameter in both places:
        # GridSpec.for_dataset('kmeans_prostate_grid', 'kmeans', datasets['prostate_clustering'], { 'k': 6 }, { 'k': [2, 3, 4] } ),
        GridSpec.for_dataset('kmeans_prostate_grid', 'kmeans', datasets['prostate_clustering'], {}, {'k': [2, 3, 4]}),

        GridSpec.for_dataset('glm_prostate_regression_grid', 'glm', datasets['prostate_regression'], {'family': 'gaussian'}, {'lambda': [0.0001, 0.001, 0.01, 0.1]}),
        GridSpec.for_dataset('glm_prostate_binomial_grid', 'glm', datasets['prostate_binomial'], {'family': 'binomial'}, {'lambda': [0.0001, 0.001, 0.01, 0.1]}),
        GridSpec.for_dataset('glm_airlines_binomial_grid', 'glm', datasets['airlines_binomial'], {'response_column': 'IsDepDelayed', 'family': 'binomial'}, {'lambda': [0.0001, 0.001, 0.01, 0.025]}),
        GridSpec.for_dataset('glm_iris_multinomial_grid', 'glm', datasets['iris_multinomial'], {'response_column': 'class', 'family': 'multinomial'}, {'lambda': [0.0001, 0.001, 0.01, 0.025]}),

        GridSpec.for_dataset('deeplearning_prostate_regression_grid', 'deeplearning', datasets['prostate_regression'], {'loss': 'Quadratic'}, {'epochs': [0.1, 0.5, 1]}),
        GridSpec.for_dataset('deeplearning_prostate_binomial_grid', 'deeplearning', datasets['prostate_binomial'], {'hidden': [20, 20], 'loss': 'CrossEntropy'}, {'epochs': [0.1, 0.5, 1]}),
        GridSpec.for_dataset('deeplearning_airlines_binomial_grid', 'deeplearning', datasets['airlines_binomial'], {'hidden': [10, 10], 'loss': 'CrossEntropy'}, {'epochs': [0.1, 0.5, 1]}),
        GridSpec.for_dataset('deeplearning_iris_multinomial_grid', 'deeplearning', datasets['iris_multinomial'], {'loss': 'CrossEntropy'}, {'epochs': [0.1, 0.5, 1]}),

        GridSpec.for_dataset('gbm_prostate_regression_grid', 'gbm', datasets['prostate_regression'], {'max_depth': 3}, {'ntrees': [1, 5, 10], 'distribution': list(gbm_regression_distributions)}),
        GridSpec.for_dataset('gbm_prostate_binomial_grid', 'gbm', datasets['prostate_binomial'], {}, {'ntrees': [5, 7], 'max_depth': [1, 3, 5]}),
        GridSpec.for_dataset('gbm_airlines_binomial_grid', 'gbm', datasets['airlines_binomial'], {'distribution': 'multinomial'}, {'ntrees': [1, 5, 10], 'max_depth': [1, 3, 5]}),
        GridSpec.for_dataset('gbm_iris_multinomial_grid', 'gbm', datasets['iris_multinomial'], {'distribution': 'multinomial'}, {'ntrees': [1, 5, 10], 'max_depth': [1, 3, 5]}),
        # TODO: this should trigger a parameter validation error, but instead the non-grid ntrees silently overrides the grid values:
        # GridSpec.for_dataset('gbm_iris_multinomial_grid', 'gbm', datasets['iris_multinomial'], { 'ntrees': 5, 'distribution': 'multinomial' }, { 'ntrees': [1, 5, 10], 'max_depth': [1, 3, 5] } ),
        # NOTE(review): the entry above is kept disabled, as it appears to have been in the original; confirm.

        # Test stopping criteria:
        GridSpec.for_dataset('gbm_prostate_regression_grid_max_3', 'gbm', datasets['prostate_regression'], {'max_depth': 3},
                             {'ntrees': [1, 2, 4], 'distribution': list(gbm_regression_distributions)},
                             {'strategy': "RandomDiscrete", 'max_models': 3}),
        GridSpec.for_dataset('gbm_prostate_regression_grid_max_20mS', 'gbm', datasets['prostate_regression'], {'max_depth': 3},
                             {'ntrees': [1, 2, 4], 'distribution': list(gbm_regression_distributions)},
                             {'strategy': "RandomDiscrete", 'max_runtime_secs': 0.020}),
        GridSpec.for_dataset('gbm_prostate_regression_grid_stopping_deviance', 'gbm', datasets['prostate_regression'], {},
                             {'max_depth': [1, 2, 3, 4, 5, 6, 7], 'ntrees': [1, 2, 3, 4, 5, 6], 'distribution': ["gaussian", "poisson", "gamma"]},
                             {'strategy': "RandomDiscrete", 'seed': 42, 'stopping_metric': 'deviance', 'stopping_tolerance': 0.00001, 'stopping_rounds': 5}),
        GridSpec.for_dataset('gbm_prostate_regression_grid_stopping_auto', 'gbm', datasets['prostate_regression'], {},
                             {'max_depth': [1, 2, 3, 4, 5, 6, 7], 'ntrees': [1, 2, 3, 4, 5, 6], 'distribution': ["gaussian", "poisson", "gamma"]},
                             {'strategy': "RandomDiscrete", 'seed': 42, 'stopping_metric': 'AUTO', 'stopping_tolerance': 0.00001, 'stopping_rounds': 5}),
    ]

    for grid_spec in grids_to_build:
        grid = grid_spec.build_and_validate_grid(a_node)

        # Record every model the grid produced, keyed by model name.
        for model_id in grid['model_ids']:
            model_key = model_id['name']
            built_models[model_key] = a_node.models(key=model_key)

    # test search limits: max_models
    grid = a_node.grid(key='gbm_prostate_regression_grid_max_3')
    max_models_count = len(grid['model_ids'])
    assert max_models_count == 3, "FAIL: using max_models, expected a max of 3 models, got: " + str(max_models_count)

    # test search limits: max_runtime_secs
    grid = a_node.grid(key='gbm_prostate_regression_grid_max_20mS')
    max_runtime_count = len(grid['model_ids'])
    assert max_runtime_count < 12, "FAIL: using max_runtime_secs, expected less than 12 models, got: " + str(max_runtime_count)

    # test search limits: stopping_deviance
    grid = a_node.grid(key='gbm_prostate_regression_grid_stopping_deviance')
    deviance_model_count = len(grid['model_ids'])
    assert deviance_model_count < 126, "FAIL: using asymptotic deviance stopping criterion, expected less than 126 models, got: " + str(deviance_model_count)

    # test search limits: stopping_auto
    grid = a_node.grid(key='gbm_prostate_regression_grid_stopping_auto')
    auto_model_count = len(grid['model_ids'])
    assert auto_model_count < 126, "FAIL: using asymptotic auto stopping criterion, expected less than 126 models, got: " + str(auto_model_count)

    # test that AUTO gave the same answer as deviance
    assert deviance_model_count == auto_model_count, "FAIL: using asymptotic auto stopping criterion, expected the same number of models as deviance, got: " + str(auto_model_count) + " instead of: " + str(deviance_model_count)

    # grid = a_node.grid(key='kmeans_prostate_grid', sort_by='', decreasing=True)
    # Same call order as before: all three metrics descending, then all three ascending.
    for decreasing in (True, False):
        for sort_by in ('totss', 'tot_withinss', 'betweenss'):
            h2o_test_utils.fetch_and_validate_grid_sort(a_node, key='kmeans_prostate_grid', sort_by=sort_by, decreasing=decreasing)

    # import sys
    # sys.exit(0)

    #######################################
    # Test default parameters validation for each model builder
    #
    if h2o_test_utils.isVerbose():
        print('Testing ModelBuilder default parameters. . .')
    model_builders = a_node.model_builders(timeoutSecs=240)['model_builders']

    # Do we know about all of them?
    server_algos = model_builders.keys()
    assert len(set(server_algos) - set(algos)) == 0, "FAIL: Our set of algos doesn't match what the server knows about. Ours: " + repr(algos) + "; server's: " + repr(server_algos)

    # items() rather than iteritems(): same pairs on Python 2, and also valid on Python 3.
    for algo, model_builder in model_builders.items():
        parameters_list = model_builder['parameters']
        # collect default parameters
        test_parameters = {value['name']: value['default_value'] for value in parameters_list}
        if algo in algo_additional_default_params:
            test_parameters.update(algo_additional_default_params[algo])

        if h2o_test_utils.isVerboser():
            print('Testing ' + algo + ' with params: ' + repr(test_parameters))

        parameters_validation = a_node.validate_model_parameters(algo=algo, training_frame=None, parameters=test_parameters, timeoutSecs=240)  # synchronous
        _assert_no_validation_errors(pp, parameters_validation)

    #######################################
    # Test DeepLearning parameters validation
    #
    # Default parameters:
    if h2o_test_utils.isVerbose():
        print('Testing DeepLearning default parameters. . .')
    model_builder = a_node.model_builders(algo='deeplearning', timeoutSecs=240)['model_builders']['deeplearning']
    dl_test_parameters_list = model_builder['parameters']
    dl_test_parameters = {value['name']: value['default_value'] for value in dl_test_parameters_list}

    parameters_validation = a_node.validate_model_parameters(algo='deeplearning', training_frame=None, parameters=dl_test_parameters, timeoutSecs=240)  # synchronous
    _assert_no_validation_errors(pp, parameters_validation)

    # Good parameters (note: testing with null training_frame):
    if h2o_test_utils.isVerbose():
        print('Testing DeepLearning good parameters. . .')
    dl_test_parameters = {'response_column': 'CAPSULE', 'hidden': "[10, 20, 10]"}
    parameters_validation = a_node.validate_model_parameters(algo='deeplearning', training_frame=None, parameters=dl_test_parameters, timeoutSecs=240)  # synchronous
    _assert_no_validation_errors(pp, parameters_validation)

    # Bad parameters (input_dropout_ratio is out of range):
    # (note: testing with null training_frame)
    if h2o_test_utils.isVerbose():
        print('Testing DeepLearning bad parameters, null training_frame. . .')
    dl_test_parameters = {'response_column': 'CAPSULE', 'hidden': "[10, 20, 10]", 'input_dropout_ratio': 27}
    parameters_validation = a_node.validate_model_parameters(algo='deeplearning', training_frame=None, parameters=dl_test_parameters, timeoutSecs=240)  # synchronous
    _assert_has_validation_errors(parameters_validation, 'input_dropout_ratio')

    # There must be an error-level message attached to the offending field.
    found_expected_error = any(
        message['message_type'] == 'ERRR' and message['field_name'] == 'input_dropout_ratio'
        for message in parameters_validation['messages'])
    assert found_expected_error, "FAIL: Failed to find error message about input_dropout_ratio in the validation messages."

    # Bad parameters (no response_column):
    if h2o_test_utils.isVerbose():
        print('Testing DeepLearning bad parameters, null response_column. . .')
    dl_test_parameters = {'hidden': "[10, 20, 10]"}
    parameters_validation = a_node.validate_model_parameters(algo='deeplearning', training_frame='prostate_binomial', parameters=dl_test_parameters, timeoutSecs=240)  # synchronous
    _assert_has_validation_errors(parameters_validation, 'response_column')

    #######################################
    # Try to build DeepLearning model for Prostate but with bad parameters; we should get a ModelParametersSchema with the error.
    if h2o_test_utils.isVerbose():
        print('About to try to build a DeepLearning model with bad parameters. . .')
    dl_prostate_bad_parameters = {'response_column': 'CAPSULE', 'hidden': "[10, 20, 10]", 'input_dropout_ratio': 27}
    parameters_validation = a_node.build_model(algo='deeplearning', model_id='deeplearning_prostate_binomial_bad', training_frame='prostate_binomial', parameters=dl_prostate_bad_parameters, timeoutSecs=240)  # synchronous
    h2o_test_utils.validate_validation_messages(parameters_validation, ['input_dropout_ratio'])

    status_code = parameters_validation['__http_response']['status_code']
    assert status_code == requests.codes.precondition_failed, "FAIL: expected 412 Precondition Failed from a bad build request, got: " + str(status_code)
    if h2o_test_utils.isVerbose():
        print('Done trying to build DeepLearning model with bad parameters.')

    #####################################
    # Early test of predict()
    # TODO: remove after we remove the early exit
    p = a_node.predict(model='deeplearning_airlines_binomial', frame='airlines_binomial', predictions_frame='deeplearning_airlines_binomial_predictions')
    h2o_test_utils.validate_predictions(a_node, p, 'deeplearning_airlines_binomial', 'airlines_binomial', 43978, predictions_frame='deeplearning_airlines_binomial_predictions')
    h2o_test_utils.validate_frame_exists(a_node, 'deeplearning_airlines_binomial_predictions')
    h2o.H2O.verboseprint("Predictions for scoring: ", 'deeplearning_airlines_binomial', " on: ", 'airlines_binomial', ": ", repr(p))
def _assert_age_bins_cleared(columns_dict):
    """Assert that the AGE column is present and that its histogram_bins field is None."""
    assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns."
    assert 'histogram_bins' in columns_dict['AGE'], "FAIL: Failed to find bins in Frames/prostate.hex/columns/AGE."
    h2o.H2O.verboseprint('bins: ' + repr(columns_dict['AGE']['histogram_bins']))
    # should be cleared except for /summary
    assert columns_dict['AGE']['histogram_bins'] is None, "FAIL: Failed to clear bins field."


def load_and_test(a_node, pp):
    """Create and import the test datasets on a_node and check the Frames and SplitFrame endpoints.

    Parameters:
        a_node: node client object; its create_frame, poll_job, frames,
            columns, column, summary and split_frame methods are called here.
        pp: pretty-printer used (via pp.pprint) for verbose dumps.

    Returns:
        dict mapping each dataset's dest_key to its DatasetSpec.

    Raises:
        AssertionError: if any of the checks fails.
    """
    ##################
    # Test CreateFrame
    if h2o_test_utils.isVerbose():
        print('Testing CreateFrame. . .')
    created_job = a_node.create_frame(dest='created')  # call with defaults

    a_node.poll_job(job_key=created_job['key']['name'])  # wait until done and get CreateFrameV3 instance (aka the Job)

    frames = a_node.frames(key='created')['frames']
    assert len(frames) == 1, "FAIL: expected to find 1 frame called 'created', found: " + str(len(frames))
    assert frames[0]['frame_id']['name'] == 'created', "FAIL: expected to find 1 frame called 'created', found: " + repr(frames)

    created = frames[0]
    assert 'rows' in created, "FAIL: failed to find 'rows' field in CreateFrame result."
    assert created['rows'] == 10000, "FAIL: expected value of 'rows' field in CreateFrame result to be: " + str(10000) + ", found: " + str(created['rows'])
    assert 'columns' in created, "FAIL: failed to find 'columns' field in CreateFrame result."
    assert len(created['columns']) == 10, "FAIL: expected value of 'columns' field in CreateFrame result to be: " + str(10) + ", found: " + str(len(created['columns']))

    #########################################################
    # Import and test all the datasets we'll need for the subsequent tests:
    #########################################################

    # dest_key, path, expected_rows, model_category, response_column, ignored_columns
    datasets_to_import = [
        DatasetSpec('prostate_clustering', '../../smalldata/logreg/prostate.csv', 380, 'Clustering', None, ['ID']),
        DatasetSpec('prostate_binomial', '../../smalldata/logreg/prostate.csv', 380, 'Binomial', 'CAPSULE', ['ID']),
        DatasetSpec('prostate_regression', '../../smalldata/logreg/prostate.csv', 380, 'Regression', 'AGE', ['ID']),

        DatasetSpec('airlines_binomial', '../../smalldata/airlines/allyears2k_headers.zip', 43978, 'Binomial', 'IsDepDelayed', ['IsArrDelayed', 'ArrDelay', 'DepDelay']),  # TODO: more ignored?

        DatasetSpec('iris_multinomial', '../../smalldata/iris/iris_wheader.csv', 150, 'Multinomial', 'class', []),
    ]

    datasets = {}  # the dataset spec
    for dataset_spec in datasets_to_import:
        # The imported dataset is also stored in dataset_spec['dataset'], so only the spec is kept.
        dataset_spec.import_and_validate_dataset(a_node)
        datasets[dataset_spec['dest_key']] = dataset_spec

    ################################################
    # Test /Frames for prostate.csv
    frames = a_node.frames(row_count=5)['frames']
    frames_dict = h2o_test_utils.list_to_dict(frames, 'frame_id/name')

    # TODO: remove:
    if h2o_test_utils.isVerboser():
        print("frames: ")
        pp.pprint(frames)

    if h2o_test_utils.isVerboser():
        print("frames_dict: ")
        pp.pprint(frames_dict)

    assert 'prostate_binomial' in frames_dict, "FAIL: Failed to find " + 'prostate_binomial' + " in Frames list."
    assert not frames_dict['prostate_binomial']['is_text'], "FAIL: Parsed Frame is is_text"

    # Test /Frames/{key} for prostate.csv
    frames = a_node.frames(key='prostate_binomial', row_count=5)['frames']
    frames_dict = h2o_test_utils.list_to_dict(frames, 'frame_id/name')
    assert 'prostate_binomial' in frames_dict, "FAIL: Failed to find prostate.hex in Frames list."
    columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label')
    assert 'CAPSULE' in columns_dict, "FAIL: Failed to find CAPSULE in Frames/prostate.hex."
    _assert_age_bins_cleared(columns_dict)

    # Test /Frames/{key}/columns for prostate.csv
    frames = a_node.columns(key='prostate_binomial')['frames']
    columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label')
    assert 'ID' in columns_dict, "FAIL: Failed to find ID in Frames/prostate.hex/columns."
    _assert_age_bins_cleared(columns_dict)

    # Test /Frames/{key}/columns/{label} for prostate.csv
    frames = a_node.column(key='prostate_binomial', column='AGE')['frames']
    columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label')
    _assert_age_bins_cleared(columns_dict)

    # Test /Frames/{key}/columns/{label}/summary for prostate.csv
    frames = a_node.summary(key='prostate_binomial', column='AGE')['frames']
    columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label')
    assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns/AGE/summary."
    col = columns_dict['AGE']
    h2o_test_utils.assertKeysExistAndNonNull(col, '', ['label', 'missing_count', 'zero_count', 'positive_infinity_count', 'negative_infinity_count', 'mins', 'maxs', 'mean', 'sigma', 'type', 'data', 'precision', 'histogram_bins', 'histogram_base', 'histogram_stride', 'percentiles'])
    h2o_test_utils.assertKeysExist(col, '', ['domain', 'string_data'])
    assert col['mins'][0] == 43, 'FAIL: Failed to find 43 as the first min for AGE.'
    assert col['maxs'][0] == 79, 'FAIL: Failed to find 79 as the first max for AGE.'
    assert abs(col['mean'] - 66.03947368421052) < 1e-8, 'FAIL: Failed to find 66.03947368421052 as the mean for AGE.'
    assert abs(col['sigma'] - 6.527071269173308) < 1e-8, 'FAIL: Failed to find 6.527071269173308 as the sigma for AGE.'
    assert col['type'] == 'int', 'FAIL: Failed to find int as the type for AGE.'
    assert col['data'][0] == 65, 'FAIL: Failed to find 65 as the first data for AGE.'
    assert col['precision'] == -1, 'FAIL: Failed to find -1 as the precision for AGE.'
    assert col['histogram_bins'][0] == 1, 'FAIL: Failed to find 1 as the first bin for AGE.'
    assert col['histogram_base'] == 43, 'FAIL: Failed to find 43 as the histogram_base for AGE.'
    assert col['histogram_stride'] == 1, 'FAIL: Failed to find 1 as the histogram_stride for AGE.'
    # The message now names the value actually asserted (44.516).
    assert col['percentiles'][0] == 44.516, 'FAIL: Failed to find 44.516 as the 0.1% percentile for AGE. ' + str(col['percentiles'][0])
    assert col['percentiles'][1] == 50.79, 'FAIL: Failed to find 50.79 as the 1.0% percentile for AGE. ' + str(col['percentiles'][1])
    assert col['percentiles'][15] == 78, 'FAIL: Failed to find 78 as the 99.0% percentile for AGE. ' + str(col['percentiles'][15])
    assert col['percentiles'][16] == 79, 'FAIL: Failed to find 79 as the 99.9% percentile for AGE. ' + str(col['percentiles'][16])
    # NB: col['percentiles'] corresponds to probs=[0.001,0.01,0.1,0.2,0.25,0.3,1.0/3.0,0.4,0.5,0.6,2.0/3.0,0.7,0.75,0.8,0.9,0.99,0.999]

    # Test /SplitFrame for prostate.csv
    if h2o_test_utils.isVerbose():
        print('Testing SplitFrame with named destination_frames. . .')
    splits = a_node.split_frame(dataset='prostate_binomial', ratios=[0.8], destination_frames=['bigger', 'smaller'])
    frames = a_node.frames()['frames']
    h2o_test_utils.validate_frame_exists(a_node, 'bigger', frames)
    h2o_test_utils.validate_frame_exists(a_node, 'smaller', frames)
    bigger = a_node.frames(key='bigger')['frames'][0]
    smaller = a_node.frames(key='smaller')['frames'][0]
    # str() is required: the row counts are compared as ints, and str + int would raise
    # TypeError and hide the real failure message.
    assert bigger['rows'] == 304, 'FAIL: 80/20 SplitFrame yielded the wrong number of rows. Expected: 304; got: ' + str(bigger['rows'])
    assert smaller['rows'] == 76, 'FAIL: 80/20 SplitFrame yielded the wrong number of rows. Expected: 76; got: ' + str(smaller['rows'])
    # TODO: h2o_test_utils.validate_job_exists(a_node, splits['frame_id']['name'])

    if h2o_test_utils.isVerbose():
        print('Testing SplitFrame with generated destination_frames. . .')
    splits = a_node.split_frame(dataset='prostate_binomial', ratios=[0.5])
    frames = a_node.frames()['frames']
    first_name = splits['destination_frames'][0]['name']
    second_name = splits['destination_frames'][1]['name']
    h2o_test_utils.validate_frame_exists(a_node, first_name, frames)
    h2o_test_utils.validate_frame_exists(a_node, second_name, frames)
    first = a_node.frames(key=first_name)['frames'][0]
    second = a_node.frames(key=second_name)['frames'][0]
    assert first['rows'] == 190, 'FAIL: 50/50 SplitFrame yielded the wrong number of rows. Expected: 190; got: ' + str(first['rows'])
    assert second['rows'] == 190, 'FAIL: 50/50 SplitFrame yielded the wrong number of rows. Expected: 190; got: ' + str(second['rows'])
    # TODO: h2o_test_utils.validate_job_exists(a_node, splits['frame_id']['name'])

    return datasets
def load_and_test(a_node, pp): ################## # Test CreateFrame if h2o_test_utils.isVerbose(): print 'Testing CreateFrame. . .' created_job = a_node.create_frame(dest='created') # call with defaults a_node.poll_job(job_key=created_job['key']['name']) # wait until done and get CreateFrameV3 instance (aka the Job) frames = a_node.frames(key='created')['frames'] assert len(frames) == 1, "FAIL: expected to find 1 frame called 'created', found: " + str(len(frames)) assert frames[0]['frame_id']['name'] == 'created', "FAIL: expected to find 1 frame called 'created', found: " + repr(frames) created = frames[0] assert 'rows' in created, "FAIL: failed to find 'rows' field in CreateFrame result." assert created['rows'] == 10000, "FAIL: expected value of 'rows' field in CreateFrame result to be: " + str(10000) + ", found: " + str(created['rows']) assert 'columns' in created, "FAIL: failed to find 'columns' field in CreateFrame result." assert len(created['columns']) == 10, "FAIL: expected value of 'columns' field in CreateFrame result to be: " + str(10) + ", found: " + str(len(created['columns'])) # Test CreateFrame -- With Wrong parameter param_job = a_node.create_frame(raiseIfNon200=False, dests='created') h2o_test_utils.validate_412_statusCode(param_job) h2o_test_utils.validate_412_InfoMessage(param_job, "Unknown parameter: dests") ######################################################### # Import and test all the datasets we'll need for the subsequent tests: ######################################################### # dest_key, path, expected_rows, model_category, response_column, ignored_columns datasets_to_import = [ DatasetSpec('prostate_clustering', '../../smalldata/logreg/prostate.csv', 380, 'Clustering', None, ['ID']), DatasetSpec('prostate_binomial', '../../smalldata/logreg/prostate.csv', 380, 'Binomial', 'CAPSULE', ['ID']), DatasetSpec('prostate_regression', '../../smalldata/logreg/prostate.csv', 380, 'Regression', 'AGE', ['ID']), DatasetSpec('prostate_delete', 
'../../smalldata/logreg/prostate.csv', 380, 'Regression', 'AGE', ['ID']), DatasetSpec('prostate_spt_negetive', '../../smalldata/logreg/prostate.csv', 380, 'Regression', 'AGE', ['ID']), DatasetSpec('airlines_binomial', '../../smalldata/airlines/allyears2k_headers.zip', 43978, 'Binomial', 'IsDepDelayed', ['IsArrDelayed', 'ArrDelay', 'DepDelay']), # TODO: more ignored? DatasetSpec('iris_multinomial', '../../smalldata/iris/iris_wheader.csv', 150, 'Multinomial', 'class', []), ] datasets = {} # the dataset spec for dataset_spec in datasets_to_import: dataset = dataset_spec.import_and_validate_dataset(a_node) # it's also stored in dataset_spec['dataset'] datasets[dataset_spec['dest_key']] = dataset_spec ################################################ # Test /Frames for prostate.csv frames = a_node.frames(row_count=5)['frames'] frames_dict = h2o_test_utils.list_to_dict(frames, 'frame_id/name') # TODO: remove: if h2o_test_utils.isVerboser(): print "frames: " pp.pprint(frames) if h2o_test_utils.isVerboser(): print "frames_dict: " pp.pprint(frames_dict) assert 'prostate_binomial' in frames_dict, "FAIL: Failed to find " + 'prostate_binomial' + " in Frames list." assert not frames_dict['prostate_binomial']['is_text'], "FAIL: Parsed Frame is is_text" # Test /Frames/{key} for prostate.csv frames = a_node.frames(key='prostate_binomial', row_count=5)['frames'] frames_dict = h2o_test_utils.list_to_dict(frames, 'frame_id/name') assert 'prostate_binomial' in frames_dict, "FAIL: Failed to find prostate.hex in Frames list." columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label') assert 'CAPSULE' in columns_dict, "FAIL: Failed to find CAPSULE in Frames/prostate.hex." assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns." assert 'histogram_bins' in columns_dict['AGE'], "FAIL: Failed to find bins in Frames/prostate.hex/columns/AGE." 
h2o.H2O.verboseprint('bins: ' + repr(columns_dict['AGE']['histogram_bins'])) assert None is columns_dict['AGE']['histogram_bins'], "FAIL: Failed to clear bins field." # should be cleared except for /summary # Test /Frames for prostate.csv -- Wrong Parameter print("Testing the Bad Parameters.....") param_frames = a_node.frames_negetive(row_counts=5) h2o_test_utils.validate_412_statusCode(param_frames) h2o_test_utils.validate_412_InfoMessage(param_frames, "Unknown parameter: row_counts") print("DONE Testing the Bad Parameters.....") # Test /Frames/{key}/columns for prostate.csv frames = a_node.columns(key='prostate_binomial')['frames'] columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label') assert 'ID' in columns_dict, "FAIL: Failed to find ID in Frames/prostate.hex/columns." assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns." assert 'histogram_bins' in columns_dict['AGE'], "FAIL: Failed to find bins in Frames/prostate.hex/columns/AGE." h2o.H2O.verboseprint('bins: ' + repr(columns_dict['AGE']['histogram_bins'])) assert None is columns_dict['AGE']['histogram_bins'], "FAIL: Failed to clear bins field." # should be cleared except for /summary # Test /Frames/{key} for prostate.csv ### worong Parameter key_param_frames = a_node.frames_negetive(key='prostate_binomial', row_offsets=10) h2o_test_utils.validate_412_statusCode(key_param_frames) h2o_test_utils.validate_412_InfoMessage(key_param_frames, "Unknown parameter: row_offsets") # Test /Frames/{key}/columns/{label} for prostate.csv frames = a_node.column(key='prostate_binomial', column='AGE')['frames'] columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label') assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns." assert 'histogram_bins' in columns_dict['AGE'], "FAIL: Failed to find bins in Frames/prostate.hex/columns/AGE." 
h2o.H2O.verboseprint('bins: ' + repr(columns_dict['AGE']['histogram_bins'])) assert None is columns_dict['AGE']['histogram_bins'], "FAIL: Failed to clear bins field." # should be cleared except for /summary # Test /Frames/{key}/columns/{label} for prostate.csv ### Wrong param col_param_frames = a_node.frames_negetive(key='prostate_binomial', column_counts=3) h2o_test_utils.validate_412_statusCode(col_param_frames) h2o_test_utils.validate_412_InfoMessage(col_param_frames, "Unknown parameter: column_counts") # Test /Frames/{key}/columns/{label}/summary for prostate.csv frames = a_node.summary(key='prostate_binomial', column='AGE')['frames'] columns_dict = h2o_test_utils.list_to_dict(frames[0]['columns'], 'label') assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns/AGE/summary." col = columns_dict['AGE'] h2o_test_utils.assertKeysExistAndNonNull(col, '', ['label', 'missing_count', 'zero_count', 'positive_infinity_count', 'negative_infinity_count', 'mins', 'maxs', 'mean', 'sigma', 'type', 'data', 'precision', 'histogram_bins', 'histogram_base', 'histogram_stride', 'percentiles']) h2o_test_utils.assertKeysExist(col, '', ['domain', 'string_data']) assert col['mins'][0] == 43, 'FAIL: Failed to find 43 as the first min for AGE.' assert col['maxs'][0] == 79, 'FAIL: Failed to find 79 as the first max for AGE.' assert abs(col['mean'] - 66.03947368421052) < 1e-8, 'FAIL: Failed to find 66.03947368421052 as the mean for AGE.' assert abs(col['sigma'] - 6.527071269173308) < 1e-8, 'FAIL: Failed to find 6.527071269173308 as the sigma for AGE.' assert col['type'] == 'int', 'FAIL: Failed to find int as the type for AGE.' assert col['data'][0] == 65, 'FAIL: Failed to find 65 as the first data for AGE.' assert col['precision'] == -1, 'FAIL: Failed to find -1 as the precision for AGE.' assert col['histogram_bins'][0] == 1, 'FAIL: Failed to find 1 as the first bin for AGE.' 
assert col['histogram_base'] == 43, 'FAIL: Failed to find 43 as the histogram_base for AGE.' assert col['histogram_stride'] == 1, 'FAIL: Failed to find 1 as the histogram_stride for AGE.' assert col['percentiles'][0] == 44.516, 'FAIL: Failed to find 43.516 as the 0.1% percentile for AGE. '+str(col['percentiles'][0]) assert col['percentiles'][1] == 50.79, 'FAIL: Failed to find 50.79 as the 1.0% percentile for AGE. '+str(col['percentiles'][1]) assert col['percentiles'][9] == 78, 'FAIL: Failed to find 78 as the 99.0% percentile for AGE. '+str(col['percentiles'][9]) assert col['percentiles'][10] == 79, 'FAIL: Failed to find 79 as the 99.9% percentile for AGE. '+str(col['percentiles'][10]) # NB: col['percentiles'] corresponds to probs=[0.001, 0.01, 0.1, 0.25, 0.333, 0.5, 0.667, 0.75, 0.9, 0.99, 0.999] # Test /Frames/{key}/columns/{label} for prostate.csv ### Wrong param summary_param_frames = a_node.frames_negetive(key='prostate_binomial', find_compatible_model=0) h2o_test_utils.validate_412_statusCode(summary_param_frames) h2o_test_utils.validate_412_InfoMessage(summary_param_frames, "Unknown parameter: find_compatible_model") #Test /Frames/{key}/export for prostate.csv ### Positive job = a_node.export(key='prostate_binomial', row_count=100, path="/Users/sureshvuyyuru/export", force="true")['job'] assert job['dest']['name'] == '/Users/sureshvuyyuru/export', "FAIL: Export Name is not as Expected" assert job['description'] == 'Export frame', "FAIL: Frame Descrion is NOT Correct" #Test /Frames/{key}/export for prostate.csv ### Wrong param job_negetive = a_node.export(key='prostate_binomial', raiseIfNon200=False, row_count=100, path="/Users/sureshvuyyuru/export", forces="true") h2o_test_utils.validate_412_statusCode(job_negetive) h2o_test_utils.validate_412_InfoMessage(job_negetive, 'Unknown parameter: forces') #Suresh domain ### Positive # Test /Frames/{key}/columns/{label}/domain for created domains = a_node.domain(key='iris_multinomial', raiseIfNon200=False, 
column='class', row_offset=0, row_count=100) domain = domains['domain'] #pp.pprint(domains) assert 'Iris-setosa' in domain[0], "FAIL: Does not find the domain value" assert 'Iris-virginica' in domain[0], "FAIL: Does not find the domain Value" #Suresh domain ### Negetive domains_negetive = a_node.domain(key='iris_multinomial', column='class', domain='Iris-setosa') h2o_test_utils.validate_412_statusCode(domains_negetive) h2o_test_utils.validate_412_InfoMessage(domains_negetive, 'Attempting to set output field: domain for class: class water.api.FramesV3') # Suresh -- ### delete Frame #Test DELETE /Frames/{key} for prostate.csv resp = a_node.delete_frame(key='prostate_delete') assert resp['row_count'] == 0, "FAIL: Row count is not Zero in delete frame" assert 'rows' not in resp, "FAIL: Row count is not Zero in delete frame" print("prostate_delete Frame has deleted...") # Test /SplitFrame for prostate.csv if h2o_test_utils.isVerbose(): print 'Testing SplitFrame with named destination_frames. . .' splits = a_node.split_frame(dataset='prostate_binomial', ratios=[0.8], destination_frames=['bigger', 'smaller']) frames = a_node.frames()['frames'] h2o_test_utils.validate_frame_exists(a_node, 'bigger', frames) h2o_test_utils.validate_frame_exists(a_node, 'smaller', frames) bigger = a_node.frames(key='bigger')['frames'][0] smaller = a_node.frames(key='smaller')['frames'][0] assert bigger['rows'] == 304, 'FAIL: 80/20 SplitFrame yielded the wrong number of rows. Expected: 304; got: ' + bigger['rows'] assert smaller['rows'] == 76, 'FAIL: 80/20 SplitFrame yielded the wrong number of rows. Expected: 76; got: ' + smaller['rows'] # TODO: h2o_test_utils.validate_job_exists(a_node, splits['frame_id']['name']) if h2o_test_utils.isVerbose(): print 'Testing SplitFrame with generated destination_frames. . .' 
splits = a_node.split_frame(dataset='prostate_binomial', ratios=[0.5]) frames = a_node.frames()['frames'] h2o_test_utils.validate_frame_exists(a_node, splits['destination_frames'][0]['name'], frames) h2o_test_utils.validate_frame_exists(a_node, splits['destination_frames'][1]['name'], frames) first = a_node.frames(key=splits['destination_frames'][0]['name'])['frames'][0] second = a_node.frames(key=splits['destination_frames'][1]['name'])['frames'][0] assert first['rows'] == 190, 'FAIL: 50/50 SplitFrame yielded the wrong number of rows. Expected: 190; got: ' + first['rows'] assert second['rows'] == 190, 'FAIL: 50/50 SplitFrame yielded the wrong number of rows. Expected: 190; got: ' + second['rows'] # TODO: h2o_test_utils.validate_job_exists(a_node, splits['frame_id']['name']) # Test /SplitFrame for prostate.csv ### Split negetive print 'Testing SplitFrame with Wrong params...' splits_neg = a_node.split_frame( raiseIfNon200=False, dataset='prostate_spt_negetive', ratio=[0.4], destination_frames=['bigger', 'smaller']) h2o_test_utils.validate_412_statusCode(splits_neg) h2o_test_utils.validate_412_InfoMessage(splits_neg, 'Unknown parameter: ratio') #Ratio must be between 0 and 1! return datasets
def test(a_node, pp):
    """Check the basic shape of the metadata responses returned by ``a_node``.

    Four calls are made, in this order: ``a_node.schemas(timeoutSecs=240)``,
    ``a_node.schema(schemaname='FrameV3', timeoutSecs=240)``,
    ``a_node.endpoints()`` and ``a_node.schemas()``.  Each response is checked
    for its top-level list field ('schemas' or 'routes'); the single-schema
    lookup must return exactly one entry, the others at least one.

    A progress line is printed before each call when
    ``h2o_test_utils.isVerbose()`` is true.  The first two responses are also
    pretty-printed when ``h2o_test_utils.isVerboser()`` is true.

    Args:
        a_node: object providing ``schemas``, ``schema`` and ``endpoints``
            (presumably an H2O node client -- confirm against the caller).
        pp: object providing ``pprint`` (presumably a
            ``pprint.PrettyPrinter`` -- confirm against the caller).

    Raises:
        AssertionError: when a response lacks an expected field or the field
            has an unexpected type or length.
    """

    def announce(banner):
        # Progress message, emitted only at the "verbose" level.
        if h2o_test_utils.isVerbose():
            print(banner)

    def dump(title, payload):
        # Full response dump, emitted only at the "verboser" level.
        if h2o_test_utils.isVerboser():
            print(title)
            pp.pprint(payload)

    ####################################
    # schemas collection GET
    announce('Testing /Metadata/schemas. . .')
    all_schemas = a_node.schemas(timeoutSecs=240)
    assert 'schemas' in all_schemas, (
        "FAIL: failed to find 'schemas' field in output of /Metadata/schemas: "
        + repr(all_schemas))
    assert type(all_schemas['schemas']) is list, (
        "'schemas' field in output of /Metadata/schemas is not a list: "
        + repr(all_schemas))
    assert len(all_schemas['schemas']) > 0, (
        "'schemas' field in output of /Metadata/schemas is empty: "
        + repr(all_schemas))
    dump('Schemas: ', all_schemas)

    ####################################
    # schemas individual GET (exactly one entry expected for a named schema)
    announce('Testing /Metadata/schemas/FrameV3. . .')
    frame_schema = a_node.schema(schemaname='FrameV3', timeoutSecs=240)
    assert 'schemas' in frame_schema, (
        "FAIL: failed to find 'schemas' field in output of /Metadata/schemas/FrameV3: "
        + repr(frame_schema))
    assert type(frame_schema['schemas']) is list, (
        "'schemas' field in output of /Metadata/schemas/FrameV3 is not a list: "
        + repr(frame_schema))
    assert len(frame_schema['schemas']) == 1, (
        "'schemas' field in output of /Metadata/schemas/FrameV3 has an unexpected length: "
        + repr(frame_schema))
    dump('Schemas: ', frame_schema)

    #########################
    # Metadata/endpoints
    announce('Testing /Metadata/endpoints. . .')
    route_listing = a_node.endpoints()
    assert 'routes' in route_listing, \
        "FAIL: failed to find routes in the endpoints result."
    assert type(route_listing['routes']) is list, \
        "FAIL: routes in the endpoints result is not a list."
    assert len(route_listing['routes']) > 0, \
        "FAIL: routes list in the endpoints result is empty."
    assert type(route_listing['routes'][0]) is dict, \
        "FAIL: routes[0] in the endpoints result is not a dict."
    assert 'input_schema' in route_listing['routes'][0], \
        "FAIL: routes[0] in the endpoints result does not have an 'input_schema' field."

    #########################
    # Metadata/schemas again, this time without an explicit timeout, and
    # additionally inspecting the first entry
    announce('Testing /Metadata/schemas. . .')
    schema_listing = a_node.schemas()
    assert 'schemas' in schema_listing, \
        "FAIL: failed to find schemas in the schemas result."
    assert type(schema_listing['schemas']) is list, \
        "FAIL: schemas in the schemas result is not a list."
    assert len(schema_listing['schemas']) > 0, \
        "FAIL: schemas list in the schemas result is empty."
    assert type(schema_listing['schemas'][0]) is dict, \
        "FAIL: schemas[0] in the schemas result is not a dict."
    assert 'fields' in schema_listing['schemas'][0], \
        "FAIL: schemas[0] in the schemas result does not have an 'fields' field."
def test(a_node, pp, algos): ################################## # Test cluster status cloud = a_node.cloud() if h2o_test_utils.isVerboser(): print('Cloud: ') pp.pprint(cloud) not_ok = a_node.cloud_is_bad() assert not not_ok, "FAIL: cloud status is not ok! Reason: " + not_ok jobs = a_node.jobs() if h2o_test_utils.isVerboser(): print('Jobs: ') pp.pprint(jobs) assert 'jobs' in jobs, "FAIL: 'jobs' element not found in the result of /Jobs" ################################## # Test object collection endpoints models = a_node.models() if h2o_test_utils.isVerboser(): print('Models: ') pp.pprint(models) models = a_node.models(api_version=92) # note: tests API version fallback if h2o_test_utils.isVerboser(): print('ModelsV92: ') pp.pprint(models) frames = a_node.frames(row_count=5) if h2o_test_utils.isVerboser(): print('Frames: ') pp.pprint(frames) #################################### # test model_builders collection GET if h2o_test_utils.isVerbose(): print('Testing /ModelBuilders. . .') model_builders = a_node.model_builders(timeoutSecs=240) if h2o_test_utils.isVerboser(): print('ModelBuilders: ') pp.pprint(model_builders) for algo in algos: assert algo in model_builders[ 'model_builders'], "FAIL: Failed to find algo: " + algo builder = model_builders['model_builders'][algo] h2o_test_utils.validate_builder(algo, builder) #################################### # test model_builders individual GET if h2o_test_utils.isVerbose(): print('Testing /ModelBuilders/{algo}. . .') for algo in algos: model_builder = a_node.model_builders(algo=algo, timeoutSecs=240) assert algo in model_builder[ 'model_builders'], "FAIL: Failed to find algo: " + algo builder = model_builders['model_builders'][algo] h2o_test_utils.validate_builder(algo, builder) #################################### # test model_metrics collection GET if h2o_test_utils.isVerbose(): print('Testing /ModelMetrics. . 
.') model_metrics = a_node.model_metrics(timeoutSecs=240) if h2o_test_utils.isVerboser(): print('ModelMetrics[0]: ') pp.pprint(model_metrics['model_metrics'][0]) #################################### # test model_metrics individual GET model_metrics = a_node.model_metrics( timeoutSecs=240, model="deeplearning_prostate_binomial", frame="prostate_binomial")