def validate(to_predict, from_data, accuracy_score_functions, learn_args=None, test_args=None):
    """Train a throwaway predictor and return its accuracy on the validation split.

    A predictor with a random, collision-safe name is created, trained on
    `from_data`, scored on its own validation dataframe with the given
    accuracy function(s), and then deleted.

    :param to_predict: target column(s) passed through to ``Predictor.learn``
    :param from_data: training data source passed through to ``Predictor.learn``
    :param accuracy_score_functions: accuracy function(s) forwarded to ``Predictor.test``
    :param learn_args: optional extra keyword args for ``learn`` (default: none)
    :param test_args: optional extra keyword args for ``test`` (default: none)
    :return: the accuracy result returned by ``Predictor.test``
    """
    # Avoid mutable-default pitfalls: fresh dict per call when not supplied.
    learn_args = {} if learn_args is None else learn_args
    test_args = {} if test_args is None else test_args

    # Random hex-ish name so concurrent validations never collide.
    name = str(uuid.uuid4()).replace('-', '')
    predictor = Predictor(name)
    try:
        predictor.learn(to_predict, from_data, **learn_args)
        # Score on the predictor's own held-out validation dataframe.
        validation_data = predictor.transaction.input_data.validation_df
        accuracy = predictor.test(
            when_data=validation_data,
            accuracy_score_functions=accuracy_score_functions,
            **test_args
        )
    finally:
        # Clean up the temporary model even if learn/test raise, so failed
        # validation runs don't leak models on disk.
        delete_model(name)
    return accuracy
def test_house_pricing(self, use_gpu):
    """
    End-to-end pipeline check: download the home-rentals dataset, train a
    predictor, then verify test accuracy, prediction explanations, exported
    model metadata, and confidence estimation after a save -> load round trip.
    """
    dataset_url = "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv"

    # Create & Learn
    name = 'home_rentals_price'
    predictor = Predictor(name=name)
    predictor.learn(
        to_predict='rental_price',
        from_data=dataset_url,
        backend='lightwood',
        stop_training_in_x_seconds=80,
        use_gpu=use_gpu)

    def assert_prediction_interface(predictions):
        # Every prediction row must expose an explanation object.
        for single_prediction in predictions:
            assert hasattr(single_prediction, 'explanation')

    test_results = predictor.test(
        when_data=dataset_url,
        accuracy_score_functions=r2_score,
        predict_args={'use_gpu': use_gpu})
    assert test_results['rental_price_accuracy'] >= 0.8

    # Predict from the full CSV and from a single ad-hoc row.
    predictions = predictor.predict(when_data=dataset_url, use_gpu=use_gpu)
    assert_prediction_interface(predictions)
    predictions = predictor.predict(when_data={'sqft': 300}, use_gpu=use_gpu)
    assert_prediction_interface(predictions)

    # Exported model metadata must be JSON-serializable and well-typed.
    model_data = F.get_model_data(name)
    assert isinstance(json.dumps(model_data), str)

    string_fields = [
        'status', 'name', 'version', 'data_source', 'current_phase',
        'updated_at', 'created_at', 'train_end_at'
    ]
    for field in string_fields:
        assert isinstance(model_data[field], str)

    assert isinstance(model_data['predict'], (list, str))
    assert isinstance(model_data['is_active'], bool)

    for field in ['validation_set_accuracy', 'accuracy']:
        assert isinstance(model_data[field], float)

    for field in model_data['data_preparation']:
        assert isinstance(model_data['data_preparation'][field], (int, float))

    for field in model_data['data_analysis']:
        assert len(model_data['data_analysis'][field]) > 0
        assert isinstance(model_data['data_analysis'][field][0], dict)

    model_analysis = model_data['model_analysis']
    assert len(model_analysis) > 0
    assert isinstance(model_analysis[0], dict)

    input_importance = model_analysis[0]["overall_input_importance"]
    assert len(input_importance) > 0
    assert isinstance(input_importance, dict)

    # Per-split accuracy: one entry (the target column), above a sanity floor.
    for split in ['train', 'test', 'valid']:
        split_accuracy = model_analysis[0][split + '_data_accuracy']
        assert isinstance(split_accuracy, dict)
        assert len(split_accuracy) == 1
        assert split_accuracy['rental_price'] > 0.4

    for column, importance in zip(input_importance["x"], input_importance["y"]):
        assert isinstance(column, str)
        assert len(column) > 0
        assert isinstance(importance, (float, int))
        assert 0 <= importance <= 10

    # Test confidence estimation after save -> load
    p = None
    F.export_predictor(name)
    F.import_model(f"{name}.zip", f"{name}-new")
    p = Predictor(name=f'{name}-new')
    predictions = p.predict(
        when_data={'sqft': 1000},
        use_gpu=use_gpu,
        run_confidence_variation_analysis=True)
    assert_prediction_interface(predictions)