Exemplo n.º 1
0
    def test_house_pricing(self, use_gpu):
        """
        End-to-end check on the home rentals dataset: train a predictor,
        score it, run predictions, validate the layout of the model metadata
        and finally predict again from an exported and re-imported copy.
        """
        name = 'home_rentals_price'
        target = 'rental_price'
        dataset_url = "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv"

        def check_explanations(rows):
            # Every returned prediction must expose an `explanation` attribute.
            for row in rows:
                assert hasattr(row, 'explanation')

        # --- Train -----------------------------------------------------
        mdb = Predictor(name=name)
        mdb.learn(to_predict=target,
                  from_data=dataset_url,
                  backend='lightwood',
                  stop_training_in_x_seconds=80,
                  use_gpu=use_gpu)

        # --- Score on the same dataset ---------------------------------
        test_results = mdb.test(when_data=dataset_url,
                                accuracy_score_functions=r2_score,
                                predict_args={'use_gpu': use_gpu})
        assert test_results['rental_price_accuracy'] >= 0.8

        # --- Predict from a file and from a single partial row ---------
        check_explanations(mdb.predict(when_data=dataset_url, use_gpu=use_gpu))
        check_explanations(
            mdb.predict(when_data={'sqft': 300}, use_gpu=use_gpu))

        # --- Model metadata must be JSON-serialisable and well-typed ---
        amd = F.get_model_data(name)
        assert isinstance(json.dumps(amd), str)

        string_fields = ('status', 'name', 'version', 'data_source',
                         'current_phase', 'updated_at', 'created_at',
                         'train_end_at')
        for field in string_fields:
            assert isinstance(amd[field], str)

        assert isinstance(amd['predict'], (list, str))
        assert isinstance(amd['is_active'], bool)

        for field in ('validation_set_accuracy', 'accuracy'):
            assert isinstance(amd[field], float)

        data_preparation = amd['data_preparation']
        for key in data_preparation:
            assert isinstance(data_preparation[key], (int, float))

        data_analysis = amd['data_analysis']
        for key in data_analysis:
            entries = data_analysis[key]
            assert len(entries) > 0
            assert isinstance(entries[0], dict)

        model_analysis = amd['model_analysis']
        assert len(model_analysis) > 0
        first_analysis = model_analysis[0]
        assert isinstance(first_analysis, dict)
        input_importance = first_analysis["overall_input_importance"]
        assert len(input_importance) > 0
        assert isinstance(input_importance, dict)

        # Each data split reports exactly one accuracy, for the target column.
        for split in ('train', 'test', 'valid'):
            split_accuracy = first_analysis[f'{split}_data_accuracy']
            assert isinstance(split_accuracy, dict)
            assert len(split_accuracy) == 1
            assert split_accuracy[target] > 0.4

        # "x" holds the column names, "y" the matching importance scores.
        columns = input_importance["x"]
        scores = input_importance["y"]
        for column, importance in zip(columns, scores):
            assert isinstance(column, str)
            assert len(column) > 0
            assert isinstance(importance, (float, int))
            assert 0 <= importance <= 10

        # --- Confidence estimation must survive save -> load -----------
        reloaded_name = f"{name}-new"
        F.export_predictor(name)
        F.import_model(f"{name}.zip", reloaded_name)
        reloaded = Predictor(name=reloaded_name)
        check_explanations(
            reloaded.predict(when_data={'sqft': 1000},
                             use_gpu=use_gpu,
                             run_confidence_variation_analysis=True))
Exemplo n.º 2
0
 def export_model(self, name):
     """Export the predictor identified by ``name``.

     Thin wrapper that forwards ``name`` to ``F.export_predictor`` as its
     ``model_name`` keyword argument; the result of that call is discarded
     and nothing is returned.
     """
     F.export_predictor(model_name=name)