示例#1
0
    def test_process_prediction(self):
        """Check process_prediction when ready-made class labels are supplied.

        The target column is dropped from the iris fixture, six labels are
        passed as results (no probabilities, no threshold), and the dataset
        is then expected to match a freshly loaded copy of the same CSV in
        both dtypes and values.
        """
        fixture_dir = 'tests/fixtures/test_predict_by_model/iris'
        test_csv = os.path.join(fixture_dir, "iris_test.csv")
        model_options = fsclient.read_json_file(
            os.path.join(fixture_dir, "options.json"))
        categories = ["setosa", "versicolor", "virginica"]

        dataset = DataFrame.create_dataframe(test_csv)
        target = model_options['targetFeature']
        dataset.drop([target])

        # Two passes over the three species, as a list separate from
        # `categories` so the two arguments never alias each other.
        predicted_labels = categories * 2

        ModelHelper.process_prediction(
            dataset,
            predicted_labels,
            None,  # results_proba
            None,  # proba_classes
            None,  # threshold
            model_options.get('minority_target_class'),
            target,
            categories)

        expected = DataFrame.create_dataframe(test_csv)
        self.assertEqual(dataset.dtypes, expected.dtypes)
        self.assertEqual(dataset.df.values.tolist(),
                         expected.df.values.tolist())
示例#2
0
    def test_process_prediction_proba(self):
        """Check process_prediction when only class probabilities are supplied.

        No class labels are passed; a probability matrix, the class indices
        and a 0.5 threshold are given instead. The test expects three
        ``proba_*`` columns appended to the fixture columns and a target
        column equal to the one stored in the fixture CSV.
        """
        fixture_dir = 'tests/fixtures/test_predict_by_model/iris'
        test_csv = os.path.join(fixture_dir, "iris_test.csv")
        model_options = fsclient.read_json_file(
            os.path.join(fixture_dir, "options.json"))
        categories = ["setosa", "versicolor", "virginica"]

        dataset = DataFrame.create_dataframe(test_csv)
        target = model_options['targetFeature']
        dataset.drop([target])

        # One row per record, one column per entry of `class_indices`.
        probabilities = np.array([
            [0.8, 0.1, 0.1],
            [0.4, 0.6, 0.1],
            [0.1, 0.2, 0.7],
            [0.7, 0.2, 0.1],
            [0.3, 0.7, 0.1],
            [0.1, 0.3, 0.6],
        ])
        class_indices = [0, 1, 2]

        ModelHelper.process_prediction(
            dataset,
            None,  # results: no labels are passed in this scenario
            probabilities,
            class_indices,
            0.5,  # threshold
            None,  # minority target class
            target,
            categories)

        expected = DataFrame.create_dataframe(test_csv)
        proba_columns = ["proba_setosa", "proba_versicolor", "proba_virginica"]
        self.assertEqual(dataset.columns, expected.columns + proba_columns)

        actual_target = dataset.df[target].values.tolist()
        expected_target = expected.df[target].values.tolist()
        self.assertEqual(actual_target, expected_target)
示例#3
0
    def test_calculate_scores(self):
        """Check calculate_scores on encoded true/predicted iris labels.

        Both label sequences go through ``ModelHelper.preprocess_target``
        first. The predicted sequence differs from the true one in a single
        position (third record), and the test expects one score per entry of
        ``scoreNames`` with an accuracy above 0.8.
        """
        fixture_dir = 'tests/fixtures/test_predict_by_model/iris'
        model_options = fsclient.read_json_file(
            os.path.join(fixture_dir, "options.json"))

        true_species = ("setosa", "versicolor", "virginica",
                        "setosa", "versicolor", "virginica")
        predicted_species = ("setosa", "versicolor", "versicolor",
                             "setosa", "versicolor", "virginica")

        # preprocess_target receives one single-value row per record.
        y_test, _ = ModelHelper.preprocess_target(
            fixture_dir,
            records=[[name] for name in true_species],
            features=["species"])
        y_pred, _ = ModelHelper.preprocess_target(
            fixture_dir,
            records=[[name] for name in predicted_species],
            features=["species"])

        scores = ModelHelper.calculate_scores(
            model_options, y_test=y_test, y_pred=y_pred)

        expected_count = len(model_options['scoreNames'])
        self.assertEqual(len(scores), expected_count)
        self.assertTrue(scores['accuracy'] > 0.8)
示例#4
0
文件: model.py 项目: chrinide/a2ml
    def predict(self, filename, model_id,
                threshold=None, locally=False, data=None, columns=None,
                output=None, json_result=False, count_in_result=False,
                prediction_date=None, prediction_id=None):
        """Run a prediction for ``model_id`` and save the outcome.

        The input is loaded from ``filename`` or from ``data``/``columns``,
        scored either locally or remotely depending on ``locally``,
        post-processed with ``ModelHelper.process_prediction`` and stored via
        ``ModelHelper.save_prediction``.

        Returns:
            dict: ``{'predicted': <value returned by save_prediction>}``.
        """
        dataset = DataFrame.create_dataframe(filename, data, columns)
        model_dir = self.ctx.config.get_model_path(model_id)
        model_options = fsclient.read_json_file(
            os.path.join(model_dir, "options.json"))

        if locally:
            outcome = self._predict_locally(dataset.df, model_id, threshold)
        else:
            outcome = self._predict_remotely(dataset.df, model_id, threshold)
        results, results_proba, proba_classes, target_categories = outcome

        # For a two-category target, the string forms "False"/"True" are
        # replaced in place by the corresponding booleans.
        if target_categories and len(target_categories) == 2:
            for position, category in enumerate(target_categories):
                if category == "False":
                    target_categories[position] = False
                elif category == "True":
                    target_categories[position] = True

        # Values saved with the model win; project config is the fallback.
        minority_class = model_options.get(
            'minority_target_class',
            self.ctx.config.get('minority_target_class'))
        target_feature = model_options.get(
            'targetFeature', self.ctx.config.get('target', None))

        ModelHelper.process_prediction(
            dataset, results, results_proba, proba_classes, threshold,
            minority_class, target_feature, target_categories)

        review_supported = model_options.get('support_review_model', True)
        predicted = ModelHelper.save_prediction(
            dataset, prediction_id, review_supported, json_result,
            count_in_result, prediction_date, model_dir, model_id, output)

        if filename:
            self.ctx.log('Predictions stored in %s' % predicted)

        return {'predicted': predicted}
示例#5
0
文件: predict.py 项目: chrinide/a2ml
    def _predict_locally(self, filename_arg, model_id, threshold, data,
                         columns, output):
        """Predict with a locally deployed model through the docker runner.

        When no input file is given, ``data``/``columns`` are first written
        to ``.augerml/predict_data.csv`` under the project path and the
        prediction is returned through ``ModelHelper.save_prediction_result``.
        When an input file is given, the path of the predicted file is
        returned, moved to ``output`` if one was requested.

        Raises:
            AugerException: if the model has not been downloaded locally.
        """
        deployer = ModelDeploy(self.ctx, None)
        is_model_loaded, model_path, model_name = \
            deployer.verify_local_model(model_id)

        if not is_model_loaded:
            raise AugerException(
                'Model isn\'t loaded locally. '
                'Please use a2ml deploy command to download model.')

        model_path, model_existed = self._extract_model(model_name)
        options_file = os.path.join(model_path, "model", "options.json")
        model_options = fsclient.read_json_file(options_file)

        input_file = filename_arg
        if not input_file:
            # In-memory input: dump it to a CSV that the runner can read.
            inline_ds = DataFrame.create_dataframe(input_file, data, columns)
            input_file = os.path.join(self.ctx.config.get_path(), '.augerml',
                                      'predict_data.csv')
            inline_ds.saveToCsvFile(input_file, compression=None)

        try:
            predicted = self._docker_run_predict(
                input_file, threshold, model_path)
        finally:
            # Remove the unzipped model only if this call unzipped it;
            # model_path is cleared so later code knows it is gone.
            if not model_existed:
                shutil.rmtree(model_path, ignore_errors=True)
                model_path = None

        if filename_arg:
            if output:
                fsclient.move_file(predicted, output)
                predicted = output
            return predicted

        result_ds = DataFrame.create_dataframe(predicted)
        result_ds.options['data_path'] = None
        result_ds.loaded_columns = columns

        # Without a model directory on disk, review support is switched off.
        if model_path:
            review_supported = model_options.get("support_review_model")
        else:
            review_supported = False

        return ModelHelper.save_prediction_result(
            result_ds,
            prediction_id=None,
            support_review_model=review_supported,
            json_result=False,
            count_in_result=False,
            prediction_date=None,
            model_path=model_path,
            model_id=model_id,
            output=output)
示例#6
0
    def test_preprocess_target(self):
        """Check that preprocess_target encodes species names as indices.

        Six single-value records are passed for the ``species`` feature; the
        test expects the encoded values 0, 1, 2, 0, 1, 2 and a categoricals
        mapping that lists the three species names.
        """
        fixture_dir = 'tests/fixtures/test_predict_by_model/iris'
        species_cycle = ("setosa", "versicolor", "virginica") * 2

        encoded, categoricals = ModelHelper.preprocess_target(
            fixture_dir,
            records=[[name] for name in species_cycle],
            features=["species"])

        self.assertEqual(list(encoded), [0, 1, 2, 0, 1, 2])

        expected_categoricals = {
            'species': {
                'categories': ['setosa', 'versicolor', 'virginica']
            }
        }
        self.assertEqual(categoricals, expected_categoricals)
示例#7
0
文件: model.py 项目: chrinide/a2ml
    def deploy(self, model_id, locally, review):
        """Deploy an AutoML run and write its metadata next to the model.

        For a local deploy that is already present, only a log message is
        emitted and ``{'model_id': model_id}`` is returned. Otherwise the run
        is deployed (locally or remotely) and three JSON files are written:
        ``options.json`` and ``target_categoricals.json`` in the model path,
        and ``metric_names_feature_importance.json`` in the metric path.

        Returns:
            The value returned by the local/remote deploy helper, or
            ``{'model_id': model_id}`` when the model was already deployed.
        """
        if locally:
            is_loaded, model_path = self.verify_local_model(model_id)
            if is_loaded:
                self.ctx.log('Model already deployed to %s' % model_path)
                return {'model_id': model_id}

        # Imported here rather than at module level, as in the original.
        from azureml.train.automl.run import AutoMLRun

        ws, experiment = self._get_experiment()
        model_run = AutoMLRun(experiment=experiment, run_id=model_id)

        if locally:
            result = self._deploy_locally(model_id, model_run, ws, experiment)
        else:
            result = self._deploy_remotly(model_id, model_run, ws, experiment)

        model_features, target_categories = \
            self._get_remote_model_features(model_run)
        feature_importance = self._get_feature_importance(model_run)

        options = {
            'uid': model_id,
            'targetFeature': self.ctx.config.get('target'),
            'support_review_model': review,
            'provider': self.ctx.config.name,
            'scoreNames': [self.ctx.config.get('experiment/metric')],
            'scoring': self.ctx.config.get('experiment/metric'),
            "score_name": self.ctx.config.get('experiment/metric'),
            "originalFeatureColumns": model_features
        }
        options.update(self._get_a2ml_info())

        options_file = os.path.join(
            self.ctx.config.get_model_path(model_id), "options.json")
        fsclient.write_json_file(options_file, options)

        categoricals_file = os.path.join(
            self.ctx.config.get_model_path(model_id),
            "target_categoricals.json")
        categoricals = {
            self.ctx.config.get('target'): {"categories": target_categories}
        }
        fsclient.write_json_file(categoricals_file, categoricals)

        metric_path = ModelHelper.get_metric_path(options, model_id)
        importance_file = os.path.join(
            metric_path, "metric_names_feature_importance.json")
        importance_payload = {
            'feature_importance_data': {
                'features': list(feature_importance.keys()),
                'scores': list(feature_importance.values())
            }
        }
        fsclient.write_json_file(importance_file, importance_payload)

        return result
示例#8
0
文件: predict.py 项目: ANN-KOREA/a2ml
    def _predict_on_cloud(self, filename, model_id, threshold, data, columns,
                          output):
        """Predict through the Auger pipeline API and save the result.

        The input is loaded from ``filename`` or ``data``/``columns``, its
        records and column names are sent to ``AugerPipelineApi.predict``,
        and the returned ``data``/``columns`` are wrapped in a new dataset
        that is handed to ``ModelHelper.save_prediction_result``.

        Returns:
            Whatever ``ModelHelper.save_prediction_result`` returns.
        """
        source_ds = DataFrame.create_dataframe(filename, data, columns)

        api = AugerPipelineApi(self.ctx, None, model_id)
        predictions = api.predict(
            source_ds.get_records(), source_ds.columns, threshold)

        result_ds = DataFrame.create_dataframe(
            None,
            records=predictions['data'],
            features=predictions['columns'])
        # Point the result at the original input file path.
        result_ds.options['data_path'] = filename

        save_options = dict(
            prediction_id=None,
            support_review_model=False,
            json_result=False,
            count_in_result=False,
            prediction_date=None,
            model_path=None,
            model_id=model_id,
            output=output,
        )
        return ModelHelper.save_prediction_result(result_ds, **save_options)
示例#9
0
    def test_save_prediction(self):
        """Check the return shapes of ModelHelper.save_prediction.

        Four scenarios are run against the iris fixture, each starting from a
        freshly loaded dataset with both output files removed:

        1. review model on, plain result: returns the predicted CSV path and
           both the predicted file and the review results file exist;
        2. review model on, JSON result: returns a JSON string with
           ``columns`` and six ``data`` rows;
        3. no data path, review model off: returns a list of dicts;
        4. no data path, ``loaded_columns`` set: returns a dict with
           ``columns`` and six list-typed ``data`` rows.
        """
        fixture_dir = 'tests/fixtures/test_predict_by_model/iris'
        model_options = fsclient.read_json_file(
            os.path.join(fixture_dir, "options.json"))

        prediction_id = "123"
        prediction_date = "today"
        results_name = \
            prediction_date + '_' + prediction_id + "_results.feather.zstd"
        results_file_path = os.path.join(
            fixture_dir, "predictions", results_name)
        predicted_name = "iris_test_" + prediction_id + "_" + \
            model_options.get('uid') + "_predicted.csv"
        predicted_file_path = os.path.join(
            fixture_dir, "predictions", predicted_name)

        def fresh_dataset():
            # Reload the fixture and make sure no output of an earlier
            # scenario is left on disk.
            dataset = DataFrame.create_dataframe(
                os.path.join(fixture_dir, "iris_test.csv"))
            for stale_path in (results_file_path, predicted_file_path):
                fsclient.remove_file(stale_path)
                self.assertFalse(fsclient.is_file_exists(stale_path))
            return dataset

        def save(dataset, support_review_model, json_result):
            # Only the review flag and the JSON flag vary between scenarios.
            return ModelHelper.save_prediction(
                dataset,
                prediction_id,
                support_review_model=support_review_model,
                json_result=json_result,
                count_in_result=False,
                prediction_date=prediction_date,
                model_path=fixture_dir,
                model_id=model_options.get('uid'))

        # Scenario 1: file input, review model enabled.
        ds = fresh_dataset()
        res = save(ds, support_review_model=True, json_result=False)
        self.assertEqual(res, predicted_file_path)
        self.assertTrue(fsclient.is_file_exists(predicted_file_path))
        self.assertTrue(fsclient.is_file_exists(results_file_path))

        # Scenario 2: JSON result requested.
        ds = fresh_dataset()
        res = json.loads(save(ds, support_review_model=True, json_result=True))
        self.assertEqual(res['columns'], ds.columns)
        self.assertEqual(len(res['data']), 6)

        # Scenario 3: no data path, so records come back directly.
        ds = fresh_dataset()
        ds.options['data_path'] = None
        res = save(ds, support_review_model=False, json_result=False)
        self.assertEqual(type(res[0]), dict)
        self.assertEqual(res[0][model_options['targetFeature']], 'setosa')

        # Scenario 4: no data path, loaded_columns set on the dataset.
        ds = fresh_dataset()
        ds.options['data_path'] = None
        ds.loaded_columns = ds.columns
        res = save(ds, support_review_model=False, json_result=False)
        self.assertEqual(res['columns'], ds.columns)
        self.assertEqual(len(res['data']), 6)
        self.assertEqual(type(res['data'][0]), list)
示例#10
0
    def test_calculate_proba_target(self):
        """Check calculate_proba_target across threshold formats.

        The threshold is passed as a number, a numeric string, a dict keyed
        by category name, a JSON string of such a dict, and as a number
        combined with ``minority_target_class``; binary and three-class
        probability rows are both covered.
        """
        def expect(expected, proba, classes, categories, threshold, **extra):
            # Single call-and-compare step shared by every case below.
            actual = ModelHelper.calculate_proba_target(
                proba, classes, categories, threshold, **extra)
            self.assertEqual(actual, expected)

        def expect_binary(expected, proba, threshold, **extra):
            # Fresh class/category lists per call, as with inline literals.
            expect(expected, proba, [0, 1], ["f", "t"], threshold, **extra)

        def expect_three_class(expected, proba, threshold, **extra):
            expect(expected, proba, [0, 1, 2], ["f", "t", "u"], threshold,
                   **extra)

        # Binary target, plain numeric thresholds.
        expect_binary([1, 0], [[0.2, 0.8], [0.9, 0.1]], 0.5)
        expect_binary([0, 0], [[0.2, 0.8], [0.9, 0.1]], 0.9)
        expect_binary([1, 0], [[0.2, 0.8], [0.6, 0.4]], 0.7)
        expect_binary([1, 1], [[0.2, 0.8], [0.6, 0.4]], 0.4)

        # Binary target, threshold given as a numeric string.
        expect_binary([1, 1], [[0.2, 0.8], [0.6, 0.4]], "0.4")

        # Binary target, per-category threshold as dict, keyword or JSON.
        expect_binary([1, 0], [[0.2, 0.8], [0.6, 0.4]], {"t": 0.7})
        expect_binary([1, 0], [[0.2, 0.8], [0.6, 0.4]], 0.7,
                      minority_target_class="t")
        expect_binary([1, 1], [[0.2, 0.8], [0.6, 0.4]], {"t": 0.4})
        expect_binary([1, 1], [[0.2, 0.8], [0.6, 0.4]], "{\"t\": 0.4}")

        # Three-class target.
        expect_three_class([2, 0], [[0.2, 0.3, 0.5], [0.6, 0.1, 0.3]], 0.4)
        expect_three_class([2, 0], [[0.2, 0.2, 0.6], [0.6, 0.1, 0.3]],
                           {"u": 0.6})
        expect_three_class([2, 0], [[0.2, 0.2, 0.6], [0.6, 0.1, 0.3]], 0.6,
                           minority_target_class="u")
        expect_three_class([2, 1], [[0.2, 0.2, 0.6], [0.1, 0.6, 0.3]],
                           {"t": 0.6})

        # Very small threshold on the minority class of a skewed row.
        expect([1], [[0.9957942056, 0.0042057944]], [0, 1],
               [" <=50K", " >50K"], 0.000001,
               minority_target_class=" >50K")