# Example #1
def test_modelstore_create(makedirs, path_exists, modelstore_config):
    """The store directory is created on init, but only when it is missing."""
    store_location = modelstore_config["model_store"]["location"]

    r.ModelStore(modelstore_config)
    makedirs.assert_called_once_with(store_location)

    # Second round: pretend the directory already exists.
    makedirs.reset_mock()
    path_exists.reset_mock()
    path_exists.return_value = True
    r.ModelStore(modelstore_config)
    assert not makedirs.called
# Example #2
def test_list_models_ignore_obsolete_backups(_load_metadata, path_exists,
                                             modelstore_config, caplog):
    """Backup files with unparseable names are logged as ignored."""
    current = [
        "mymodel_1.0.0.json",
        "mymodel_1.1.0.json",
        "anothermodel_0.0.1.json",
    ]
    backups = [
        "mymodel_1.0.0_2021-08-01_18-00-00.json",
        "OBSOLETE-IGNORED_1.0.0_2021-07-31_12-00-00",
    ]

    def fake_glob(pattern):
        # The backup subdirectory is matched via "previous" in the pattern.
        return backups if "previous" in pattern.lower() else current

    with mock.patch(
            "{}.glob.glob".format(r.__name__),
            side_effect=fake_glob,
    ):
        with caplog.at_level(logging.DEBUG):
            models = r.ModelStore(modelstore_config).list_models()

    print(models)
    assert "ignoring" in caplog.text.lower()
# Example #3
def test_list_models(_load_metadata, path_exists, modelstore_config, caplog):
    """Versions, 'latest' link and backups are listed; nothing gets ignored."""
    current = [
        "mymodel_1.0.0.json",
        "mymodel_1.1.0.json",
        "anothermodel_0.0.1.json",
    ]
    backups = [
        "mymodel_1.0.0_2021-08-01_18-00-00.json",
        "mymodel_1.0.0_2021-07-31_12-00-00",
    ]

    def fake_glob(pattern):
        # The backup subdirectory is matched via "previous" in the pattern.
        return backups if "previous" in pattern.lower() else current

    with mock.patch(
            "{}.glob.glob".format(r.__name__),
            side_effect=fake_glob,
    ):
        with caplog.at_level(logging.DEBUG):
            models = r.ModelStore(modelstore_config).list_models()

    print(models)
    # One model ID named "mymodel" and one named "anothermodel".
    assert len(models) == 2
    assert models["mymodel"]["latest"] == models["mymodel"]["1.1.0"]
    assert len(models["mymodel"]["backups"]) == 2
    assert models["anothermodel"]["backups"] == []
    assert "ignoring" not in caplog.text.lower()
# Example #4
def _get_model_store(complete_conf, cache=True):
    """Return the ModelStore for the configured model, reusing a cached one.

    A fresh store is created when none is cached for the model's key; it is
    remembered for later calls unless ``cache`` is False.
    """
    global _cached_model_stores

    key = _model_key(complete_conf["model"])

    store = _cached_model_stores.get(key)
    if not store:
        store = resource.ModelStore(complete_conf)

    if cache:
        _cached_model_stores[key] = store

    return store
# Example #5
def test_modelstore_update_model_metrics(dump, load, modelstore_config):
    """Updating metrics rewrites the metadata and extends the history."""
    fresh_metrics = {"a": 1}
    store = r.ModelStore(modelstore_config)
    store.update_model_metrics(modelstore_config["model"], fresh_metrics)

    load.assert_called_once()
    dump.assert_called_once()
    _, written = dump.call_args[0]
    assert written["metrics"] == fresh_metrics
    history = written["metrics_history"]
    assert len(history) == 2
    # Remove the pre-existing entry; the remaining one must be the new metrics.
    del history["0123"]
    assert history.popitem()[1] == fresh_metrics
# Example #6
def test_modelstore_load(json, pkl, modelstore_config):
    """Loading a trained model opens both the pickle and the metadata JSON."""
    patched_open = mock.patch(
        "{}.open".format(r.__name__), mock.mock_open(), create=True
    )
    with patched_open as mo:
        store = r.ModelStore(modelstore_config)
        store.load_trained_model(modelstore_config["model"])

    model_conf = modelstore_config["model"]
    base_name = os.path.join(
        modelstore_config["model_store"]["location"],
        "{}_{}".format(model_conf["name"], model_conf["version"]),
    )
    expected = [
        mock.call("{}.pkl".format(base_name), "rb"),
        mock.call("{}.json".format(base_name), "r", encoding="utf-8"),
    ]
    mo.assert_has_calls(expected, any_order=True)
# Example #7
def test_modelstore_dump(glob, copy, makedirs, path_exists, modelstore_config):
    """Dumping a trained model writes both the pickle and the metadata JSON."""
    patched_open = mock.patch(
        "{}.open".format(r.__name__), mock.mock_open(), create=True
    )
    with patched_open as mo:
        store = r.ModelStore(modelstore_config)
        store.dump_trained_model(modelstore_config, {"hi": 1}, {"there": 2})

    model_conf = modelstore_config["model"]
    base_name = os.path.join(
        modelstore_config["model_store"]["location"],
        "{}_{}".format(model_conf["name"], model_conf["version"]),
    )
    expected = [
        mock.call("{}.pkl".format(base_name), "wb"),
        mock.call("{}.json".format(base_name), "w"),
    ]
    mo.assert_has_calls(expected, any_order=True)
# Example #8
def test_modelstore_dump_extra_model_keys(jsond, pickled, path_exists,
                                          modelstore_config):
    """Extra model-config keys land in the dumped metadata; reserved keys win."""
    model_conf = modelstore_config["model"]
    model_conf["extraparam"] = 42
    model_conf["anotherparam"] = 23
    # "created" is set by the store itself and must not be clobbered.
    model_conf["created"] = "colliding keys should not occur"

    with mock.patch("{}.open".format(r.__name__),
                    mock.mock_open(),
                    create=True):
        store = r.ModelStore(modelstore_config)
        store.dump_trained_model(modelstore_config, {"pseudo_model": 1},
                                 {"pseudo_metrics": 2})

    dumped = jsond.call_args[0][0]
    print(modelstore_config)
    print(dumped)
    assert dumped.get("extraparam") == 42
    assert dumped.get("anotherparam") == 23
    assert dumped["created"] != "colliding keys should not occur"
# Example #9
def test_modelstore_train_report(jsond, pickled, path_exists,
                                 modelstore_config):
    """Train-report entries and system info are part of the metadata dump."""
    with mock.patch("{}.open".format(r.__name__),
                    mock.mock_open(),
                    create=True):
        store = r.ModelStore(modelstore_config)
        store.add_to_train_report("report_key", "report_val")
        store.dump_trained_model(modelstore_config, {"pseudo_model": 1},
                                 {"pseudo_metrics": 2})

    dumped = jsond.call_args[0][0]
    print(modelstore_config)
    print(dumped)
    assert "train_report" in dumped
    report = dumped["train_report"]
    assert report.get("report_key") == "report_val"

    assert "system" in dumped
    for expected_key in ("mllaunchpad_version", "platform", "packages"):
        assert expected_key in dumped["system"]
# Example #10
def list_models(model_store_location_or_config_dict: Union[Dict, str]) -> Dict:
    """Get information on all available versions of trained models.

    :param model_store_location_or_config_dict: Location of the model store. If you have a config dict available, use that instead.
    :type model_store_location_or_config_dict: Union[Dict, str]

    Side note: The return value includes backups of models that have been re-trained without changing
    the version number (they reside in the subdirectory ``previous``).
    Please note that these backed up models are just listed for information and are not available
    for loading (one would have to restore them by moving them up a directory level from ``previous``).

    Example::

        import mllaunchpad as mllp
        my_cfg = mllp.get_validated_config("./my_config_file.yml")
        all_models = mllp.list_models(my_cfg)  # also accepts model store location string

        # An example of what a ``list_models()``'s result would look like:
        # {
        #     iris: {
        #         1.0.0: { ... complete metadata of this version number ... },
        #         1.1.0: { ... },
        #         latest: { ... duplicate of metadata of highest available version number, here 1.1.0 ... },
        #         backups: [ {...}, {...}, ... ]
        #     },
        #     my_other_model: {
        #         1.0.1: { ... },
        #         2.0.0: { ... },
        #         latest: { ... },
        #         backups: []
        #     }
        # }

    :returns: Dict with information on all available trained models.
    """
    ms = resource.ModelStore(model_store_location_or_config_dict)
    return ms.list_models()
# Example #11
    def __init__(self, config, application, debug=False):
        """When initializing ModelApi, your model will be automatically
        retrieved from the model store based on the currently active
        configuration.

        Params:
            config:       configuration dictionary to use
            application:  flask application to use
            debug:        use current prediction code instead of that of persisted model
        """
        # Load the persisted model wrapper for the configured model.
        self.model_config = config["model"]
        model_store = resource.ModelStore(config)
        self.model_wrapper = self._load_model(model_store, self.model_config)
        if debug:
            # TODO: Hacky, should use model_actions functionality for a lot of API functionality instead of duplicating.
            # Create a fresh model object from current code and transplant existing contents
            m_cls = model_actions._get_model_class(config, cache=True)
            curr_model_wrapper = m_cls(contents=self.model_wrapper.contents)
            self.model_wrapper = curr_model_wrapper

        # Workaround (tensorflow has problem with spontaneously created threads such as with Flask):
        # https://kobkrit.com/tensor-something-is-not-an-element-of-this-graph-error-in-keras-on-flask-web-server-4173a8fe15e1
        try:
            import tensorflow as tf

            graph = tf.get_default_graph()
            # NOTE(review): "__graph" is name-mangled to "_<ClassName>__graph"
            # because the assignment happens inside this class's method —
            # confirm that whatever reads the graph back uses the same
            # (mangled) attribute name.
            self.model_wrapper.__graph = graph
        except Exception as e:
            # Best-effort workaround: tensorflow may be absent or its API
            # changed; only log at debug level and continue.
            # NOTE(review): the two concatenated literals below join without a
            # space ("...problemresulted in: ..."); looks like a missing
            # trailing space — confirm and fix upstream.
            logger.debug(
                'Optional tensorflow/flask workaround for "<tensor> is not an element of this graph" problem'
                + "resulted in: %s",
                e,
            )
        else:
            logger.info(
                'Stored tensorflow model\'s graph - tensorflow/flask workaround for "<tensor> is not an element of this graph" problem'
            )
        self.datasources, self.datasinks = self._init_datasources(config)

        logger.debug("Initializing RESTful API")
        api = Api(application)

        api_url = get_api_base_url(config)

        # Derive REST resources from the configured RAML API specification.
        raml = _load_raml(config)
        res_normal, res_with_id, res_file = _get_resources(raml)

        if res_file or res_normal:
            # Collect URLs and request parsers for the query-style and the
            # file-upload-style resource (either may be absent).
            resource_urls = {"query": None, "file": None}
            parsers = {"query": None, "file": None}
            if res_normal:
                logger.debug(
                    "Adding query resource %s to api %s",
                    res_normal.path,
                    api_url,
                )
                resource_urls["query"] = api_url + res_normal.path
                parsers["query"] = _create_request_parser(res_normal)
            if res_file:
                logger.debug(
                    "Adding file-based resource %s to api %s",
                    res_file.path,
                    api_url,
                )
                resource_urls["file"] = api_url + res_file.path
                parsers["file"] = _create_request_parser(res_file)
            # If query and file resources share the same URL, register one
            # combined resource with both parsers; otherwise register each
            # existing resource separately under its own URL.
            if (
                len(resource_urls) == 2
                and resource_urls["query"] == resource_urls["file"]
            ):
                api.add_resource(
                    QueryOrFileUploadResource,  # QueryResource,
                    resource_urls["query"],
                    resource_class_kwargs={
                        "model_api_obj": self,
                        "query_parser": parsers["query"],
                        "file_parser": parsers["file"],
                    },
                )
            else:
                for k, res_url in resource_urls.items():
                    if not res_url:
                        continue
                    api.add_resource(
                        QueryOrFileUploadResource,  # QueryResource,
                        res_url,
                        resource_class_kwargs={
                            "model_api_obj": self,
                            "query_parser": parsers["query"]
                            if k == "query"
                            else None,
                            "file_parser": parsers["file"]
                            if k == "file"
                            else None,
                        },
                    )

        if res_with_id:
            # Resource addressed by an ID in the URL path.
            logger.debug(
                "Adding url-id resource %s to api %s",
                res_with_id.path,
                api_url,
            )
            # NOTE(review): the flask route below hard-codes
            # "<string:some_resource_id>" while the RAML's own URI parameter
            # name is only passed along as "id_name" — presumably the resource
            # maps the route arg back to that name; confirm.
            uri_param_name = res_with_id.uri_params[-1].name
            resource_url = (
                api_url
                + res_with_id.parent.path
                + "/<string:some_resource_id>"
            )
            parser = _create_request_parser(res_with_id)
            api.add_resource(
                GetByIdResource,
                resource_url,
                resource_class_kwargs={
                    "model_api_obj": self,
                    "parser": parser,
                    "id_name": uri_param_name,
                },
            )