def __init__(self, provider: str, api_spec: dict, model_dir: str):
    """
    Args:
        provider: "aws" or "gcp".
        api_spec: API configuration.
        model_dir: Where the models are stored on disk.
    """
    self.provider = provider
    self.type = predictor_type_from_api_spec(api_spec)
    self.path = api_spec["predictor"]["path"]
    self.config = api_spec["predictor"].get("config", {})
    self.api_spec = api_spec

    self.crons = []
    # when no models are specified, none of the model-management state below is needed
    if not are_models_specified(self.api_spec):
        return

    self.model_dir = model_dir

    self.caching_enabled = self._is_model_caching_enabled()
    self.multiple_processes = self.api_spec["predictor"]["processes_per_replica"] > 1

    # model caching can only be enabled when processes_per_replica is 1
    # model side-reloading is supported for any number of processes_per_replica
    if self.caching_enabled:
        # the cache-size fields live under a different key for the Python predictor
        if self.type == PythonPredictorType:
            mem_cache_size = self.api_spec["predictor"]["multi_model_reloading"]["cache_size"]
            disk_cache_size = self.api_spec["predictor"]["multi_model_reloading"]["disk_cache_size"]
        else:
            mem_cache_size = self.api_spec["predictor"]["models"]["cache_size"]
            disk_cache_size = self.api_spec["predictor"]["models"]["disk_cache_size"]
        self.models = ModelsHolder(
            self.type,
            self.model_dir,
            mem_cache_size=mem_cache_size,
            disk_cache_size=disk_cache_size,
            on_download_callback=model_downloader,
        )
    # NOTE: the original also re-tested `not self.caching_enabled` here, which is
    # already guaranteed by this elif — the redundant check has been dropped
    elif self.type not in [TensorFlowPredictorType, TensorFlowNeuronPredictorType]:
        self.models = ModelsHolder(self.type, self.model_dir)
    else:
        # TF/TF-Neuron without caching: no local holder (presumably the models are
        # tracked through TF Serving instead — see initialize_client)
        self.models = None

    # the models tree is only maintained when running a single process per replica
    if self.multiple_processes:
        self.models_tree = None
    else:
        self.models_tree = ModelsTree()
def initialize_client(
    self, tf_serving_host: Optional[str] = None, tf_serving_port: Optional[str] = None
) -> Union[PythonClient, TensorFlowClient, ONNXClient]:
    """
    Build the client that gives access to the models specified in the API spec (cortex.yaml).

    Only applies when models are provided in the API spec.

    Args:
        tf_serving_host: Host of the TF serving server; only used with the TensorFlow predictor.
        tf_serving_port: Port of the TF serving server; only used with the TensorFlow predictor.

    Return:
        The client for the respective predictor type, or None when no models are specified.
    """
    # nothing to build when the API spec declares no models
    if not are_models_specified(self.api_spec):
        return None

    client = None
    if self.type == PythonPredictorType:
        client = PythonClient(self.api_spec, self.models, self.model_dir, self.models_tree)
    elif self.type in [TensorFlowPredictorType, TensorFlowNeuronPredictorType]:
        tf_serving_address = tf_serving_host + ":" + tf_serving_port
        client = TensorFlowClient(
            tf_serving_address,
            self.api_spec,
            self.models,
            self.model_dir,
            self.models_tree,
        )
        if not self.caching_enabled:
            # when caching is off, a background thread keeps the TF Serving
            # client up to date (every 5 seconds)
            TFSAPIServingThreadUpdater(interval=5.0, client=client._client).start()
    elif self.type == ONNXPredictorType:
        client = ONNXClient(self.api_spec, self.models, self.model_dir, self.models_tree)

    return client
def initialize_impl(
    self,
    project_dir: str,
    client: Union[PythonClient, TensorFlowClient, ONNXClient],
    metrics_client: DogStatsd,
    job_spec: Optional[Dict[str, Any]] = None,
    proto_module_pb2: Optional[Any] = None,
):
    """
    Initialize predictor class as provided by the user.

    Args:
        project_dir: Directory containing the user's project files.
        client: Model-access client as returned by initialize_client.
        metrics_client: Statsd client for emitting metrics.
        job_spec: A dictionary when the "kind" of the API is set to "BatchAPI". Otherwise, it's None.
        proto_module_pb2: A module of the compiled proto when grpc is enabled for the
            "RealtimeAPI" kind. Otherwise, it's None.

    Can raise UserRuntimeException/UserException/CortexException.
    """

    # build constructor args: only pass the kwargs the user's __init__ actually declares
    class_impl = self.class_impl(project_dir)
    constructor_args = inspect.getfullargspec(class_impl.__init__).args

    config = deepcopy(self.config)
    if job_spec is not None and job_spec.get("config") is not None:
        # a job-level config overrides the API-level config
        util.merge_dicts_in_place_overwrite(config, job_spec["config"])

    args = {}
    if "config" in constructor_args:
        args["config"] = config
    if "job_spec" in constructor_args:
        args["job_spec"] = job_spec
    if "metrics_client" in constructor_args:
        args["metrics_client"] = metrics_client
    if "proto_module_pb2" in constructor_args:
        args["proto_module_pb2"] = proto_module_pb2

    # initialize predictor class
    try:
        if self.type == PythonPredictorType:
            if are_models_specified(self.api_spec):
                args["python_client"] = client
                # set load method to enable the use of the client in the constructor
                # setting/getting from self in load_model won't work because self will be set to None
                client.set_load_method(
                    lambda model_path: class_impl.load_model(None, model_path)
                )
                initialized_impl = class_impl(**args)
                client.set_load_method(initialized_impl.load_model)
            else:
                initialized_impl = class_impl(**args)
        elif self.type in [TensorFlowPredictorType, TensorFlowNeuronPredictorType]:
            args["tensorflow_client"] = client
            initialized_impl = class_impl(**args)
        else:
            # ONNX predictor; this branch was previously missing, which left
            # `initialized_impl` unbound (UnboundLocalError at the return below)
            # even though initialize_client supports ONNXPredictorType
            args["onnx_client"] = client
            initialized_impl = class_impl(**args)
    except Exception as e:
        raise UserRuntimeException(self.path, "__init__", str(e)) from e

    # initialize the crons if models have been specified and if the API kind is RealtimeAPI
    if are_models_specified(self.api_spec) and self.api_spec["kind"] == "RealtimeAPI":
        if not self.multiple_processes and self.caching_enabled:
            self.crons += [
                ModelTreeUpdater(
                    interval=10,
                    api_spec=self.api_spec,
                    tree=self.models_tree,
                    ondisk_models_dir=self.model_dir,
                ),
                ModelsGC(
                    interval=10,
                    api_spec=self.api_spec,
                    models=self.models,
                    tree=self.models_tree,
                ),
            ]
        if not self.caching_enabled and self.type == PythonPredictorType:
            self.crons += [
                FileBasedModelsGC(interval=10, models=self.models, download_dir=self.model_dir)
            ]

    for cron in self.crons:
        cron.start()

    return initialized_impl