def delete_model(self, model_name):
    model_server_config = model_server_config_pb2.ModelServerConfig()
    config_list = model_server_config_pb2.ModelConfigList()
    with lock(DEFAULT_LOCK_FILE):
        try:
            config_file = self._read_model_config(MODEL_CONFIG_FILE)
            # strip the outer "model_config_list: { ... }" wrapper so the
            # body parses as a bare ModelConfigList
            config_list_text = config_file.strip('\n').strip('}').strip(
                'model_config_list: {')
            config_list = text_format.Parse(text=config_list_text,
                                            message=config_list)
            for config in config_list.config:
                if config.name == model_name:
                    config_list.config.remove(config)
                    model_server_config.model_config_list.CopyFrom(config_list)
                    req = model_management_pb2.ReloadConfigRequest()
                    req.config.CopyFrom(model_server_config)
                    self.stub.HandleReloadConfigRequest(
                        request=req,
                        timeout=GRPC_REQUEST_TIMEOUT_IN_SECONDS,
                        wait_for_ready=True)
                    self._delete_model_from_config_file(model_server_config)
                    return 'Model {} unloaded.'.format(model_name)
            # no such model exists
            raise FileNotFoundError('Model {} is not loaded.'.format(model_name))
        except grpc.RpcError as e:
            if e.code() is grpc.StatusCode.DEADLINE_EXCEEDED:
                raise MultiModelException(408, e.details())
            raise MultiModelException(500, e.details())
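
# --- Illustrative example (not part of the original source) ---
# A minimal sketch of the text-format model config file that delete_model
# unwraps with those str.strip() calls, assuming the same
# "model_config_list: { ... }" format that TensorFlow Serving's
# --model_config_file flag consumes. Requires the tensorflow-serving-api
# package; the model name and base path are hypothetical.
from google.protobuf import text_format
from tensorflow_serving.config import model_server_config_pb2

EXAMPLE_CONFIG = """model_config_list: {
  config: {
    name: 'half_plus_two'
    base_path: '/opt/ml/models/half_plus_two'
    model_platform: 'tensorflow'
  }
}"""

# Parsing the whole file as a ModelServerConfig (as add_model does) avoids
# the brittle string stripping entirely; both approaches yield the same
# repeated ModelConfig entries.
server_config = model_server_config_pb2.ModelServerConfig()
text_format.Parse(EXAMPLE_CONFIG, server_config)
print([c.name for c in server_config.model_config_list.config])
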
def on_delete(self, req, res, model_name):  # pylint: disable=W0613
    if model_name not in self._model_tfs_pid:
        res.status = falcon.HTTP_404
        res.body = json.dumps(
            {'error': 'Model {} is not loaded yet'.format(model_name)})
    else:
        try:
            self._model_tfs_pid[model_name].kill()
            os.remove('/sagemaker/tfs-config/{}/model-config.cfg'.format(model_name))
            os.rmdir('/sagemaker/tfs-config/{}'.format(model_name))
            release_rest_port = self._model_tfs_rest_port[model_name]
            release_grpc_port = self._model_tfs_grpc_port[model_name]
            with lock():
                bisect.insort(self._tfs_ports['rest_port'], release_rest_port)
                bisect.insort(self._tfs_ports['grpc_port'], release_grpc_port)
            del self._model_tfs_rest_port[model_name]
            del self._model_tfs_grpc_port[model_name]
            del self._model_tfs_pid[model_name]
            res.status = falcon.HTTP_200
            res.body = json.dumps({
                'success': 'Successfully unloaded model {}.'.format(model_name)
            })
        except OSError as error:
            res.status = falcon.HTTP_500
            res.body = json.dumps({'error': str(error)}).encode('utf-8')
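
# --- Illustrative example (not part of the original source) ---
# A hedged sketch of exercising the DELETE route above with `requests`,
# assuming the container serves the model-management API on
# http://localhost:8080 (the SageMaker default listener); adjust the
# host, port, and model name to your deployment.
import requests

resp = requests.delete('http://localhost:8080/models/half_plus_two')
print(resp.status_code)  # 200 on success, 404 if the model is not loaded
print(resp.json())
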
def add_model(self, model_name, base_path, model_platform='tensorflow'):
    # read model configs from existing model config file
    model_server_config = model_server_config_pb2.ModelServerConfig()
    config_list = model_server_config_pb2.ModelConfigList()
    with lock(DEFAULT_LOCK_FILE):
        try:
            config_file = self._read_model_config(MODEL_CONFIG_FILE)
            model_server_config = text_format.Parse(
                text=config_file, message=model_server_config)

            new_model_config = config_list.config.add()
            new_model_config.name = model_name
            new_model_config.base_path = base_path
            new_model_config.model_platform = model_platform

            # send HandleReloadConfigRequest to tensorflow model server
            model_server_config.model_config_list.MergeFrom(config_list)
            req = model_management_pb2.ReloadConfigRequest()
            req.config.CopyFrom(model_server_config)
            self.stub.HandleReloadConfigRequest(
                request=req,
                timeout=GRPC_REQUEST_TIMEOUT_IN_SECONDS,
                wait_for_ready=True)

            self._add_model_to_config_file(model_name, base_path, model_platform)
        except grpc.RpcError as e:
            if e.code() is grpc.StatusCode.INVALID_ARGUMENT:
                raise MultiModelException(409, e.details())
            elif e.code() is grpc.StatusCode.DEADLINE_EXCEEDED:
                raise MultiModelException(408, e.details())
            raise MultiModelException(500, e.details())
    return 'Successfully loaded model {}'.format(model_name)
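
# --- Illustrative example (not part of the original source) ---
# A minimal sketch of the gRPC plumbing add_model relies on: a channel to
# the local TensorFlow Serving instance and a ModelServiceStub whose
# HandleReloadConfigRequest applies a new ModelServerConfig. The port and
# model details are assumptions; use whatever gRPC port your TFS process
# listens on. Requires the tensorflow-serving-api and grpcio packages.
import grpc
from tensorflow_serving.apis import model_management_pb2
from tensorflow_serving.apis import model_service_pb2_grpc
from tensorflow_serving.config import model_server_config_pb2

channel = grpc.insecure_channel('localhost:10001')  # hypothetical TFS gRPC port
stub = model_service_pb2_grpc.ModelServiceStub(channel)

config = model_server_config_pb2.ModelServerConfig()
model = config.model_config_list.config.add()
model.name = 'half_plus_two'                        # hypothetical model
model.base_path = '/opt/ml/models/half_plus_two'
model.model_platform = 'tensorflow'

req = model_management_pb2.ReloadConfigRequest()
req.config.CopyFrom(config)
# blocks until TFS has loaded (or rejected) the new config
stub.HandleReloadConfigRequest(req, timeout=25.0, wait_for_ready=True)
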
def _handle_invocation_post(self, req, res, model_name=None):
    if SAGEMAKER_MULTI_MODEL_ENABLED:
        if model_name:
            if model_name not in self._model_tfs_rest_port:
                res.status = falcon.HTTP_404
                res.body = json.dumps({
                    'error': 'Model {} is not loaded yet.'.format(model_name)
                })
                return
            log.info('model name: {}'.format(model_name))
            rest_port = self._model_tfs_rest_port[model_name]
            log.info('rest port: {}'.format(str(rest_port)))
            grpc_port = self._model_tfs_grpc_port[model_name]
            log.info('grpc port: {}'.format(str(grpc_port)))
            data, context = tfs_utils.parse_request(
                req, rest_port, grpc_port,
                self._tfs_default_model_name, model_name)
        else:
            res.status = falcon.HTTP_400
            res.body = json.dumps({
                'error': 'Invocation request does not contain model name.'
            })
            # without this return, `data` below would be unbound
            return
    else:
        data, context = tfs_utils.parse_request(
            req, self._tfs_rest_port, self._tfs_grpc_port,
            self._tfs_default_model_name)

    try:
        res.status = falcon.HTTP_200
        if SAGEMAKER_MULTI_MODEL_ENABLED:
            with lock():
                handlers = self.model_handlers[model_name]
            res.body, res.content_type = handlers(data, context)
        else:
            res.body, res.content_type = self._handlers(data, context)
    except Exception as e:  # pylint: disable=broad-except
        log.exception('exception handling request: {}'.format(e))
        res.status = falcon.HTTP_500
        res.body = json.dumps({'error': str(e)}).encode('utf-8')  # pylint: disable=E1101
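
# --- Illustrative example (not part of the original source) ---
# A hedged sketch of a multi-model invocation against the handler above,
# assuming the container exposes the invoke route at
# /models/<name>/invoke on port 8080 (the standard SageMaker multi-model
# contract). The model name and payload shape are hypothetical; the
# accepted payload depends on your input handler.
import json
import requests

resp = requests.post(
    'http://localhost:8080/models/half_plus_two/invoke',
    data=json.dumps({'instances': [1.0, 2.0, 5.0]}),
    headers={'Content-Type': 'application/json'})
print(resp.status_code, resp.text)
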
def _handle_load_model_post(self, res, data):  # noqa: C901
    model_name = data['model_name']
    base_path = data['url']

    # model is already loaded
    if model_name in self._model_tfs_pid:
        res.status = falcon.HTTP_409
        res.body = json.dumps(
            {'error': 'Model {} is already loaded.'.format(model_name)})
        return

    # check if there are available ports
    if not self._ports_available():
        res.status = falcon.HTTP_507
        res.body = json.dumps({
            'error': 'Memory exhausted: no available ports to load the model.'
        })
        return

    with lock():
        self._model_tfs_rest_port[model_name] = self._tfs_ports['rest_port'].pop()
        self._model_tfs_grpc_port[model_name] = self._tfs_ports['grpc_port'].pop()

    # validate model files are in the specified base_path
    if self.validate_model_dir(base_path):
        try:
            # install custom dependencies, import handlers
            self._import_custom_modules(model_name)

            tfs_config = tfs_utils.create_tfs_config_individual_model(
                model_name, base_path)
            tfs_config_file = '/sagemaker/tfs-config/{}/model-config.cfg'.format(
                model_name)
            log.info('tensorflow serving model config: \n%s\n', tfs_config)
            os.makedirs(os.path.dirname(tfs_config_file))
            with open(tfs_config_file, 'w') as f:
                f.write(tfs_config)

            batching_config_file = '/sagemaker/batching/{}/batching-config.cfg'.format(
                model_name)
            if self._tfs_enable_batching:
                tfs_utils.create_batching_config(batching_config_file)

            cmd = tfs_utils.tfs_command(
                self._model_tfs_grpc_port[model_name],
                self._model_tfs_rest_port[model_name],
                tfs_config_file,
                self._tfs_enable_batching,
                batching_config_file,
            )
            p = subprocess.Popen(cmd.split())
            self._wait_for_model(model_name)

            log.info('started tensorflow serving (pid: %d)', p.pid)
            # update model name <-> tfs pid map
            self._model_tfs_pid[model_name] = p

            res.status = falcon.HTTP_200
            res.body = json.dumps({
                'success': 'Successfully loaded model {}, '
                           'listening on rest port {} '
                           'and grpc port {}.'.format(
                               model_name,
                               self._model_tfs_rest_port[model_name],
                               self._model_tfs_grpc_port[model_name],
                           )
            })
        except MultiModelException as multi_model_exception:
            self._cleanup_config_file(tfs_config_file)
            self._cleanup_config_file(batching_config_file)
            if multi_model_exception.code == 409:
                res.status = falcon.HTTP_409
                res.body = multi_model_exception.msg
            elif multi_model_exception.code == 408:
                res.status = falcon.HTTP_408
                res.body = multi_model_exception.msg
            else:
                raise MultiModelException(falcon.HTTP_500,
                                          multi_model_exception.msg)
        except FileExistsError as e:
            res.status = falcon.HTTP_409
            res.body = json.dumps({
                'error': 'Model {} is already loaded. {}'.format(
                    model_name, str(e))
            })
        except OSError as os_error:
            self._cleanup_config_file(tfs_config_file)
            self._cleanup_config_file(batching_config_file)
            if os_error.errno == 12:  # ENOMEM
                raise MultiModelException(
                    falcon.HTTP_507,
                    'Memory exhausted: not enough memory to start TFS instance')
            raise MultiModelException(falcon.HTTP_500, os_error.strerror)
    else:
        res.status = falcon.HTTP_404
        res.body = json.dumps({
            'error': 'Could not find valid base path {} for servable {}'.format(
                base_path, model_name)
        })
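
# --- Illustrative example (not part of the original source) ---
# A hedged sketch of the load request this handler expects: a JSON body
# with 'model_name' and 'url' (the model's base path inside the
# container), POSTed to the /models route. Host, port, and paths are
# assumptions; adjust to your deployment.
import json
import requests

resp = requests.post(
    'http://localhost:8080/models',
    data=json.dumps({
        'model_name': 'half_plus_two',
        'url': '/opt/ml/models/half_plus_two',
    }),
    headers={'Content-Type': 'application/json'})
print(resp.status_code, resp.json())  # 200 on success, 409 if already loaded
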
def _ports_available(self):
    with lock():
        rest_ports = self._tfs_ports['rest_port']
        grpc_ports = self._tfs_ports['grpc_port']
    return len(rest_ports) > 0 and len(grpc_ports) > 0
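
# --- Illustrative example (not part of the original source) ---
# A minimal sketch, under assumed port ranges, of the pool that
# _ports_available() checks: sorted lists of free REST/gRPC ports from
# which _handle_load_model_post pops a pair and into which on_delete
# re-inserts released ports with bisect.insort, keeping the lists sorted.
import bisect

_tfs_ports = {
    'rest_port': list(range(10100, 10120)),  # hypothetical REST port range
    'grpc_port': list(range(10000, 10020)),  # hypothetical gRPC port range
}

rest_port = _tfs_ports['rest_port'].pop()   # claim a port pair on load
grpc_port = _tfs_ports['grpc_port'].pop()

bisect.insort(_tfs_ports['rest_port'], rest_port)  # release on unload
bisect.insort(_tfs_ports['grpc_port'], grpc_port)
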