def models(name, framework, engine, version, list_all, quiet):
    """Show a table of models published in MLModelCI that match the query."""
    payload = remove_dict_null({
        'name': name,
        'framework': framework,
        'engine': engine,
        'version': version
    })
    with requests.get(f'http://{SERVER_HOST}:{SERVER_PORT}/api/v1/model/', params=payload) as r:
        model_list = r.json()
        model_view([model_list], list_all=list_all, quiet=quiet)
def show_models(name, framework, engine, version, list_all):
    """Show a table that lists all models published in MLModelCI.

    Args:
        name: Model name used to filter the query.
        framework: Framework name (e.g. PyTorch, TensorFlow).
        engine: Serving engine name.
        version: Model version.
        list_all: If True, display all queried models; otherwise only a partial result is shown.
    """
    payload = remove_dict_null({
        'name': name,
        'framework': framework,
        'engine': engine,
        'version': version
    })
    with requests.get(f'http://{SERVER_HOST}:{SERVER_PORT}/api/v1/model/', params=payload) as r:
        model_list = r.json()
        model_view([model_list], list_all=list_all)
def list_models(
    architecture: Optional[str] = typer.Option(None, '-n', '--name', help='Model architecture name'),
    framework: Optional[Framework] = typer.Option(None, '-fw', '--framework', case_sensitive=False, help='Framework'),
    engine: Optional[Engine] = typer.Option(None, '-e', '--engine', case_sensitive=False, help='Serving engine'),
    version: Optional[int] = typer.Option(None, '-v', '--version', help='Version'),
    list_all: Optional[bool] = typer.Option(
        False, '-a', '--all', is_flag=True,
        help='Display all queried models. Otherwise, only a partial result will be shown.'
    ),
):
    """Show a table that lists all models published in MLModelCI."""
    payload = remove_dict_null(
        {'architecture': architecture, 'framework': framework, 'engine': engine, 'version': version}
    )
    with requests.get(f'{app_settings.api_v1_prefix}/model', params=payload) as r:
        model_list = r.json()
        model_view([MLModel.parse_obj(model) for model in model_list], list_all=list_all)
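A minimal sketch of how `list_models` might be registered and exercised; the `typer.Typer()` app object, the `CliRunner`-based invocation, and the enum values `pytorch`/`torchscript` are illustrative assumptions rather than MLModelCI's actual CLI wiring, and a running MLModelCI server is still needed for the request to succeed.

import typer
from typer.testing import CliRunner

cli = typer.Typer()          # hypothetical app object; the real CLI wiring lives elsewhere
cli.command()(list_models)   # with a single command, Typer exposes it as the app's entry point

# Invoke as if running: <cli> --framework pytorch --engine torchscript --all
runner = CliRunner()
result = runner.invoke(cli, ['--framework', 'pytorch', '--engine', 'torchscript', '--all'])
print(result.output)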
def serve(
    save_path: Union[Path, str],
    device: str = 'cpu',
    name: str = None,
    batch_size: int = 16,
) -> Container:
    """Serve the given model save path in a Docker container.

    Args:
        save_path (Union[Path, str]): Saved path to the model.
        device (str): Device name. E.g.: cpu, cuda, cuda:1.
        name (str): Container name. Defaults to None.
        batch_size (int): Batch size for passing to serving containers.

    Returns:
        Container: Docker container object created.
    """
    info = parse_path(Path(save_path))
    architecture: str = info['architecture']
    engine: Engine = info['engine']
    cuda, device_num = get_device(device)

    docker_client = docker.from_env()
    # set mount
    mounts = [
        Mount(target=f'/models/{architecture}', source=str(info['base_dir']), type='bind', read_only=True)
    ]
    common_kwargs = remove_dict_null({
        'detach': True,
        'auto_remove': True,
        'mounts': mounts,
        'name': name
    })
    environment = dict()
    if cuda:
        common_kwargs['runtime'] = 'nvidia'
        environment['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        environment['CUDA_VISIBLE_DEVICES'] = device_num

    if engine == Engine.TFS:
        # TensorFlow Serving 2.2.0 has the issue: https://github.com/tensorflow/serving/issues/1663
        docker_tag = '2.1.0-gpu' if cuda else '2.1.0'
        ports = {'8501': config.TFS_HTTP_PORT, '8500': config.TFS_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'tensorflow/serving:{docker_tag}', environment=environment, ports=ports, **common_kwargs
        )
    elif engine == Engine.TORCHSCRIPT:
        docker_tag = 'latest-gpu' if cuda else 'latest'
        ports = {'8000': config.TORCHSCRIPT_HTTP_PORT, '8001': config.TORCHSCRIPT_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'mlmodelci/pytorch-serving:{docker_tag}', environment=environment, ports=ports, **common_kwargs
        )
    elif engine == Engine.ONNX:
        docker_tag = 'latest-gpu' if cuda else 'latest'
        ports = {'8000': config.ONNX_HTTP_PORT, '8001': config.ONNX_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'mlmodelci/onnx-serving:{docker_tag}', environment=environment, ports=ports, **common_kwargs
        )
    elif engine == Engine.TRT:
        if not cuda:
            raise RuntimeError('TensorRT cannot be run without CUDA. Please specify a CUDA device.')
        ports = {
            '8000': config.TRT_HTTP_PORT,
            '8001': config.TRT_GRPC_PORT,
            '8002': config.TRT_PROMETHEUS_PORT
        }
        ulimits = [
            Ulimit(name='memlock', soft=-1, hard=-1),
            Ulimit(name='stack', soft=67100864, hard=67100864)
        ]
        trt_kwargs = {'ulimits': ulimits, 'shm_size': '1G'}
        container = docker_client.containers.run(
            'nvcr.io/nvidia/tensorrtserver:19.10-py3',
            'trtserver --model-repository=/models',
            environment=environment,
            ports=ports,
            **common_kwargs,
            **trt_kwargs,
        )
    else:
        raise RuntimeError(f'Not able to serve model with path `{str(save_path)}`.')

    return container
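A minimal usage sketch for `serve`; the saved-model path, device, and container name below are illustrative assumptions, since `parse_path` expects the directory layout MLModelCI uses when it saves a model.

from pathlib import Path

# Hypothetical path -- substitute a model directory actually saved by MLModelCI.
save_path = Path.home() / '.modelci' / 'ResNet50' / 'pytorch-torchscript' / '1'

container = serve(save_path, device='cuda:0', name='resnet50-torchscript')
print(container.name, container.status)

# Requests can now be sent to the mapped HTTP/gRPC ports; stop() also removes the
# container because it was started with auto_remove=True.
container.stop()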
def models(name, framework, engine, version, list_all, quiet):
    payload = remove_dict_null({'name': name, 'framework': framework, 'engine': engine, 'version': version})
    with requests.get(f'{app_settings.api_v1_prefix}/model', params=payload) as r:
        model_list = r.json()
        model_view([MLModel.parse_obj(model) for model in model_list], list_all=list_all, quiet=quiet)