def _start_prod_batching_server(
    saved_bundle_path: str,
    api_server_port: int,
    config: BentoMLConfiguration,
    prometheus_lock: Optional[multiprocessing.Lock] = None,
):
    logger.info("Starting BentoML Batching server in production mode...")

    container = BentoMLContainer()
    container.config.from_dict(config.as_dict())

    from bentoml import marshal
    from bentoml.server.marshal_server import GunicornMarshalServer

    container.wire(packages=[sys.modules[__name__], marshal])

    # Avoid loading the model before the gunicorn fork
    marshal_server = GunicornMarshalServer(
        bundle_path=saved_bundle_path,
        prometheus_lock=prometheus_lock,
        outbound_host="localhost",
        outbound_port=api_server_port,
    )
    marshal_server.run()
def serve(
    port,
    bento,
    enable_microbatch,
    mb_max_batch_size,
    mb_max_latency,
    run_with_ngrok,
    yatai_url,
    enable_swagger,
    config,
):
    saved_bundle_path = resolve_bundle_path(
        bento, pip_installed_bundle_path, yatai_url
    )

    container = BentoMLContainer()

    config = BentoMLConfiguration(override_config_file=config)
    config.override(["api_server", "port"], port)
    config.override(["api_server", "enable_microbatch"], enable_microbatch)
    config.override(["api_server", "run_with_ngrok"], run_with_ngrok)
    config.override(["api_server", "enable_swagger"], enable_swagger)
    config.override(["marshal_server", "max_batch_size"], mb_max_batch_size)
    config.override(["marshal_server", "max_latency"], mb_max_latency)

    container.config.from_dict(config.as_dict())

    from bentoml import marshal, server

    container.wire(packages=[marshal, server])

    start_dev_server(saved_bundle_path)
def _start_prod_server(
    saved_bundle_path: str,
    config: BentoMLConfiguration,
    port: Optional[int] = None,
    prometheus_lock: Optional[multiprocessing.Lock] = None,
):
    logger.info("Starting BentoML API server in production mode...")

    container = BentoMLContainer()
    container.config.from_dict(config.as_dict())
    container.wire(packages=[sys.modules[__name__]])

    from bentoml.server.gunicorn_server import GunicornBentoServer

    # When no port is given, let the server fall back to the configured default.
    if port is None:
        gunicorn_app = GunicornBentoServer(
            saved_bundle_path, prometheus_lock=prometheus_lock,
        )
    else:
        gunicorn_app = GunicornBentoServer(
            saved_bundle_path, port=port, prometheus_lock=prometheus_lock,
        )
    gunicorn_app.run()
def inject_dependencies():
    """Inject dependencies and configuration to BentoML packages"""
    from timeit import default_timer as timer

    start = timer()
    logger.debug("Start dependency injection")

    from bentoml.configuration.containers import BentoMLContainer, BentoMLConfiguration

    config_file = get_local_config_file()
    # get_local_config_file() may return None; guard before checking the suffix
    if config_file and config_file.endswith(".yml"):
        configuration = BentoMLConfiguration(override_config_file=config_file)
    else:
        configuration = BentoMLConfiguration()

    container = BentoMLContainer()
    container.config.from_dict(configuration.as_dict())

    from bentoml import marshal, server, tracing, cli

    container.wire(packages=[marshal, server, tracing, cli])

    end = timer()
    logger.debug("Dependency injection completed in %.3f seconds", end - start)
def inject_dependencies():
    """Inject dependencies and configuration to BentoML packages"""
    from timeit import default_timer as timer

    start = timer()
    logger.debug("Start dependency injection")

    from bentoml.configuration.containers import BentoMLContainer, BentoMLConfiguration

    config_file = get_local_config_file()
    if config_file and config_file.endswith(".yml"):
        configuration = BentoMLConfiguration(override_config_file=config_file)
    else:
        configuration = BentoMLConfiguration()

    container = BentoMLContainer()
    container.config.from_dict(configuration.as_dict())

    from bentoml import (
        marshal,
        server,
        tracing,
        cli,
        adapters,
        saved_bundle,
        service,
    )
    from bentoml.yatai import yatai_service
    from bentoml.yatai import yatai_service_impl
    from bentoml.yatai.repository import s3_repository, gcs_repository

    container.wire(
        modules=[yatai_service, s3_repository, gcs_repository, yatai_service_impl],
        packages=[marshal, server, tracing, cli, adapters, saved_bundle, service],
    )

    end = timer()
    logger.debug("Dependency injection completed in %.3f seconds", end - start)
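# A minimal sketch of what the wiring above enables: functions inside the wired
# packages can pull configuration straight from the container through
# dependency_injector's @inject/Provide markers. The function below is a
# hypothetical illustration, not part of BentoML itself.
from dependency_injector.wiring import Provide, inject

from bentoml.configuration.containers import BentoMLContainer


@inject
def print_api_server_port(
    port: int = Provide[BentoMLContainer.config.api_server.port],
):
    # After container.wire(...) has run, `port` resolves from the injected config.
    print(f"api_server.port = {port}")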
def serve_gunicorn(
    port,
    workers,
    timeout,
    bento,
    enable_microbatch,
    mb_max_batch_size,
    mb_max_latency,
    microbatch_workers,
    yatai_url,
    enable_swagger,
    config,
):
    if not psutil.POSIX:
        _echo(
            "The `bentoml serve-gunicorn` command is only supported on POSIX. "
            "On the Windows platform, use `bentoml serve` for local API testing "
            "and Docker for running a production API endpoint: "
            "https://docs.docker.com/docker-for-windows/"
        )
        return

    saved_bundle_path = resolve_bundle_path(
        bento, pip_installed_bundle_path, yatai_url
    )

    container = BentoMLContainer()

    config = BentoMLConfiguration(override_config_file=config)
    config.override(["api_server", "port"], port)
    config.override(["api_server", "workers"], workers)
    config.override(["api_server", "timeout"], timeout)
    config.override(["api_server", "enable_microbatch"], enable_microbatch)
    config.override(["api_server", "enable_swagger"], enable_swagger)
    config.override(["marshal_server", "max_batch_size"], mb_max_batch_size)
    config.override(["marshal_server", "max_latency"], mb_max_latency)
    config.override(["marshal_server", "workers"], microbatch_workers)

    container.config.from_dict(config.as_dict())

    from bentoml import marshal, server

    container.wire(packages=[marshal, server])

    start_prod_server(saved_bundle_path)
def run(api_name, config, run_args, bento=None):
    container = BentoMLContainer()

    config = BentoMLConfiguration(override_config_file=config)
    container.config.from_dict(config.as_dict())

    from bentoml import tracing

    container.wire(modules=[tracing])

    parser = argparse.ArgumentParser()
    parser.add_argument('--yatai-url', type=str, default=None)
    parsed_args, _ = parser.parse_known_args(run_args)
    yatai_url = parsed_args.yatai_url

    saved_bundle_path = resolve_bundle_path(
        bento, pip_installed_bundle_path, yatai_url
    )

    api = load_bento_service_api(saved_bundle_path, api_name)
    exit_code = api.handle_cli(run_args)
    sys.exit(exit_code)
def _start_dev_server(
    saved_bundle_path: str,
    api_server_port: int,
    config: BentoMLConfiguration,
):
    logger.info("Starting BentoML API server in development mode...")

    from bentoml.saved_bundle import load_from_dir

    bento_service = load_from_dir(saved_bundle_path)

    from bentoml.server.api_server import BentoAPIServer

    container = BentoMLContainer()
    container.config.from_dict(config.as_dict())
    container.wire(packages=[sys.modules[__name__]])

    api_server = BentoAPIServer(bento_service)
    api_server.start(port=api_server_port)
def test_api_server_workers():
    container = BentoMLContainer()

    config_auto_workers = tempfile.NamedTemporaryFile(delete=False)
    config_auto_workers.write(
        b"""
api_server:
    workers: Null
"""
    )
    config_auto_workers.close()
    container.config.from_dict(
        BentoMLConfiguration(
            default_config_file=config_auto_workers.name,
            validate_schema=False,
            legacy_compatibility=False,
        ).as_dict(),
    )
    os.remove(config_auto_workers.name)

    workers = container.api_server_workers()
    assert workers is not None
    assert workers > 0

    config_manual_workers = tempfile.NamedTemporaryFile(delete=False)
    config_manual_workers.write(
        b"""
api_server:
    workers: 42
"""
    )
    config_manual_workers.close()
    container.config.from_dict(
        BentoMLConfiguration(
            default_config_file=config_manual_workers.name,
            validate_schema=False,
            legacy_compatibility=False,
        ).as_dict(),
    )
    os.remove(config_manual_workers.name)

    workers = container.api_server_workers()
    assert workers is not None
    assert workers == 42
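# The first half of the test above exercises a fallback: when api_server.workers
# is null in the config, the container computes a worker count automatically.
# Below is a minimal sketch of that kind of provider, assuming a CPU-count
# heuristic; the exact formula BentoML's container uses is not shown here.
import multiprocessing
from typing import Optional


def api_server_workers_fallback(configured_workers: Optional[int]) -> int:
    # Use the configured value when present, otherwise derive one from CPU count.
    if configured_workers:
        return configured_workers
    return multiprocessing.cpu_count() // 2 + 1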
def _start_dev_proxy(
    saved_bundle_path: str,
    api_server_port: int,
    config: BentoMLConfiguration,
):
    logger.info("Starting BentoML API proxy in development mode...")

    from bentoml import marshal

    container = BentoMLContainer()
    container.config.from_dict(config.as_dict())
    container.wire(packages=[marshal])

    from bentoml.marshal.marshal import MarshalService

    marshal_server = MarshalService(
        saved_bundle_path,
        outbound_host="localhost",
        outbound_port=api_server_port,
    )

    marshal_server.fork_start_app()
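# A hedged sketch of how the two dev-mode helpers above could be combined: run
# the API server in a child process and the marshal proxy in the parent, with
# the proxy's outbound port pointed at the API server. The orchestration and
# port choice here are illustrative assumptions, not the actual start_dev_server
# implementation.
import multiprocessing


def start_dev_server_sketch(saved_bundle_path, config, api_port=5001):
    api_proc = multiprocessing.Process(
        target=_start_dev_server,
        args=(saved_bundle_path, api_port, config),
        daemon=True,
    )
    api_proc.start()
    # The proxy forwards inbound requests to the API server on api_port.
    _start_dev_proxy(saved_bundle_path, api_port, config)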