def build_docker_container(torchserve_branch="master"):
    """Build and push one docker image per entry of the benchmark docker.yaml.

    The yaml is read from ``test/benchmark/tests/suite/docker/docker.yaml``
    relative to the current working directory and validated with
    ``YamlHandler.validate_docker_yaml``. For every top-level entry a
    ``DockerImageHandler`` is created, the image is built and then pushed to
    ``DEFAULT_DOCKER_DEV_ECR_REPO`` under the entry's ``docker_tag``.

    :param torchserve_branch: forwarded as the third argument of
        ``DockerImageHandler``.
    """
    LOGGER.info(f"Setting up docker image to be used")

    yaml_path = os.path.join(
        os.getcwd(), "test", "benchmark", "tests", "suite", "docker", "docker.yaml"
    )
    image_specs = YamlHandler.load_yaml(yaml_path)
    YamlHandler.validate_docker_yaml(image_specs)

    # The account id of the current AWS credentials is needed for the ECR push.
    identity_query = run("aws sts get-caller-identity --query Account --output text")
    account_id = identity_query.stdout.strip()

    for processor, spec in image_specs.items():
        docker_tag = spec.get("docker_tag")
        # A cuda version is only honoured for the "gpu" entry.
        cuda_version = spec.get("cuda_version") if processor == "gpu" else None

        image_handler = DockerImageHandler(docker_tag, cuda_version, torchserve_branch)
        image_handler.build_image()
        image_handler.push_docker_image_to_ecr(
            account_id,
            DEFAULT_REGION,
            f"{DEFAULT_DOCKER_DEV_ECR_REPO}:{docker_tag}",
        )
def test_vgg16_benchmark(
    ec2_connection, ec2_instance_type, vgg16_config_file_path, docker_dev_image_config_path, benchmark_execution_id
):
    """Run the VGG16 apache-bench benchmark against TorchServe in docker.

    Flow:
      1. Load and validate the benchmark yaml at ``vgg16_config_file_path``.
      2. From the docker yaml, pick the first image whose tag matches the
         instance flavour ("gpu" when the first two characters of
         ``ec2_instance_type`` are in ``GPU_INSTANCES``, otherwise "cpu") and
         pull it from ECR through ``ec2_connection``.
      3. For every mode and every batch size in the benchmark yaml: start
         TorchServe, register the model, run apache bench, unregister, stop
         TorchServe, generate the report, then copy and upload the artifacts.

    :param ec2_connection: connection object exposing ``run()`` for commands
        executed on the benchmark instance.
    :param ec2_instance_type: EC2 instance type string, e.g. used in artifact
        paths and for the gpu/cpu decision.
    :param vgg16_config_file_path: "/"-separated path of the benchmark yaml;
        the file name up to its first "." is used as the model name.
    :param docker_dev_image_config_path: path of the docker image yaml.
    :param benchmark_execution_id: identifier used in remote and S3 artifact
        paths.
    :raises AssertionError: when the docker yaml has no image matching the
        instance flavour.
    """
    test_config = YamlHandler.load_yaml(vgg16_config_file_path)
    model_name = vgg16_config_file_path.split("/")[-1].split(".")[0]

    LOGGER.info("Validating yaml contents")
    LOGGER.info(YamlHandler.validate_benchmark_yaml(test_config))

    docker_config = YamlHandler.load_yaml(docker_dev_image_config_path)

    account_id = run("aws sts get-caller-identity --query Account --output text").stdout.strip()

    # TODO: Improve logic that selectively pulls CPU image on CPU instances and likewise for GPU.
    wanted_flavor = "gpu" if ec2_instance_type[:2] in GPU_INSTANCES else "cpu"

    docker_repo_tag_for_current_instance = ""
    cuda_version_for_instance = None
    for processor, config in docker_config.items():
        docker_tag = config.get("docker_tag")
        # A cuda version is only honoured for the "gpu" entry.
        cuda_version = config.get("cuda_version") if processor == "gpu" else None

        # Entries without a tag, or for the other flavour, cannot serve this instance.
        if docker_tag is None or wanted_flavor not in docker_tag:
            continue

        docker_repo_tag = f"{DEFAULT_DOCKER_DEV_ECR_REPO}:{docker_tag}"
        dockerImageHandler = DockerImageHandler(docker_tag, cuda_version)
        dockerImageHandler.pull_docker_image_from_ecr(
            account_id, DEFAULT_REGION, docker_repo_tag, connection=ec2_connection
        )
        docker_repo_tag_for_current_instance = docker_repo_tag
        cuda_version_for_instance = cuda_version
        break

    # Fail here with a clear message instead of starting TorchServe with an
    # empty image name further down.
    assert docker_repo_tag_for_current_instance, (
        f"No {wanted_flavor} docker image found in {docker_dev_image_config_path} "
        f"for instance type {ec2_instance_type}"
    )

    apacheBenchHandler = ab_utils.ApacheBenchHandler(model_name=model_name, connection=ec2_connection)

    for config in test_config.values():
        for mode, mode_config in config.items():
            benchmark_engine = mode_config.get("benchmark_engine")
            url = mode_config.get("url")
            workers = mode_config.get("workers")
            batch_delay = mode_config.get("batch_delay")
            batch_sizes = mode_config.get("batch_size")
            input_file = mode_config.get("input")
            requests = mode_config.get("requests")
            concurrency = mode_config.get("concurrency")
            backend_profiling = mode_config.get("backend_profiling")
            exec_env = mode_config.get("exec_env")
            processors = mode_config.get("processors")

            # The gpu settings live in the second element of "processors";
            # only touch it when it is actually there.
            gpus = None
            if processors is not None and len(processors) == 2:
                gpus = processors[1].get("gpus")
                LOGGER.info(f"processors: {processors[1]}")
            LOGGER.info(f"gpus: {gpus}")

            LOGGER.info(
                f"\n benchmark_engine: {benchmark_engine}\n url: {url}\n workers: {workers}\n batch_delay: {batch_delay}\n batch_size:{batch_sizes}\n input_file: {input_file}\n requests: {requests}\n concurrency: {concurrency}\n backend_profiling: {backend_profiling}\n exec_env: {exec_env}\n processors: {processors}"
            )

            torchserveHandler = ts_utils.TorchServeHandler(
                exec_env=exec_env,
                # Use the version recorded for the image that was pulled, not
                # whatever the selection loop's variable last held.
                cuda_version=cuda_version_for_instance,
                gpus=gpus,
                torchserve_docker_image=docker_repo_tag_for_current_instance,
                backend_profiling=backend_profiling,
                connection=ec2_connection,
            )

            for batch_size in batch_sizes:
                # Start torchserve
                torchserveHandler.start_torchserve_docker()

                # Register
                torchserveHandler.register_model(
                    url=url, workers=workers, batch_delay=batch_delay, batch_size=batch_size
                )

                # Run benchmark
                apacheBenchHandler.run_apache_bench(requests=requests, concurrency=concurrency, input_file=input_file)

                # Unregister
                torchserveHandler.unregister_model()

                # Stop torchserve
                torchserveHandler.stop_torchserve()

                # Generate report (note: needs to happen after torchserve has stopped)
                apacheBenchHandler.generate_report(
                    requests=requests, concurrency=concurrency, connection=ec2_connection
                )

                # Move artifacts into a common folder.
                remote_artifact_folder = (
                    f"/home/ubuntu/{benchmark_execution_id}/{model_name}/{ec2_instance_type}/{mode}/{batch_size}"
                )

                ec2_connection.run(f"mkdir -p {remote_artifact_folder}")
                ec2_connection.run(f"cp -R /home/ubuntu/benchmark/* {remote_artifact_folder}")

                # Upload artifacts to s3 bucket
                ec2_connection.run(
                    f"aws s3 cp --recursive /home/ubuntu/{benchmark_execution_id}/ {S3_BUCKET_BENCHMARK_ARTIFACTS}/{benchmark_execution_id}/"
                )

                # NOTE(review): the reason for this pause is not visible here — confirm before removing.
                time.sleep(3)

                # Upload the artifacts under /tmp/<model_name> via the module-level
                # run() (not the instance connection), then clear them for the next round.
                run(
                    f"aws s3 cp --recursive /tmp/{model_name}/ {S3_BUCKET_BENCHMARK_ARTIFACTS}/{benchmark_execution_id}/{model_name}/{ec2_instance_type}/{mode}/{batch_size}"
                )

                run(f"rm -rf /tmp/{model_name}")
                apacheBenchHandler.clean_up()
def ec2_instance(
    request,
    ec2_client,
    ec2_resource,
    ec2_instance_type,
    ec2_key_name,
    ec2_instance_role_name,
    ec2_instance_ami,
    region,
):
    """Return ``(instance_id, key_filename)`` for the instance a test should use.

    Two modes, selected by command-line options read from ``request.config``:

    * ``--use-instances <file>``: look up an existing instance in the given
      yaml file under ``<test name> -> <instance type>`` and return its
      ``instance_id`` and ``key_filename``. Nothing is launched.
    * otherwise: generate an SSH key pair, launch one instance through
      ``ec2_resource.create_instances`` and wait until it is running and its
      status checks are ok. Unless ``--do-not-terminate`` is set, finalizers
      that terminate the instance and destroy the key pair are registered on
      ``request``; with ``--do-not-terminate`` the instance is recorded in
      ``instances.yaml`` in the current working directory instead.

    NOTE(review): this looks like a pytest fixture (it uses ``request``); the
    decorator is not part of this block.

    :raises AssertionError: in ``--use-instances`` mode when the file holds no
        usable record for this test / instance type.
    :raises ClientError: re-raised from ``create_instances``.
    """
    # Parametrized test names look like "test_name[param]"; records are keyed
    # by the bare test name.
    test_name = request.node.name.split("[")[0]
    use_instances_flag = request.config.getoption("--use-instances") or None

    if use_instances_flag:
        instances_file = use_instances_flag
        run(f"touch {instances_file}", warn=True)
        # load_yaml may yield a falsy value (the record-writing path below
        # guards against that too); normalise it so the assertions below
        # report the problem instead of an AttributeError.
        instances_dict = YamlHandler.load_yaml(instances_file) or {}
        LOGGER.info(f"instances_dict: {instances_dict}")
        instances = instances_dict.get(test_name)
        LOGGER.info(f"instances: {instances}")
        assert instances, f"Could not find instance details corresponding to test: {test_name}"
        instance_details = instances.get(ec2_instance_type)
        assert instance_details, f"Could not obtain details for instance type: {ec2_instance_type}"
        instance_id = instance_details.get("instance_id")
        assert instance_id, "Missing instance_id"
        key_filename = instance_details.get("key_filename")
        assert key_filename, "Missing key_filename"

        LOGGER.info(
            f"For test: {request.node.name}; Using instance_id: {instance_id} and key_filename: {key_filename}"
        )

        return instance_id, key_filename

    key_filename = ec2_utils.generate_ssh_keypair(ec2_client, ec2_key_name)

    params = {
        "KeyName": ec2_key_name,
        "ImageId": ec2_instance_ami,
        "InstanceType": ec2_instance_type,
        "IamInstanceProfile": {"Name": ec2_instance_role_name},
        "TagSpecifications": [
            {
                "ResourceType": "instance",
                "Tags": [{"Key": "Name", "Value": f"TS Benchmark {ec2_key_name}"}],
            },
        ],
        "MaxCount": 1,
        "MinCount": 1,
        "BlockDeviceMappings": [{"DeviceName": "/dev/sda1", "Ebs": {"VolumeSize": 220}}],
    }

    try:
        instances = ec2_resource.create_instances(**params)
    except ClientError as e:
        # Log the capacity case explicitly; every ClientError is re-raised.
        if e.response["Error"]["Code"] == "InsufficientInstanceCapacity":
            LOGGER.warning(
                f"Failed to launch {ec2_instance_type} in {region} because of insufficient capacity"
            )
        raise
    instance_id = instances[0].id

    LOGGER.info(f"Created instance: TS Benchmark {ec2_key_name}")

    # Define finalizer to terminate instance after this fixture completes
    def terminate_ec2_instance():
        ec2_client.terminate_instances(InstanceIds=[instance_id])

    def delete_ssh_keypair():
        ec2_utils.destroy_ssh_keypair(ec2_client, key_filename)

    do_not_terminate_flag = request.config.getoption("--do-not-terminate")

    LOGGER.info(f"do_not_terminate_flag: {do_not_terminate_flag}")

    # Kept unconditional as in the original: the file is created even when the
    # instance will be terminated.
    instances_file = os.path.join(os.getcwd(), "instances.yaml")
    run(f"touch {instances_file}", warn=True)

    if do_not_terminate_flag:
        # Record the instance so a later run can pick it up via --use-instances.
        # Merge at the instance-type level so records of other instance types
        # for the same test are kept rather than overwritten.
        instances_dict = YamlHandler.load_yaml(instances_file) or {}
        test_record = instances_dict.get(test_name) or {}
        test_record[ec2_instance_type] = {"instance_id": instance_id, "key_filename": key_filename}
        instances_dict[test_name] = test_record

        YamlHandler.write_yaml(instances_file, instances_dict)
    else:
        request.addfinalizer(terminate_ec2_instance)
        request.addfinalizer(delete_ssh_keypair)

    ec2_utils.check_instance_state(instance_id, state="running", region=region)
    ec2_utils.check_system_state(instance_id, system_status="ok", instance_status="ok", region=region)

    return instance_id, key_filename