def test_ecs_tensorflow_inference_gpu(tensorflow_inference, ecs_container_instance, region, gpu_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    num_gpus = ec2_utils.get_instance_num_gpus(worker_instance_id, region=region)
    model_name = "saved_model_half_plus_two"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            tensorflow_inference,
            "tensorflow",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_gpus=num_gpus,
            region=region,
        )
        model_name = get_tensorflow_model_name("gpu", model_name)
        inference_result = request_tensorflow_inference(model_name, ip_address=public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)


def test_ecs_pytorch_inference_eia(pytorch_inference_eia, ecs_container_instance, ei_accelerator_type, region, eia_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    model_name = "pytorch-densenet"
    image_framework, image_framework_version = get_framework_and_version_from_tag(pytorch_inference_eia)
    if image_framework_version == "1.3.1":
        model_name = "pytorch-densenet-v1-3-1"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            pytorch_inference_eia,
            "pytorch",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            ei_accelerator_type,
            region=region,
        )
        inference_result = request_pytorch_inference_densenet(public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)


def test_ecs_pytorch_inference_gpu(pytorch_inference, ecs_container_instance, region, gpu_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    num_gpus = ec2_utils.get_instance_num_gpus(worker_instance_id, region=region)
    model_name = "pytorch-densenet"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            pytorch_inference,
            "pytorch",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_gpus=num_gpus,
            region=region,
        )
        inference_result = request_pytorch_inference_densenet(public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)


def test_ecs_mxnet_inference_eia(mxnet_inference_eia, ecs_container_instance, ei_accelerator_type, region, eia_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    model_name = "resnet-152-eia"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            mxnet_inference_eia,
            "mxnet",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            ei_accelerator_type,
            region=region,
        )
        inference_result = request_mxnet_inference(public_ip_address, model="resnet-152-eia")
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)


def test_ecs_pytorch_inference_neuron(pytorch_inference_neuron, ecs_container_instance, region):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    num_neurons = ec2_utils.get_instance_num_inferentias(worker_instance_id, region=region)
    model_name = "pytorch-resnet-neuron"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            pytorch_inference_neuron,
            "pytorch",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_neurons=num_neurons,
            region=region,
        )
        server_type = get_inference_server_type(pytorch_inference_neuron)
        inference_result = request_pytorch_inference_densenet(
            public_ip_address, server_type=server_type, model_name=model_name
        )
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)


def __ecs_tensorflow_inference_cpu_nlp(tensorflow_inference, ecs_container_instance, region):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    model_name = "albert"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            tensorflow_inference,
            "tensorflow",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            region=region,
        )
        model_name = get_tensorflow_model_name("cpu", model_name)
        inference_result = request_tensorflow_inference_nlp(model_name, ip_address=public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)


def test_ecs_mxnet_inference_cpu(mxnet_inference, ecs_container_instance, region, cpu_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    model_name = "squeezenet"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            mxnet_inference,
            "mxnet",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            region=region,
        )
        inference_result = request_mxnet_inference(public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)


def test_ecs_tensorflow_inference_neuron(tensorflow_inference_neuron, ecs_container_instance, region):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    num_neurons = ec2_utils.get_instance_num_inferentias(worker_instance_id, region=region)
    model_name = "simple"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            tensorflow_inference_neuron,
            "tensorflow",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_neurons=num_neurons,
            region=region,
        )
        model_name = get_tensorflow_model_name("neuron", model_name)
        inference_result = request_tensorflow_inference(
            model_name,
            ip_address=public_ip_address,
            inference_string="'{\"instances\": [[1.0, 2.0, 5.0]]}'",
        )
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)
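

# Hedged refactoring sketch, not part of the original suite: every test above follows the
# same setup -> request -> assert -> tear-down shape. The helper below is an assumption
# (the name `_run_ecs_inference_test` and the `request_fn` callback do not exist in this
# module); it only reuses the `ecs_utils` helpers that the tests above already call.
def _run_ecs_inference_test(
    image_uri,
    framework,
    ecs_cluster_arn,
    model_name,
    worker_instance_id,
    public_ip_address,
    request_fn,
    region,
    **setup_kwargs,
):
    """Set up an ECS inference service, invoke `request_fn`, and always tear the service down."""
    service_name = task_family = revision = None
    try:
        # setup_kwargs carries per-test extras such as num_gpus or num_neurons,
        # using the same keyword names as the calls in the tests above.
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            image_uri,
            framework,
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            region=region,
            **setup_kwargs,
        )
        inference_result = request_fn(public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)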