def _request_detections(**kwargs):
    """Kick off a batch detection request on AML Compute.

    Expects kwargs to contain:
        request_id: str, identifier of this request.
        post_body: dict of user-provided parameters - input_container_sas
            (required), images_requested_json_sas, image_path_prefix,
            first_n, sample_n.

    Lists (or downloads the user-provided list of) images to process,
    applies the first_n/sample_n filters, uploads the finalized image list
    to the internal container, shards the images into AML jobs, submits
    them, and starts the monitoring thread. Failures are reported via the
    task manager rather than raised to the caller.
    """
    # Fetch request_id first so the except handlers below can always
    # reference it; previously a failure while parsing post_body raised a
    # NameError inside the handler, masking the real error.
    request_id = kwargs['request_id']
    try:
        body = kwargs.get('post_body')
        input_container_sas = body['input_container_sas']
        images_requested_json_sas = body.get('images_requested_json_sas', None)
        image_path_prefix = body.get('image_path_prefix', None)

        first_n = body.get('first_n', None)
        first_n = int(first_n) if first_n else None
        sample_n = body.get('sample_n', None)
        sample_n = int(sample_n) if sample_n else None

        api_task_manager.UpdateTaskStatus(request_id, 'running.')

        if images_requested_json_sas is None:
            # case 1: process every .jpg blob in the container
            api_task_manager.UpdateTaskStatus(request_id, 'running - listing all images to process.')
            print('runserver.py, running - listing all images to process.')

            # list all images to process
            blob_prefix = None if image_path_prefix is None else image_path_prefix
            image_paths = SasBlob.list_blobs_in_container(
                api_config.MAX_NUMBER_IMAGES_ACCEPTED,
                sas_uri=input_container_sas,
                blob_prefix=blob_prefix,
                blob_suffix='.jpg')
        else:
            # case 2: the user provided a JSON list of image paths
            print('runserver.py, running - using provided list of images.')
            image_paths_text = SasBlob.download_blob_to_text(images_requested_json_sas)
            image_paths = json.loads(image_paths_text)
            print('runserver.py, length of image_paths provided by the user: {}'.format(len(image_paths)))

            image_paths = [i for i in image_paths
                           if str(i).lower().endswith(api_config.ACCEPTED_IMAGE_FILE_ENDINGS)]
            print('runserver.py, length of image_paths provided by the user, after filtering to jpg: {}'.format(
                len(image_paths)))

            if image_path_prefix is not None:
                image_paths = [i for i in image_paths if str(i).startswith(image_path_prefix)]
                print('runserver.py, length of image_paths provided by the user, after filtering for image_path_prefix: {}'.format(
                    len(image_paths)))

            # fail fast if a sample of the listed blobs does not exist
            res = orchestrator.spot_check_blob_paths_exist(image_paths, input_container_sas)
            if res is not None:
                raise LookupError('path {} provided in list of images to process does not exist in the container pointed to by data_container_sas.'.format(res))

        # apply the first_n and sample_n filters
        if first_n is not None:
            # explicit raise instead of assert: asserts vanish under -O
            if first_n <= 0:
                raise ValueError('parameter first_n is 0.')
            image_paths = image_paths[:first_n]  # will not error if first_n > total number of images

        if sample_n is not None:
            if sample_n <= 0:
                raise ValueError('parameter sample_n is 0.')
            if sample_n > len(image_paths):
                raise ValueError('parameter sample_n specifies more images than available (after filtering by other provided params).')

            # we sample by just shuffling the image paths and take the first sample_n images
            print('First path before shuffling:', image_paths[0])
            shuffle(image_paths)
            print('First path after shuffling:', image_paths[0])
            image_paths = image_paths[:sample_n]
            image_paths = sorted(image_paths)

        num_images = len(image_paths)
        print('runserver.py, num_images after applying all filters: {}'.format(num_images))

        if num_images < 1:
            api_task_manager.UpdateTaskStatus(request_id, 'completed - zero images found in container or in provided list of images after filtering with the provided parameters.')
            return
        if num_images > api_config.MAX_NUMBER_IMAGES_ACCEPTED:
            api_task_manager.UpdateTaskStatus(
                request_id,
                'failed - the number of images ({}) requested for processing exceeds the maximum accepted ({}) in one call.'.format(
                    num_images, api_config.MAX_NUMBER_IMAGES_ACCEPTED))
            return

        # upload the finalized image list; sharding and scoring read this blob
        image_paths_string = json.dumps(image_paths, indent=2)
        internal_storage_service.create_blob_from_text(
            internal_container,
            '{}/{}_images.json'.format(request_id, request_id),
            image_paths_string)

        api_task_manager.UpdateTaskStatus(request_id, 'running - images listed; processing {} images.'.format(num_images))
        print('runserver.py, running - images listed; processing {} images.'.format(num_images))

        # set up connection to AML Compute and data stores
        # do this for each request since pipeline step is associated with the data stores
        aml_compute = orchestrator.AMLCompute(request_id, input_container_sas, internal_datastore)
        print('AMLCompute resource connected successfully.')

        # shard the images into jobs of NUM_IMAGES_PER_JOB images each
        num_images_per_job = api_config.NUM_IMAGES_PER_JOB
        num_jobs = math.ceil(num_images / num_images_per_job)
        list_jobs = {}
        for job_index in range(num_jobs):
            begin, end = job_index * num_images_per_job, (job_index + 1) * num_images_per_job
            job_id = 'request{}_jobindex{}_total{}'.format(request_id, job_index, num_jobs)
            list_jobs[job_id] = {'begin': begin, 'end': end}

        list_jobs_submitted = aml_compute.submit_jobs(request_id, list_jobs, api_task_manager, num_images)
        api_task_manager.UpdateTaskStatus(request_id, 'running - all {} images submitted to cluster for processing.'.format(num_images))
    except Exception as e:
        api_task_manager.UpdateTaskStatus(request_id, 'failed - an error occurred while processing the request: {}'.format(str(e)))
        print('runserver.py, exception in _request_detections: {}'.format(str(e)))
        return  # do not initiate _monitor_detections_request

    try:
        aml_monitor = orchestrator.AMLMonitor(request_id, list_jobs_submitted)
        # start another thread to monitor the jobs and consolidate the results when they finish
        ai4e_wrapper.wrap_async_endpoint(_monitor_detections_request,
                                         'post:_monitor_detections_request',
                                         request_id=request_id,
                                         aml_monitor=aml_monitor)
    except Exception as e:
        api_task_manager.UpdateTaskStatus(
            request_id,
            'failed - an error occurred when starting the status monitoring process. ' +
            'The images should be submitted for processing though - please contact us to retrieve your results. Error: {}'.format(str(e)))
        print('runserver.py, exception when starting orchestrator.AMLMonitor: ', str(e))
def _request_detections(**kwargs: Any) -> None:
    """Submit a batch detection request to AML Compute.

    Expects kwargs to contain:
        request_id: str, identifier of this request.
        post_body: dict of user-provided parameters - input_container_sas,
            use_url, images_requested_json_sas, image_path_prefix, first_n,
            sample_n, model_version, request_name.

    Validates and filters the list of images, uploads the finalized list to
    the internal container, shards the images into AML jobs, submits them,
    and starts a monitoring thread. Failures are reported via the task
    manager rather than raised to the caller.
    """
    # Fetch request_id first so the except handlers below can always
    # reference it; previously a failure while parsing post_body raised a
    # NameError inside the handler, masking the real error.
    request_id = kwargs['request_id']
    try:
        body = kwargs.get('post_body')
        assert body is not None

        input_container_sas = body.get('input_container_sas', None)
        use_url = body.get('use_url', False)
        images_requested_json_sas = body.get('images_requested_json_sas', None)
        image_path_prefix = body.get('image_path_prefix', None)

        first_n = body.get('first_n', None)
        first_n = int(first_n) if first_n else None
        sample_n = body.get('sample_n', None)
        sample_n = int(sample_n) if sample_n else None

        model_version = body.get('model_version', '')
        if model_version == '':
            model_version = api_config.AML_CONFIG['default_model_version']
        model_name = api_config.AML_CONFIG['models'][model_version]

        # request_name and request_submission_timestamp are for appending to
        # output file names
        request_name = body.get('request_name', '')
        request_submission_timestamp = orchestrator.get_utc_timestamp()

        task_status = orchestrator.get_task_status(
            'running', 'Request received.')
        update_task_status(api_task_manager, request_id, task_status)
        print('runserver.py, request_id {}, '.format(request_id),
              'model_version {}, model_name {}, '.format(model_version, model_name),
              'request_name {}, '.format(request_name),
              'submission timestamp is {}'.format(request_submission_timestamp))

        # image_paths can be a list of strings (Azure blob names or public URLs)
        # or a list of length-2 lists where each is a [image_id, metadata] pair

        # Case 1: listing all images in the container
        # - not possible to have attached metadata if listing images in a blob
        if images_requested_json_sas is None:
            metadata_available = False
            task_status = orchestrator.get_task_status(
                'running', 'Listing all images to process.')
            update_task_status(api_task_manager, request_id, task_status)
            print('runserver.py, running - listing all images to process.')

            # list all images to process; listing one more than the maximum
            # lets the MAX_NUMBER_IMAGES_ACCEPTED check below detect that too
            # many images were requested, so we should not proceed
            image_paths = SasBlob.list_blobs_in_container(
                api_config.MAX_NUMBER_IMAGES_ACCEPTED + 1,
                sas_uri=input_container_sas,
                blob_prefix=image_path_prefix,
                blob_suffix='.jpg')

        # Case 2: user supplied a list of images to process; can include metadata
        else:
            print('runserver.py, running - using provided list of images.')
            image_paths_text = SasBlob.download_blob_to_text(
                images_requested_json_sas)
            image_paths = json.loads(image_paths_text)
            print('runserver.py, length of image_paths provided by the user: {}'.format(len(image_paths)))

            if len(image_paths) == 0:
                task_status = orchestrator.get_task_status(
                    'completed', '0 images found in provided list of images.')
                update_task_status(api_task_manager, request_id, task_status)
                return

            error, metadata_available = orchestrator.validate_provided_image_paths(image_paths)
            if error is not None:
                msg = 'image paths provided in the json are not valid: {}'.format(error)
                raise ValueError(msg)

            # keep only entries with an accepted image file extension
            valid_image_paths = []
            for p in image_paths:
                locator = p[0] if metadata_available else p
                # urlparse(p).path also preserves the extension on local paths
                path = urllib.parse.urlparse(locator).path.lower()
                if path.endswith(api_config.ACCEPTED_IMAGE_FILE_ENDINGS):
                    valid_image_paths.append(p)
            image_paths = valid_image_paths
            print('runserver.py, length of image_paths provided by user, '
                  'after filtering to jpg: {}'.format(len(image_paths)))

            # optionally keep only entries under image_path_prefix
            valid_image_paths = []
            if image_path_prefix is not None:
                for p in image_paths:
                    locator = p[0] if metadata_available else p
                    if locator.startswith(image_path_prefix):
                        valid_image_paths.append(p)
                image_paths = valid_image_paths
                print('runserver.py, length of image_paths provided by user, '
                      'after filtering for image_path_prefix: {}'.format(len(image_paths)))

            if not use_url:
                # fail fast if a sample of the listed blobs does not exist
                res = orchestrator.spot_check_blob_paths_exist(
                    image_paths, input_container_sas, metadata_available)
                if res is not None:
                    # BUG FIX: the original built msg with a stray comma,
                    # producing a tuple; LookupError then carried a tuple
                    # instead of a message string
                    msg = ('path {} provided in list of images to process '.format(res) +
                           'does not exist in the container pointed to by '
                           'data_container_sas.')
                    raise LookupError(msg)

        # apply the first_n and sample_n filters
        if first_n is not None:
            assert first_n > 0, 'parameter first_n is 0.'
            # OK if first_n > total number of images
            image_paths = image_paths[:first_n]

        if sample_n is not None:
            assert sample_n > 0, 'parameter sample_n is 0.'
            if sample_n > len(image_paths):
                msg = ('parameter sample_n specifies more images than '
                       'available (after filtering by other provided params).')
                raise ValueError(msg)

            # sample by shuffling image paths and take the first sample_n images
            print('First path before shuffling:', image_paths[0])
            shuffle(image_paths)
            print('First path after shuffling:', image_paths[0])
            image_paths = orchestrator.sort_image_paths(
                image_paths[:sample_n], metadata_available)

        num_images = len(image_paths)
        print('runserver.py, num_images after applying all filters: {}'.format(num_images))

        if num_images < 1:
            task_status = orchestrator.get_task_status(
                'completed',
                'Zero images found in container or in provided list of images '
                'after filtering with the provided parameters.')
            update_task_status(api_task_manager, request_id, task_status)
            return
        if num_images > api_config.MAX_NUMBER_IMAGES_ACCEPTED:
            task_status = orchestrator.get_task_status(
                'failed',
                'The number of images ({}) requested for processing exceeds the maximum accepted {} in one call'.format(
                    num_images, api_config.MAX_NUMBER_IMAGES_ACCEPTED))
            update_task_status(api_task_manager, request_id, task_status)
            return

        # finalized image_paths is uploaded to internal_container; all sharding
        # and scoring use the uploaded list
        image_paths_string = json.dumps(image_paths, indent=1)
        internal_storage_service.create_blob_from_text(
            internal_container,
            '{}/{}_images.json'.format(request_id, request_id),
            image_paths_string)
        # the list of images json does not have request_name or timestamp in the
        # file name so that score.py can locate it

        task_status = orchestrator.get_task_status(
            'running', 'Images listed; processing {} images.'.format(num_images))
        update_task_status(api_task_manager, request_id, task_status)
        print('runserver.py, running - images listed; processing {} images'.format(num_images))

        # set up connection to AML Compute and data stores
        # do this for each request since pipeline step is associated with the
        # data stores
        aml_compute = orchestrator.AMLCompute(
            request_id=request_id,
            use_url=use_url,
            input_container_sas=input_container_sas,
            internal_datastore=internal_datastore,
            model_name=model_name)
        print('AMLCompute resource connected successfully.')

        num_images_per_job = api_config.NUM_IMAGES_PER_JOB
        num_jobs = math.ceil(num_images / num_images_per_job)

        # Experiment name must be between 1 and 36 characters long. Its
        # first character has to be alphanumeric, and the rest may contain
        # hyphens and underscores. Loop-invariant, so computed once here
        # instead of once per job as before.
        shortened_request_id = request_id.split('-')[0]
        if len(shortened_request_id) > 8:
            shortened_request_id = shortened_request_id[:8]

        # list_jobs: Dict[str, Dict[str, int]] = {}
        list_jobs = {}
        for job_index in range(num_jobs):
            begin = job_index * num_images_per_job
            end = begin + num_images_per_job
            # request ID, job index, total
            job_id = 'r{}_i{}_t{}'.format(shortened_request_id, job_index, num_jobs)
            list_jobs[job_id] = {'begin': begin, 'end': end}

        list_jobs_submitted = aml_compute.submit_jobs(
            list_jobs, api_task_manager, num_images)
        task_status = orchestrator.get_task_status(
            'running',
            'All {} images submitted to cluster for processing.'.format(num_images))
        update_task_status(api_task_manager, request_id, task_status)
    except Exception as e:
        task_status = orchestrator.get_task_status(
            'failed',
            'An error occurred while processing the request: {}'.format(e))
        update_task_status(api_task_manager, request_id, task_status)
        print('runserver.py, exception in _request_detections: {}'.format(e))
        return  # do not initiate _monitor_detections_request

    try:
        aml_monitor = orchestrator.AMLMonitor(
            request_id=request_id,
            shortened_request_id=shortened_request_id,
            list_jobs_submitted=list_jobs_submitted,
            request_name=request_name,
            request_submission_timestamp=request_submission_timestamp,
            model_version=model_version)

        # start another thread to monitor the jobs and consolidate the results
        # when they finish
        # HACK
        ai4e_service._create_and_execute_thread(
            func=_monitor_detections_request,
            api_path='/request_detections_aml',
            request_id=request_id,
            aml_monitor=aml_monitor)
        # ai4e_service.wrap_async_endpoint(
        #     _monitor_detections_request,
        #     trace_name='post:_monitor_detections_request',
        #     request_id=request_id, aml_monitor=aml_monitor)
    except Exception as e:
        task_status = orchestrator.get_task_status(
            'problem',
            ('An error occurred when starting the status monitoring process. '
             'The images should be submitted for processing though - please '
             'contact us to retrieve your results. Error: {}'.format(e)))
        update_task_status(api_task_manager, request_id, task_status)
        print('runserver.py, exception when starting orchestrator.AMLMonitor: {}'.format(e))
def _request_detections(**kwargs):
    """Submit a batch detection request to AML Compute.

    Expects kwargs to contain:
        request_id: str, identifier of this request.
        post_body: dict of user-provided parameters - input_container_sas,
            use_url, images_requested_json_sas, image_path_prefix, first_n,
            sample_n, model_version, request_name.

    Validates and filters the list of images, uploads the finalized list to
    the internal container, shards the images into AML jobs, submits them,
    and starts a monitoring thread. Failures are reported via the task
    manager rather than raised to the caller.
    """
    # Fetch request_id first so the except handlers below can always
    # reference it; previously a failure while parsing post_body raised a
    # NameError inside the handler, masking the real error.
    request_id = kwargs['request_id']
    try:
        body = kwargs.get('post_body')

        input_container_sas = body.get('input_container_sas', None)
        use_url = body.get('use_url', False)
        images_requested_json_sas = body.get('images_requested_json_sas', None)
        image_path_prefix = body.get('image_path_prefix', None)

        first_n = body.get('first_n', None)
        first_n = int(first_n) if first_n else None
        sample_n = body.get('sample_n', None)
        sample_n = int(sample_n) if sample_n else None

        model_version = body.get('model_version', '')
        if model_version == '':
            model_version = api_config.AML_CONFIG['default_model_version']
        model_name = api_config.AML_CONFIG['models'][model_version]

        # request_name and request_submission_timestamp are for appending to output file names
        request_name = body.get('request_name', '')
        request_submission_timestamp = orchestrator.get_utc_timestamp()

        api_task_manager.UpdateTaskStatus(
            request_id, get_task_status('running', 'Request received.'))
        print((
            'runserver.py, request_id {}, model_version {}, model_name {}, request_name {}, submission timestamp '
            'is {}').format(request_id, model_version, model_name,
                            request_name, request_submission_timestamp))

        # image_paths can be a list of strings (paths on Azure blobs or public URLs), or a list of lists,
        # each of length 2 and is the [image_id, metadata] pair

        # case 1 - listing all images in the container
        if images_requested_json_sas is None:
            metadata_available = False  # not possible to have attached metadata if listing images in a blob
            api_task_manager.UpdateTaskStatus(
                request_id,
                get_task_status('running', 'Listing all images to process.'))
            print('runserver.py, running - listing all images to process.')

            # list all images to process
            image_paths = SasBlob.list_blobs_in_container(
                api_config.MAX_NUMBER_IMAGES_ACCEPTED + 1,  # so > MAX_NUMBER_IMAGES_ACCEPTED will find that there are too many images requested so should not proceed
                sas_uri=input_container_sas,
                blob_prefix=image_path_prefix,
                blob_suffix='.jpg')

        # case 2 - user supplied a list of images to process; can include metadata
        else:
            print('runserver.py, running - using provided list of images.')
            image_paths_text = SasBlob.download_blob_to_text(
                images_requested_json_sas)
            image_paths = json.loads(image_paths_text)
            print('runserver.py, length of image_paths provided by the user: {}'.
                  format(len(image_paths)))

            if len(image_paths) == 0:
                api_task_manager.UpdateTaskStatus(
                    request_id,
                    get_task_status(
                        'completed',
                        'Zero images found in provided list of images.'))
                return

            error, metadata_available = orchestrator.validate_provided_image_paths(
                image_paths)
            if error is not None:
                raise ValueError(
                    'image paths provided in the json are not valid: {}'.
                    format(error))

            # keep only entries with an accepted image file extension
            valid_image_paths = []
            for p in image_paths:
                locator = p[0] if metadata_available else p
                if locator.lower().endswith(
                        api_config.ACCEPTED_IMAGE_FILE_ENDINGS):
                    valid_image_paths.append(p)
            image_paths = valid_image_paths
            print(
                'runserver.py, length of image_paths provided by the user, after filtering to jpg: {}'
                .format(len(image_paths)))

            # optionally keep only entries under image_path_prefix
            valid_image_paths = []
            if image_path_prefix is not None:
                for p in image_paths:
                    locator = p[0] if metadata_available else p
                    if locator.startswith(image_path_prefix):
                        valid_image_paths.append(p)
                image_paths = valid_image_paths
                print(
                    'runserver.py, length of image_paths provided by the user, after filtering for image_path_prefix: {}'
                    .format(len(image_paths)))

            if not use_url:
                # fail fast if a sample of the listed blobs does not exist
                res = orchestrator.spot_check_blob_paths_exist(
                    image_paths, input_container_sas, metadata_available)
                if res is not None:
                    raise LookupError(
                        'path {} provided in list of images to process does not exist in the container pointed to by data_container_sas.'
                        .format(res))

        # apply the first_n and sample_n filters
        if first_n is not None:
            assert first_n > 0, 'parameter first_n is 0.'
            image_paths = image_paths[:first_n]  # will not error if first_n > total number of images

        if sample_n is not None:
            assert sample_n > 0, 'parameter sample_n is 0.'
            if sample_n > len(image_paths):
                raise ValueError(
                    'parameter sample_n specifies more images than available (after filtering by other provided params).'
                )

            # we sample by shuffling the image paths and take the first sample_n images
            print('First path before shuffling:', image_paths[0])
            shuffle(image_paths)
            print('First path after shuffling:', image_paths[0])
            image_paths = image_paths[:sample_n]
            image_paths = orchestrator.sort_image_paths(
                image_paths, metadata_available)

        num_images = len(image_paths)
        print('runserver.py, num_images after applying all filters: {}'.format(
            num_images))

        if num_images < 1:
            api_task_manager.UpdateTaskStatus(
                request_id,
                get_task_status(
                    'completed',
                    'Zero images found in container or in provided list of images after filtering with the provided parameters.'
                ))
            return
        if num_images > api_config.MAX_NUMBER_IMAGES_ACCEPTED:
            api_task_manager.UpdateTaskStatus(
                request_id,
                get_task_status(
                    'failed',
                    'The number of images ({}) requested for processing exceeds the maximum accepted ({}) in one call.'
                    .format(num_images, api_config.MAX_NUMBER_IMAGES_ACCEPTED)))
            return

        # finalized image_paths is uploaded to internal_container; all sharding and scoring use the uploaded list
        image_paths_string = json.dumps(image_paths, indent=1)
        internal_storage_service.create_blob_from_text(
            internal_container,
            '{}/{}_images.json'.format(request_id, request_id),
            image_paths_string)
        # the list of images json does not have request_name or timestamp in the file name so that score.py can locate it

        api_task_manager.UpdateTaskStatus(
            request_id,
            get_task_status(
                'running',
                'Images listed; processing {} images.'.format(num_images)))
        print('runserver.py, running - images listed; processing {} images.'.
              format(num_images))

        # set up connection to AML Compute and data stores
        # do this for each request since pipeline step is associated with the data stores
        aml_compute = orchestrator.AMLCompute(
            request_id=request_id,
            use_url=use_url,
            input_container_sas=input_container_sas,
            internal_datastore=internal_datastore,
            model_name=model_name)
        print('AMLCompute resource connected successfully.')

        # shard the images into jobs of NUM_IMAGES_PER_JOB images each
        num_images_per_job = api_config.NUM_IMAGES_PER_JOB
        num_jobs = math.ceil(num_images / num_images_per_job)

        list_jobs = {}
        for job_index in range(num_jobs):
            begin, end = job_index * num_images_per_job, (
                job_index + 1) * num_images_per_job
            job_id = 'request{}_jobindex{}_total{}'.format(
                request_id, job_index, num_jobs)
            list_jobs[job_id] = {'begin': begin, 'end': end}

        list_jobs_submitted = aml_compute.submit_jobs(list_jobs,
                                                      api_task_manager,
                                                      num_images)
        api_task_manager.UpdateTaskStatus(
            request_id,
            get_task_status(
                'running',
                'All {} images submitted to cluster for processing.'.format(
                    num_images)))
    except Exception as e:
        api_task_manager.UpdateTaskStatus(
            request_id,
            get_task_status(
                'failed',
                'An error occurred while processing the request: {}'.format(
                    e)))
        print('runserver.py, exception in _request_detections: {}'.format(
            str(e)))
        return  # do not initiate _monitor_detections_request

    try:
        aml_monitor = orchestrator.AMLMonitor(
            request_id=request_id,
            list_jobs_submitted=list_jobs_submitted,
            request_name=request_name,
            request_submission_timestamp=request_submission_timestamp,
            model_version=model_version)

        # start another thread to monitor the jobs and consolidate the results when they finish
        ai4e_wrapper.wrap_async_endpoint(_monitor_detections_request,
                                         'post:_monitor_detections_request',
                                         request_id=request_id,
                                         aml_monitor=aml_monitor)
    except Exception as e:
        api_task_manager.UpdateTaskStatus(
            request_id,
            get_task_status('problem', (
                'An error occurred when starting the status monitoring process. '
                'The images should be submitted for processing though - please contact us to retrieve your results. '
                'Error: {}'.format(e))))
        print(
            'runserver.py, exception when starting orchestrator.AMLMonitor: ',
            str(e))