Example #1
def _request_detections(**kwargs):
    try:
        # retrieve request_id first so it is defined if the except block runs
        request_id = kwargs['request_id']
        body = kwargs.get('post_body')

        input_container_sas = body['input_container_sas']
        images_requested_json_sas = body.get('images_requested_json_sas', None)
        image_path_prefix = body.get('image_path_prefix', None)

        first_n = body.get('first_n', None)
        first_n = int(first_n) if first_n else None
        sample_n = body.get('sample_n', None)
        sample_n = int(sample_n) if sample_n else None

        api_task_manager.UpdateTaskStatus(request_id, 'running.')

        if images_requested_json_sas is None:
            api_task_manager.UpdateTaskStatus(request_id, 'running - listing all images to process.')
            print('runserver.py, running - listing all images to process.')

            # list all images to process
            # image_path_prefix may be None, in which case no prefix filter is applied
            image_paths = SasBlob.list_blobs_in_container(api_config.MAX_NUMBER_IMAGES_ACCEPTED,
                                                          sas_uri=input_container_sas,
                                                          blob_prefix=image_path_prefix, blob_suffix='.jpg')
        else:
            print('runserver.py, running - using provided list of images.')
            image_paths_text = SasBlob.download_blob_to_text(images_requested_json_sas)
            image_paths = json.loads(image_paths_text)
            print('runserver.py, length of image_paths provided by the user: {}'.format(len(image_paths)))

            image_paths = [i for i in image_paths if str(i).lower().endswith(api_config.ACCEPTED_IMAGE_FILE_ENDINGS)]
            print('runserver.py, length of image_paths provided by the user, after filtering to jpg: {}'.format(
                len(image_paths)))

            if image_path_prefix is not None:
                image_paths = [i for i in image_paths if str(i).startswith(image_path_prefix)]
                print('runserver.py, length of image_paths provided by the user, after filtering for image_path_prefix: {}'.format(
                    len(image_paths)))

            res = orchestrator.spot_check_blob_paths_exist(image_paths, input_container_sas)
            if res is not None:
                raise LookupError('path {} provided in list of images to process does not exist in the container pointed to by data_container_sas.'.format(res))

        # apply the first_n and sample_n filters
        if first_n is not None:
            assert first_n > 0, 'parameter first_n must be a positive integer.'
            image_paths = image_paths[:first_n]  # will not error if first_n > total number of images

        if sample_n is not None:
            assert sample_n > 0, 'parameter sample_n must be a positive integer.'
            if sample_n > len(image_paths):
                raise ValueError('parameter sample_n specifies more images than available (after filtering by other provided params).')

            # we sample by shuffling the image paths and taking the first sample_n images
            print('First path before shuffling:', image_paths[0])
            shuffle(image_paths)
            print('First path after shuffling:', image_paths[0])
            image_paths = image_paths[:sample_n]
            image_paths = sorted(image_paths)

        num_images = len(image_paths)
        print('runserver.py, num_images after applying all filters: {}'.format(num_images))
        if num_images < 1:
            api_task_manager.UpdateTaskStatus(request_id, 'completed - zero images found in container or in provided list of images after filtering with the provided parameters.')
            return
        if num_images > api_config.MAX_NUMBER_IMAGES_ACCEPTED:
            api_task_manager.UpdateTaskStatus(request_id,
                                              'failed - the number of images ({}) requested for processing exceeds the maximum accepted ({}) in one call.'.format(
                                                  num_images, api_config.MAX_NUMBER_IMAGES_ACCEPTED))
            return

        image_paths_string = json.dumps(image_paths, indent=2)
        internal_storage_service.create_blob_from_text(internal_container,
                                                       '{}/{}_images.json'.format(request_id, request_id),
                                                       image_paths_string)
        api_task_manager.UpdateTaskStatus(request_id, 'running - images listed; processing {} images.'.format(num_images))
        print('runserver.py, running - images listed; processing {} images.'.format(num_images))

        # set up connection to AML Compute and data stores
        # do this for each request since pipeline step is associated with the data stores
        aml_compute = orchestrator.AMLCompute(request_id, input_container_sas, internal_datastore)
        print('AMLCompute resource connected successfully.')

        num_images_per_job = api_config.NUM_IMAGES_PER_JOB
        num_jobs = math.ceil(num_images / num_images_per_job)

        list_jobs = {}
        for job_index in range(num_jobs):
            begin, end = job_index * num_images_per_job, (job_index + 1) * num_images_per_job
            job_id = 'request{}_jobindex{}_total{}'.format(request_id, job_index, num_jobs)
            list_jobs[job_id] = { 'begin': begin, 'end': end }

        list_jobs_submitted = aml_compute.submit_jobs(request_id, list_jobs, api_task_manager, num_images)
        api_task_manager.UpdateTaskStatus(request_id,
                                          'running - all {} images submitted to cluster for processing.'.format(num_images))

    except Exception as e:
        api_task_manager.UpdateTaskStatus(request_id,
                                          'failed - an error occurred while processing the request: {}'.format(str(e)))
        print('runserver.py, exception in _request_detections: {}'.format(str(e)))
        return  # do not initiate _monitor_detections_request

    try:
        aml_monitor = orchestrator.AMLMonitor(request_id, list_jobs_submitted)

        # start another thread to monitor the jobs and consolidate the results when they finish
        ai4e_wrapper.wrap_async_endpoint(_monitor_detections_request, 'post:_monitor_detections_request',
                                         request_id=request_id,
                                         aml_monitor=aml_monitor)
    except Exception as e:
        api_task_manager.UpdateTaskStatus(request_id,
            'failed - an error occurred when starting the status monitoring process. ' +
            'The images should be submitted for processing though - please contact us to retrieve your results. Error: {}'.format(str(e)))
        print('runserver.py, exception when starting orchestrator.AMLMonitor: {}'.format(e))
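
The sharding loop above computes half-open [begin, end) index ranges via ceiling division; the end index of the last job may run past num_images, which Python slicing tolerates. A minimal, self-contained sketch of that arithmetic (shard_ranges is an illustrative name, not part of this API):

import math

def shard_ranges(num_images, num_images_per_job):
    """Yield (begin, end) half-open index ranges covering num_images items.

    The last range's end may exceed num_images; slicing a list with it
    (image_paths[begin:end]) simply yields a shorter final shard.
    """
    num_jobs = math.ceil(num_images / num_images_per_job)
    for job_index in range(num_jobs):
        begin = job_index * num_images_per_job
        yield begin, begin + num_images_per_job

# e.g. 10 images, 4 per job -> three jobs: (0, 4), (4, 8), (8, 12);
# a 10-item list sliced with [8:12] yields the 2 remaining items
assert list(shard_ranges(10, 4)) == [(0, 4), (4, 8), (8, 12)]
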
Example #2
def _request_detections(**kwargs: Any) -> None:
    try:
        # retrieve request_id first so it is defined if the except block runs
        request_id = kwargs['request_id']
        body = kwargs.get('post_body')
        assert body is not None

        input_container_sas = body.get('input_container_sas', None)

        use_url = body.get('use_url', False)

        images_requested_json_sas = body.get('images_requested_json_sas', None)

        image_path_prefix = body.get('image_path_prefix', None)

        first_n = body.get('first_n', None)
        first_n = int(first_n) if first_n else None

        sample_n = body.get('sample_n', None)
        sample_n = int(sample_n) if sample_n else None

        model_version = body.get('model_version', '')
        if model_version == '':
            model_version = api_config.AML_CONFIG['default_model_version']
        model_name = api_config.AML_CONFIG['models'][model_version]

        # request_name and request_submission_timestamp are for appending to
        # output file names
        request_name = body.get('request_name', '')
        request_submission_timestamp = orchestrator.get_utc_timestamp()

        task_status = orchestrator.get_task_status(
            'running', 'Request received.')
        update_task_status(api_task_manager, request_id, task_status)
        print('runserver.py, request_id {}, model_version {}, model_name {}, '
              'request_name {}, submission timestamp is {}'.format(
                  request_id, model_version, model_name, request_name,
                  request_submission_timestamp))

        # image_paths can be a list of strings (Azure blob names or public URLs)
        # or a list of length-2 lists where each is a [image_id, metadata] pair

        # Case 1: listing all images in the container
        # - not possible to have attached metadata if listing images in a blob
        if images_requested_json_sas is None:
            metadata_available = False
            task_status = orchestrator.get_task_status(
                'running', 'Listing all images to process.')
            update_task_status(api_task_manager, request_id, task_status)
            print('runserver.py, running - listing all images to process.')

            # list all images to process
            image_paths = SasBlob.list_blobs_in_container(
                api_config.MAX_NUMBER_IMAGES_ACCEPTED + 1,
                # list one extra image so the size check below can detect that
                # more than MAX_NUMBER_IMAGES_ACCEPTED images were requested
                sas_uri=input_container_sas,
                blob_prefix=image_path_prefix, blob_suffix='.jpg')

        # Case 2: user supplied a list of images to process; can include metadata
        else:
            print('runserver.py, running - using provided list of images.')
            image_paths_text = SasBlob.download_blob_to_text(
                images_requested_json_sas)
            image_paths = json.loads(image_paths_text)
            print('runserver.py, length of image_paths provided by the user: {}'.format(len(image_paths)))
            if len(image_paths) == 0:
                task_status = orchestrator.get_task_status(
                    'completed', '0 images found in provided list of images.')
                update_task_status(api_task_manager, request_id, task_status)
                return

            error, metadata_available = orchestrator.validate_provided_image_paths(image_paths)
            if error is not None:
                msg = 'image paths provided in the json are not valid: {}'.format(error)
                raise ValueError(msg)

            valid_image_paths = []
            for p in image_paths:
                locator = p[0] if metadata_available else p
                # urlparse(locator).path drops any query string from a URL and
                # leaves a bare blob path (and its extension) unchanged
                path = urllib.parse.urlparse(locator).path.lower()
                if path.endswith(api_config.ACCEPTED_IMAGE_FILE_ENDINGS):
                    valid_image_paths.append(p)
            image_paths = valid_image_paths
            print('runserver.py, length of image_paths provided by user, '
                  'after filtering to jpg: {}'.format(len(image_paths)))

            valid_image_paths = []
            if image_path_prefix is not None:
                for p in image_paths:
                    locator = p[0] if metadata_available else p
                    if locator.startswith(image_path_prefix):
                        valid_image_paths.append(p)
                image_paths = valid_image_paths
                print('runserver.py, length of image_paths provided by user, '
                      'after filtering for image_path_prefix: {}'.format(len(image_paths)))

            if not use_url:
                res = orchestrator.spot_check_blob_paths_exist(
                    image_paths, input_container_sas, metadata_available)
                if res is not None:
                    msg = ('path {} provided in list of images to process '
                           'does not exist in the container pointed to by '
                           'data_container_sas.').format(res)
                    raise LookupError(msg)

        # apply the first_n and sample_n filters
        if first_n is not None:
            assert first_n > 0, 'parameter first_n must be a positive integer.'
            # OK if first_n > total number of images
            image_paths = image_paths[:first_n]

        if sample_n is not None:
            assert sample_n > 0, 'parameter sample_n must be a positive integer.'
            if sample_n > len(image_paths):
                msg = ('parameter sample_n specifies more images than '
                       'available (after filtering by other provided params).')
                raise ValueError(msg)

            # sample by shuffling image paths and take the first sample_n images
            print('First path before shuffling:', image_paths[0])
            shuffle(image_paths)
            print('First path after shuffling:', image_paths[0])
            image_paths = orchestrator.sort_image_paths(
                image_paths[:sample_n], metadata_available)

        num_images = len(image_paths)
        print('runserver.py, num_images after applying all filters: {}'.format(num_images))
        if num_images < 1:
            task_status = orchestrator.get_task_status(
                'completed',
                'Zero images found in container or in provided list of images '
                'after filtering with the provided parameters.')
            update_task_status(api_task_manager, request_id, task_status)
            return
        if num_images > api_config.MAX_NUMBER_IMAGES_ACCEPTED:
            task_status = orchestrator.get_task_status(
                'failed',
                'The number of images ({}) requested for processing exceeds the maximum accepted ({}) in one call.'.format(
                    num_images, api_config.MAX_NUMBER_IMAGES_ACCEPTED))
            update_task_status(api_task_manager, request_id, task_status)
            return

        # finalized image_paths is uploaded to internal_container; all sharding
        # and scoring use the uploaded list
        image_paths_string = json.dumps(image_paths, indent=1)
        internal_storage_service.create_blob_from_text(
            internal_container, '{}/{}_images.json'.format(request_id, request_id),
            image_paths_string)
        # the list of images json does not have request_name or timestamp in the
        # file name so that score.py can locate it

        task_status = orchestrator.get_task_status(
            'running', 'Images listed; processing {} images.'.format(num_images))
        update_task_status(api_task_manager, request_id, task_status)
        print('runserver.py, running - images listed; processing {} images'.format(num_images))

        # set up connection to AML Compute and data stores
        # do this for each request since pipeline step is associated with the
        # data stores
        aml_compute = orchestrator.AMLCompute(
            request_id=request_id, use_url=use_url,
            input_container_sas=input_container_sas,
            internal_datastore=internal_datastore, model_name=model_name)
        print('AMLCompute resource connected successfully.')

        num_images_per_job = api_config.NUM_IMAGES_PER_JOB
        num_jobs = math.ceil(num_images / num_images_per_job)

        # Experiment name must be between 1 and 36 characters long. Its
        # first character has to be alphanumeric, and the rest may contain
        # hyphens and underscores.
        shortened_request_id = request_id.split('-')[0][:8]

        list_jobs = {}
        for job_index in range(num_jobs):
            begin = job_index * num_images_per_job
            end = begin + num_images_per_job

            # job_id encodes the request ID, job index and total job count
            job_id = 'r{}_i{}_t{}'.format(shortened_request_id, job_index,
                                          num_jobs)
            list_jobs[job_id] = {'begin': begin, 'end': end}

        list_jobs_submitted = aml_compute.submit_jobs(
            list_jobs, api_task_manager, num_images)
        task_status = orchestrator.get_task_status(
            'running',
            'All {} images submitted to cluster for processing.'.format(num_images))
        update_task_status(api_task_manager, request_id, task_status)

    except Exception as e:
        task_status = orchestrator.get_task_status(
            'failed', 'An error occurred while processing the request: {}'.format(e))
        update_task_status(api_task_manager, request_id, task_status)
        print('runserver.py, exception in _request_detections: {}'.format(e))
        return  # do not initiate _monitor_detections_request

    try:
        aml_monitor = orchestrator.AMLMonitor(
            request_id=request_id,
            shortened_request_id=shortened_request_id,
            list_jobs_submitted=list_jobs_submitted,
            request_name=request_name,
            request_submission_timestamp=request_submission_timestamp,
            model_version=model_version)

        # start another thread to monitor the jobs and consolidate the results
        # when they finish
        # HACK: call the service's internal thread helper directly; the
        # public wrap_async_endpoint call is kept below for reference
        ai4e_service._create_and_execute_thread(
            func=_monitor_detections_request,
            api_path='/request_detections_aml',
            request_id=request_id, aml_monitor=aml_monitor)

        # ai4e_service.wrap_async_endpoint(
        #     _monitor_detections_request,
        #     trace_name='post:_monitor_detections_request',
        #     request_id=request_id, aml_monitor=aml_monitor)
    except Exception as e:
        task_status = orchestrator.get_task_status(
            'problem',
            ('An error occurred when starting the status monitoring process. '
             'The images should be submitted for processing though - please '
             'contact us to retrieve your results. Error: {}'.format(e)))
        update_task_status(api_task_manager, request_id, task_status)
        print('runserver.py, exception when starting orchestrator.AMLMonitor: {}'.format(e))
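
Example #2 filters extensions through urllib.parse.urlparse rather than a bare endswith, so that image URLs carrying a query string (for example a SAS token) still match. A hedged sketch of that behavior; the accepted-endings tuple below is an illustrative stand-in for api_config.ACCEPTED_IMAGE_FILE_ENDINGS:

import urllib.parse

# illustrative stand-in for api_config.ACCEPTED_IMAGE_FILE_ENDINGS
ACCEPTED_IMAGE_FILE_ENDINGS = ('.jpg', '.jpeg')

def is_accepted_image(locator):
    # urlparse(...).path drops any '?sv=...' query string from a URL while
    # leaving a bare blob path such as 'folder/img.JPG' unchanged
    path = urllib.parse.urlparse(locator).path.lower()
    return path.endswith(ACCEPTED_IMAGE_FILE_ENDINGS)

assert is_accepted_image('folder/IMG_0001.JPG')
assert is_accepted_image('https://acct.blob.core.windows.net/c/a.jpg?sv=2019')
assert not is_accepted_image('folder/notes.txt')
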
Example #3
def _request_detections(**kwargs):
    try:
        # retrieve request_id first so it is defined if the except block runs
        request_id = kwargs['request_id']
        body = kwargs.get('post_body')

        input_container_sas = body.get('input_container_sas', None)

        use_url = body.get('use_url', False)

        images_requested_json_sas = body.get('images_requested_json_sas', None)

        image_path_prefix = body.get('image_path_prefix', None)

        first_n = body.get('first_n', None)
        first_n = int(first_n) if first_n else None

        sample_n = body.get('sample_n', None)
        sample_n = int(sample_n) if sample_n else None

        model_version = body.get('model_version', '')
        if model_version == '':
            model_version = api_config.AML_CONFIG['default_model_version']
        model_name = api_config.AML_CONFIG['models'][model_version]

        # request_name and request_submission_timestamp are for appending to output file names
        request_name = body.get('request_name', '')
        request_submission_timestamp = orchestrator.get_utc_timestamp()

        api_task_manager.UpdateTaskStatus(
            request_id, get_task_status('running', 'Request received.'))
        print((
            'runserver.py, request_id {}, model_version {}, model_name {}, request_name {}, submission timestamp '
            'is {}').format(request_id, model_version, model_name,
                            request_name, request_submission_timestamp))

        # image_paths can be a list of strings (Azure blob paths or public URLs),
        # or a list of length-2 lists, each an [image_id, metadata] pair

        # case 1 - listing all images in the container
        if images_requested_json_sas is None:
            metadata_available = False  # not possible to have attached metadata if listing images in a blob
            api_task_manager.UpdateTaskStatus(
                request_id,
                get_task_status('running', 'Listing all images to process.'))
            print('runserver.py, running - listing all images to process.')

            # list all images to process
            image_paths = SasBlob.list_blobs_in_container(
                api_config.MAX_NUMBER_IMAGES_ACCEPTED + 1,
                # list one extra image so the size check below can detect that
                # more than MAX_NUMBER_IMAGES_ACCEPTED images were requested
                sas_uri=input_container_sas,
                blob_prefix=image_path_prefix,
                blob_suffix='.jpg')
        # case 2 - user supplied a list of images to process; can include metadata
        else:
            print('runserver.py, running - using provided list of images.')
            image_paths_text = SasBlob.download_blob_to_text(
                images_requested_json_sas)
            image_paths = json.loads(image_paths_text)
            print('runserver.py, length of image_paths provided by the '
                  'user: {}'.format(len(image_paths)))
            if len(image_paths) == 0:
                api_task_manager.UpdateTaskStatus(
                    request_id,
                    get_task_status(
                        'completed',
                        'Zero images found in provided list of images.'))
                return

            error, metadata_available = orchestrator.validate_provided_image_paths(
                image_paths)
            if error is not None:
                raise ValueError(
                    'image paths provided in the json are not '
                    'valid: {}'.format(error))

            valid_image_paths = []
            for p in image_paths:
                locator = p[0] if metadata_available else p
                if locator.lower().endswith(
                        api_config.ACCEPTED_IMAGE_FILE_ENDINGS):
                    valid_image_paths.append(p)
            image_paths = valid_image_paths
            print('runserver.py, length of image_paths provided by the user, '
                  'after filtering to jpg: {}'.format(len(image_paths)))

            valid_image_paths = []
            if image_path_prefix is not None:
                for p in image_paths:
                    locator = p[0] if metadata_available else p
                    if locator.startswith(image_path_prefix):
                        valid_image_paths.append(p)
                image_paths = valid_image_paths
                print('runserver.py, length of image_paths provided by the user, '
                      'after filtering for image_path_prefix: {}'.format(len(image_paths)))

            if not use_url:
                res = orchestrator.spot_check_blob_paths_exist(
                    image_paths, input_container_sas, metadata_available)
                if res is not None:
                    raise LookupError(
                        'path {} provided in list of images to process does not exist '
                        'in the container pointed to by data_container_sas.'.format(res))

        # apply the first_n and sample_n filters
        if first_n is not None:
            assert first_n > 0, 'parameter first_n must be a positive integer.'
            # will not error if first_n > total number of images
            image_paths = image_paths[:first_n]

        if sample_n is not None:
            assert sample_n > 0, 'parameter sample_n must be a positive integer.'
            if sample_n > len(image_paths):
                raise ValueError(
                    'parameter sample_n specifies more images than available (after filtering by other provided params).'
                )

            # we sample by shuffling the image paths and taking the first sample_n images
            print('First path before shuffling:', image_paths[0])
            shuffle(image_paths)
            print('First path after shuffling:', image_paths[0])
            image_paths = image_paths[:sample_n]
            image_paths = orchestrator.sort_image_paths(
                image_paths, metadata_available)

        num_images = len(image_paths)
        print('runserver.py, num_images after applying all filters: {}'.format(
            num_images))
        if num_images < 1:
            api_task_manager.UpdateTaskStatus(
                request_id,
                get_task_status(
                    'completed',
                    'Zero images found in container or in provided list of images after filtering with the provided parameters.'
                ))
            return
        if num_images > api_config.MAX_NUMBER_IMAGES_ACCEPTED:
            api_task_manager.UpdateTaskStatus(
                request_id,
                get_task_status(
                    'failed',
                    'The number of images ({}) requested for processing exceeds the maximum accepted ({}) in one call.'
                    .format(num_images,
                            api_config.MAX_NUMBER_IMAGES_ACCEPTED)))
            return

        # finalized image_paths is uploaded to internal_container; all sharding and scoring use the uploaded list
        image_paths_string = json.dumps(image_paths, indent=1)
        internal_storage_service.create_blob_from_text(
            internal_container,
            '{}/{}_images.json'.format(request_id, request_id),
            image_paths_string)
        # the list of images json does not have request_name or timestamp in the file name so that score.py can locate it

        api_task_manager.UpdateTaskStatus(
            request_id,
            get_task_status(
                'running',
                'Images listed; processing {} images.'.format(num_images)))
        print('runserver.py, running - images listed; '
              'processing {} images.'.format(num_images))

        # set up connection to AML Compute and data stores
        # do this for each request since pipeline step is associated with the data stores
        aml_compute = orchestrator.AMLCompute(
            request_id=request_id,
            use_url=use_url,
            input_container_sas=input_container_sas,
            internal_datastore=internal_datastore,
            model_name=model_name)
        print('AMLCompute resource connected successfully.')

        num_images_per_job = api_config.NUM_IMAGES_PER_JOB
        num_jobs = math.ceil(num_images / num_images_per_job)

        list_jobs = {}
        for job_index in range(num_jobs):
            begin = job_index * num_images_per_job
            end = begin + num_images_per_job
            job_id = 'request{}_jobindex{}_total{}'.format(
                request_id, job_index, num_jobs)
            list_jobs[job_id] = {'begin': begin, 'end': end}

        list_jobs_submitted = aml_compute.submit_jobs(list_jobs,
                                                      api_task_manager,
                                                      num_images)
        api_task_manager.UpdateTaskStatus(
            request_id,
            get_task_status(
                'running',
                'All {} images submitted to cluster for processing.'.format(
                    num_images)))

    except Exception as e:
        api_task_manager.UpdateTaskStatus(
            request_id,
            get_task_status(
                'failed',
                'An error occurred while processing the request: {}'.format(e)))
        print('runserver.py, exception in _request_detections: {}'.format(e))
        return  # do not initiate _monitor_detections_request

    try:
        aml_monitor = orchestrator.AMLMonitor(
            request_id=request_id,
            list_jobs_submitted=list_jobs_submitted,
            request_name=request_name,
            request_submission_timestamp=request_submission_timestamp,
            model_version=model_version)

        # start another thread to monitor the jobs and consolidate the results when they finish
        ai4e_wrapper.wrap_async_endpoint(_monitor_detections_request,
                                         'post:_monitor_detections_request',
                                         request_id=request_id,
                                         aml_monitor=aml_monitor)
    except Exception as e:
        api_task_manager.UpdateTaskStatus(
            request_id,
            get_task_status('problem', (
                'An error occurred when starting the status monitoring process. '
                'The images should be submitted for processing though - please contact us to retrieve your results. '
                'Error: {}'.format(e))))
        print('runserver.py, exception when starting '
              'orchestrator.AMLMonitor: {}'.format(e))
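
All three versions draw sample_n images by shuffling in place, slicing, and restoring a deterministic order. The sketch below shows that pattern next to the equivalent random.sample; plain sorted() stands in for orchestrator.sort_image_paths, which additionally handles [image_id, metadata] pairs:

from random import sample, shuffle

def sample_paths_by_shuffle(image_paths, sample_n):
    # the pattern used above: in-place shuffle, keep the first sample_n,
    # then restore a deterministic order for sharding
    shuffle(image_paths)
    return sorted(image_paths[:sample_n])

def sample_paths_directly(image_paths, sample_n):
    # random.sample draws sample_n items without replacement and without
    # mutating image_paths; the resulting distribution is the same
    return sorted(sample(image_paths, sample_n))

paths = ['d.jpg', 'a.jpg', 'c.jpg', 'b.jpg']
assert len(sample_paths_directly(paths, 2)) == 2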