Exemplo n.º 1
0
def scrub_pixels(bid):
    '''scrub pixels (not currently triggered) will be run to scrub pixel data
    before identifiers are extracted from the header. Right now no scrubbing
    is implemented: an image whose header carries a BurnedInAnnotation value
    is either reported (ANONYMIZE_PIXELS is True) or removed from the batch
    with an error logged (ANONYMIZE_PIXELS is not True).

    :param bid: the id of the Batch whose images should be checked
    :returns: whatever get_identifiers returns for the batch
    '''
    from .get import get_identifiers
    batch = Batch.objects.get(id=bid)
    images = batch.image_set.all()
    batch.change_images_status('PROCESSING')

    # from deid.dicom import scrub_pixels

    for dcm in images:

        dcm_file = dcm.image.path
        dicom = dcm.load_dicom()

        if dicom.get("BurnedInAnnotation") is not None:

            # We shouldn't be in this function if False, but we will check again anyway
            if ANONYMIZE_PIXELS is True:
                print("Anonymization will be done here.")
            else:
                message = "%s has pixel identifiers, anonymize pixels is off, but added to batch. Removing!" % dcm_file
                # Delete the image record (dcm), not the loaded header
                # (dicom). If django-cleanup is not in apps, the image file
                # itself will not be deleted.
                dcm.delete()
                batch = add_batch_error(message, batch)

    # At the end, move on to processing headers
    return get_identifiers(bid=batch.id)
Exemplo n.º 2
0
def replace_identifiers(bid, run_upload_storage=False):
    '''replace identifiers is called from get_identifiers, given that the user
    has asked to anonymize_restful. This function will do the replacement,
    and then trigger the function to send to storage

    :param bid: the id of the Batch to process. A BatchIdentifiers record
                must already exist for it (with ids and response set)
    :param run_upload_storage: if True, finish by calling upload_storage
                for this batch and return its result
    :returns: the result of upload_storage when run_upload_storage is True,
              otherwise the image paths of the batch after renaming
    '''

    batch = Batch.objects.get(id=bid)
    batch.qa['ProcessStartTime'] = time.time()
    batch_ids = BatchIdentifiers.objects.get(batch=batch)

    # 1) use response from API to generate new fields. Work on a copy so
    #    the ids stored on batch_ids are not handed to prepare_identifiers
    working = deepcopy(batch_ids.ids)
    prepared = prepare_identifiers(response=batch_ids.response, ids=working)
    updated = deepcopy(prepared)

    # 2) use response from API to anonymize all fields in batch.ids
    # clean_identifiers(ids, deid=None, image_type=None, default=None)
    # deid as None will use default "deid.dicom" provided in application
    # specifying a custom file/tag will use this filter first (in addition)
    deid = STUDY_DEID
    cleaned = clean_identifiers(ids=updated, default="KEEP", deid=deid)

    # Save progress
    batch_ids.cleaned = cleaned
    batch_ids.updated = updated
    batch_ids.save()

    # 3) write the replaced identifiers into the files of the batch
    dicom_files = batch.get_image_paths()
    output_folder = batch.get_path()
    updated_files = replace_ids(
        dicom_files=dicom_files,
        deid=deid,
        ids=updated,  # ids[item] lookup
        overwrite=True,  # overwrites copied files
        output_folder=output_folder,
        strip_sequences=True,
        remove_private=True)  # force = True

    # 4) Get shared information
    aggregate = ["BodyPartExamined", "Modality", "StudyDescription"]
    shared_ids = get_shared_identifiers(dicom_files=updated_files,
                                        aggregate=aggregate)
    batch_ids.shared = shared_ids
    batch_ids.save()

    # 5) Rename each image based on its suid; drop images we cannot rename
    for dcm in batch.image_set.all():
        item_id = os.path.basename(dcm.image.path)
        try:
            # The loaded header is not used further; loading it here means
            # an unreadable file lands in the except branch below
            dcm.load_dicom()

            # S6M0<MRN-SUID>_<JITTERED-REPORT-DATE>_<ACCESSIONNUMBER-SUID>
            if item_id in updated:
                item_suid = updated[item_id]['item_id']
                dcm = dcm.rename(item_suid)  # added to [prefix][dcm.name]
                dcm.save()

            # If we don't have the id, don't risk uploading
            else:
                message = "%s for Image Id %s not found in lookup: skipping." % (
                    item_id, dcm.id)
                batch = add_batch_error(message, batch)
                dcm.delete()

        # Exception (not a bare except) so that SystemExit and
        # KeyboardInterrupt are not swallowed
        except Exception:
            message = "%s for Image Id %s file read error: skipping." % (
                item_id, dcm.id)
            batch = add_batch_error(message, batch)
            dcm.delete()

    batch.qa['ProcessFinishTime'] = time.time()

    # We don't get here if the call above failed
    change_status(batch, "DONEPROCESSING")
    batch.save()

    if run_upload_storage is True:
        return upload_storage(batch_ids=[bid])

    return batch.get_image_paths()
Exemplo n.º 3
0
def import_dicomdir(dicom_dir, run_get_identifiers=True):
    '''import dicom directory manages importing a valid dicom set into
    the application, and is a celery job triggered by the watcher.
    Here we also flag (and exclude) images that have a header value
    that indicates pixel identifiers.

    :param dicom_dir: path of the folder of dicom files to import. Its
                basename is used as the uid of the batch
    :param run_get_identifiers: if True (default), hand the batch on to
                get_identifiers once the import is finished
    :returns: the result of get_identifiers when run_get_identifiers is
              True, the batch when it is not, and None when the folder is
              missing, is not a directory, or no image passes the filter
    '''
    start_time = time.time()

    if not os.path.exists(dicom_dir):
        bot.warning('Cannot find %s' % dicom_dir)
        return

    try:
        dicom_files = ls_fullpath(dicom_dir)
    except NotADirectoryError:
        bot.error('%s is not a directory, skipping.' % dicom_dir)
        return

    bot.debug("Importing %s, found %s .dcm files" %
              (dicom_dir, len(dicom_files)))

    # The batch --> the folder with a set of dicoms tied to one request
    dcm_folder = os.path.basename(dicom_dir)
    batch, _ = Batch.objects.get_or_create(uid=dcm_folder)
    batch.logs['STARTING_IMAGE_COUNT'] = len(dicom_files)

    # Data quality check: keep a record of study dates (date --> count)
    study_dates = dict()
    size_bytes = sum(os.path.getsize(f) for f in dicom_files)
    messages = []  # print all unique messages / warnings at end

    # Add in each dicom file to the series
    for dcm_file in dicom_files:
        try:

            # The dicom folder will be named based on the accession#
            dcm = read_file(dcm_file, force=True)
            dicom_uid = os.path.basename(dcm_file)

            # Keep track of studyDate
            study_date = dcm.get('StudyDate')
            study_dates[study_date] = study_dates.get(study_date, 0) + 1

            flag, flag_group, reason = has_burned_pixels(
                dicom_file=dcm_file, quiet=True, deid=STUDY_DEID)

            # If the image is flagged (and the flag does not come from the
            # whitelist group) we don't include it and move on
            if flag is True and flag_group not in ["whitelist"]:
                message = "%s is flagged in %s: %s, skipping" % (
                    dicom_uid, flag_group, reason)
                batch = add_batch_warning(message, batch, quiet=True)

                message = "BurnedInAnnotation found for batch %s" % batch.uid
                if message not in messages:
                    messages.append(message)
                continue

            # Create the Image object in the database
            # A dicom instance number must be unique for its batch
            dicom = Image.objects.create(batch=batch, uid=dicom_uid)

            # Save the dicom file to storage
            dicom = save_image_dicom(dicom=dicom,
                                     dicom_file=dcm_file)  # Also saves

            # Generate image name based on [SUID] added later
            # accessionnumberSUID.seriesnumber.imagenumber,
            dicom.name = "%s_%s.dcm" % (dcm.get('SeriesNumber'),
                                        dcm.get('InstanceNumber'))
            dicom.save()

        # Note that on error we don't remove files
        except InvalidDicomError:
            message = "InvalidDicomError: %s skipping." % (dcm_file)
            batch = add_batch_error(message, batch)
        except KeyError:
            message = "KeyError: %s is possibly invalid, skipping." % (
                dcm_file)
            batch = add_batch_error(message, batch)
        except Exception as e:
            # Record the failure like the handlers above; previously the
            # message was built and then dropped, hiding the error
            message = "Exception: %s, for %s, skipping." % (e, dcm_file)
            batch = add_batch_error(message, batch)

    # Print summary messages all at once
    for message in messages:
        bot.warning(message)

    # More than one study date is a property of the whole folder, so
    # report the folder rather than whichever file was read last
    if len(study_dates) > 1:
        message = "%s study dates found for %s" % (len(study_dates),
                                                   dicom_dir)
        batch = add_batch_error(message, batch)

    # Save batch thus far
    batch.qa['StudyDate'] = study_dates
    batch.qa['StartTime'] = start_time
    batch.qa['SizeBytes'] = size_bytes
    batch.save()

    # At the end, submit the dicoms to be anonymized as a batch
    count = batch.image_set.count()
    if count == 0:
        # No images for further processing
        batch.status = "EMPTY"
        batch.qa['FinishTime'] = time.time()
        message = "%s is flagged EMPTY, no images pass filter" % (batch.id)
        batch = add_batch_warning(message, batch)
        batch.save()
        return

    if ANONYMIZE_PIXELS is True:
        bot.warning(
            "Anonimization of pixels is not yet implemented. Images were skipped."
        )
        # When this is implemented, the function will be modified to add these images
        # to the batch, which will then be first sent through a function to
        # scrub pixels before header data is looked at.
        # scrub_pixels(bid=batch.id)

    if run_get_identifiers is True:
        bot.debug("get_identifiers submit batch %s with %s dicoms." %
                  (batch.uid, count))
        return get_identifiers(bid=batch.id)

    bot.debug("Finished batch %s with %s dicoms" % (batch.uid, count))
    return batch
Exemplo n.º 4
0
def get_identifiers(bid, study=None, run_replace_identifiers=True):
    '''get identifiers is the celery task to get identifiers for
    all images in a batch. A batch is a set of dicom files that may include
    more than one series/study. This is done by way of sending one restful call
    to the DASHER endpoint. If ANONYMIZE_RESTFUL is False
    under settings, the batch and its images are only marked DONEPROCESSING.

    :param bid: the id of the Batch to get identifiers for
    :param study: the study passed to run_client; defaults to SOM_STUDY
    :param run_replace_identifiers: if True (default), continue with
                replace_identifiers once the response has been stored
    :returns: the result of replace_identifiers when
              run_replace_identifiers is True, the BatchIdentifiers
              record when it is not, and None when de-identification is
              skipped or any step fails
    '''
    batch = Batch.objects.get(id=bid)

    if study is None:
        study = SOM_STUDY

    if ANONYMIZE_RESTFUL is not True:
        bot.debug(
            "Restful de-identification skipped [ANONYMIZE_RESTFUL is False]")
        change_status(batch, "DONEPROCESSING")
        change_status(batch.image_set.all(), "DONEPROCESSING")
        return

    # Process all dicoms at once, one call to the API
    dicom_files = batch.get_image_paths()
    batch.change_images_status('PROCESSING')
    batch.save()  # redundant

    try:
        # we are uploading a zip, doesn't make sense
        # to preserve image level metadata
        ids = get_ids(dicom_files=dicom_files, expand_sequences=False)
    except FileNotFoundError:
        batch.status = "ERROR"
        message = "batch %s is missing dicom files and should be reprocessed" % (
            batch.id)
        batch = add_batch_warning(message, batch)
        batch.save()
        # Without ids there is nothing to send; stop here instead of
        # failing below with a NameError on the unbound ids
        return

    # Prepare identifiers with only minimal required
    # This function expects many items for one entity, returns
    # request['identifiers'] --> [ entity-with-study-item ]
    request = prepare_identifiers_request(ids)  # force: True

    bot.debug("som.client making request to anonymize batch %s" % (bid))

    # Run with retrying, in case issue with token refresh
    try:
        result = run_client(study, request)
    except Exception:
        # But any error, don't continue, don't launch new job
        message = "error with client, stopping job."
        batch = add_batch_error(message, batch)
        batch.status = "ERROR"
        batch.qa['FinishTime'] = time.time()
        batch.save()
        return

    if result is None:
        return

    if "results" not in result:
        message = "'results' field not found in response: %s" % result
        batch = add_batch_error(message, batch)
        return

    # Create a batch for all results
    batch_ids, _ = BatchIdentifiers.objects.get_or_create(batch=batch)
    batch_ids.response = result['results']
    batch_ids.ids = ids
    batch_ids.save()

    # Persist the timestamp now: replace_identifiers re-fetches the batch
    # by id, so an unsaved qa entry would be lost
    batch.qa['DasherFinishTime'] = time.time()
    batch.save()

    if run_replace_identifiers is True:
        return replace_identifiers(bid=bid)
    return batch_ids