Example #1
 def check_dicomdir(self, event):
     '''check_dicomdir is the main function to call on folder creation
     or modification, either of which could signal a new dicom directory
     '''
     if self.is_finished(event.pathname):
         bot.log("2|FINISHED: %s" % (event.pathname))
         if event.pathname.lower().startswith("test"):
             bot.log("Here would be call to import_dicomdir for %s" %
                     (event.pathname))
         else:
             # Here is the celery task to use
             import_dicomdir.apply_async(
                 kwargs={"dicom_dir": event.pathname})
     else:
         bot.log("2|NOTFINISHED: %s" % (event.pathname))
Example #2
 def process_IN_MOVED_TO(self, event):
     bot.log("1|MOVEDTO EVENT: %s" % (event.pathname))
     self.check_dicomdir(event)
Example #3
 def process_IN_CLOSE_WRITE(self, event):
     bot.log("1|CLOSEWRITE EVENT: %s" % (event.pathname))
     self.check_dicomdir(event)
Example #4
 def process_IN_MODIFY(self, event):
     bot.log("1|MODIFY EVENT: %s" % (event.pathname))
     self.check_dicomdir(event)
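Examples 1 through 4 are methods of a pyinotify event handler: pyinotify calls process_IN_MOVED_TO, process_IN_CLOSE_WRITE, and process_IN_MODIFY for the corresponding filesystem events, and each one forwards to check_dicomdir. A minimal sketch of how such a handler could be wired to a watched folder with pyinotify's standard WatchManager/Notifier API follows; the class name and watch path are assumptions for illustration only.

import pyinotify


class DicomHandler(pyinotify.ProcessEvent):
    # the check_dicomdir, is_finished, and process_IN_* methods
    # shown above would live in this class
    pass


wm = pyinotify.WatchManager()
mask = pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO | pyinotify.IN_MODIFY
notifier = pyinotify.Notifier(wm, DicomHandler())
wm.add_watch('/data/incoming', mask, rec=True)  # watched path is an assumption
notifier.loop()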
Example #5
def upload_storage(batch_ids=None):
    '''upload_storage will, as a batch, send all batches with DONEPROCESSING
    status to Google Cloud Storage.
    '''
    from sendit.settings import (GOOGLE_CLOUD_STORAGE,
                                 SEND_TO_GOOGLE,
                                 GOOGLE_PROJECT_NAME,
                                 GOOGLE_STORAGE_COLLECTION,
                                 SOM_STUDY)  # SOM_STUDY is referenced in the upload kwargs below

    if batch_ids is None:
        batches = Batch.objects.filter(status="DONEPROCESSING")
    else:
        batches = Batch.objects.filter(status="DONEPROCESSING", id__in=batch_ids)

    # All variables must be defined for sending!
    if GOOGLE_CLOUD_STORAGE in [None,""]:
        SEND_TO_GOOGLE = False

    if GOOGLE_PROJECT_NAME in [None,""]:
        SEND_TO_GOOGLE = False

    if GOOGLE_STORAGE_COLLECTION in [None,""]:
        SEND_TO_GOOGLE = False

    if SEND_TO_GOOGLE is True:
        from deid.identifiers import get_timestamp

        try:
            client = get_client(bucket_name=GOOGLE_CLOUD_STORAGE,
                                project_name=GOOGLE_PROJECT_NAME)

        # Client is unreachable, usually when the network is stressed;
        # this is why we instantiate the client per upload batch
        except Exception:  # OSError and ServiceUnavailable
            bot.error("Cannot connect to client.")
            return

        # Create/get BigQuery dataset, collection should be IRB
        dataset = client.get_or_create_dataset(GOOGLE_STORAGE_COLLECTION)

        # Create a table based on ...
        table = client.get_or_create_table(dataset=dataset,    # All tables named dicom
                                           table_name='dicom',
                                           schema=dicom_schema)
        
        for batch in batches:
            valid = True
            batch.qa['UploadStartTime'] = time.time()
            batch_ids = BatchIdentifiers.objects.get(batch=batch)
            # Retrieve only images that aren't in PHI folder
            images = batch.get_finished()
            # Stop if no images pass filters
            if len(images) == 0:        
                change_status(batch,"EMPTY")
                message = "batch %s has no images for processing, stopping upload" %(bid)
                batch = add_batch_warning(message,batch)
                batch.save()
                continue

            # IR0001fa6_20160525_IR661B54.tar.gz
            # (coded MRN?)_jittereddate_studycode
            required_fields = ['AccessionNumber', 'PatientID']
            for required_field in required_fields:
                if required_field not in batch_ids.shared:
                    change_status(batch,"ERROR")
                    message = "batch ids %s do not have shared PatientID or AccessionNumber, stopping upload" % (batch.id)
                    batch = add_batch_warning(message,batch)
                    batch.save()
                    valid = False

            # Skip this batch entirely if a required field is missing
            if valid is False:
                continue

            # Add additional shared metadata
            studycode = batch_ids.shared['AccessionNumber']
            coded_mrn = batch_ids.shared['PatientID']
            batch_ids.shared['CodedPatientID'] = coded_mrn
            batch_ids.shared['ContentType'] = 'application/gzip'
            batch_ids.shared['CodedAccessionNumberID'] = studycode
            batch_ids.shared['NumberOfSeries'] = batch.qa['NumberOfSeries']
            batch_ids.shared['Series'] = batch.qa['Series']
            batch_ids.shared['RemovedSeries'] = batch.qa['FlaggedSeries']
            timestamp = get_timestamp(batch_ids.shared['StudyDate'],
                                      format = "%Y%m%d")            
            compressed_filename = "%s/%s_%s_%s.tar.gz" %(batch.get_path(),
                                                         coded_mrn,
                                                         timestamp,
                                                         studycode)
            compressed_file = generate_compressed_file(files=images, # mode="w:gz"
                                                       filename=compressed_filename) 
            # File will be None if no files added
            if compressed_file is None:        
                change_status(batch,"ERROR")
                message = "batch %s problem compressing file, stopping upload" %(bid)
                batch = add_batch_error(message,batch)
                batch.save()
                valid = False
                continue

            # We prepare shared metadata for one item
            batch_ids.shared['IMAGE_COUNT'] = len(images)
            batch.logs['IMAGE_COUNT'] = len(images)
            batch_ids.save()
            batch.save()
            if valid is True:
                metadata = deepcopy(batch_ids.shared)
                metadata['DicomHeader'] = json.dumps(metadata)
                metadata = { compressed_file: metadata }
                bot.log("Uploading %s with %s images to Google Storage %s" %(os.path.basename(compressed_file),
                                                                         len(images),
                                                                         GOOGLE_CLOUD_STORAGE))
                # We only expect to have one entity per batch
                kwargs = {"items":[compressed_file],
                          "table":table,
                          "study": SOM_STUDY,
                          "metadata": metadata,
                          "batch": False} # upload in batches at END

                # Batch metadata    
                upload_dataset(client=client, k=kwargs)

                # Clean up compressed file
                if os.path.exists(compressed_file):
                    os.remove(compressed_file)

                # Finish and record time elapsed
                change_status(batch,"DONE")

            batch.qa['UploadFinishTime'] = time.time()
            total_time = batch.qa['UploadFinishTime'] - batch.qa['UploadStartTime']
            bot.info("Total time for %s: %s images is %f min" %(batch.uid,
                                                                batch.image_set.count(),
                                                                total_time/60))
            batch.qa['ElapsedTime'] = total_time
            batch.save()

        # After image upload, metadata can be uploaded in one batch
        # If this isn't optimal, adjust the "batch" flag in kwargs above
        return client.batch.runInsert(table)
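The archive name assembled in this version follows the pattern called out in the inline comment: coded MRN, jittered study date, and coded study (accession) code, joined with underscores under the batch path. A small standalone sketch of the same formatting, using made-up values:

coded_mrn = "IR0001fa6"           # coded PatientID (made-up value)
timestamp = "20160525"            # jittered StudyDate, already in %Y%m%d form
studycode = "IR661B54"            # coded AccessionNumber (made-up value)
batch_path = "/data/batches/42"   # stand-in for batch.get_path()

compressed_filename = "%s/%s_%s_%s.tar.gz" % (batch_path, coded_mrn,
                                              timestamp, studycode)
print(compressed_filename)
# /data/batches/42/IR0001fa6_20160525_IR661B54.tar.gz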
Example #6
def upload_storage(batch_ids=None):
    '''upload_storage will, as a batch, send all batches with DONEPROCESSING
    status to Google Cloud Storage.
    '''
    from sendit.settings import (GOOGLE_CLOUD_STORAGE, SEND_TO_GOOGLE,
                                 GOOGLE_PROJECT_NAME, GOOGLE_PROJECT_ID_HEADER,
                                 GOOGLE_STORAGE_COLLECTION)

    if batch_ids is None:
        batches = Batch.objects.filter(status="DONEPROCESSING")
    else:
        batches = Batch.objects.filter(status="DONEPROCESSING",
                                       id__in=batch_ids)

    # All variables must be defined for sending!
    if GOOGLE_CLOUD_STORAGE in [None, ""]:
        SEND_TO_GOOGLE = False

    if GOOGLE_PROJECT_NAME in [None, ""]:
        SEND_TO_GOOGLE = False

    if GOOGLE_STORAGE_COLLECTION in [None, ""]:
        SEND_TO_GOOGLE = False

    if SEND_TO_GOOGLE is True:
        from deid.identifiers import get_timestamp

        # I'm not sure we need this
        #if GOOGLE_PROJECT_ID_HEADER is not None:
        #    client.headers["x-goog-project-id"] = GOOGLE_PROJECT_ID_HEADER
        try:
            client = get_client(bucket_name=GOOGLE_CLOUD_STORAGE,
                                project_name=GOOGLE_PROJECT_NAME)
        # Client is unreachable, usually when the network is stressed
        except Exception:  # OSError and ServiceUnavailable
            bot.error("Cannot connect to client.")
            return

        collection = client.create_collection(uid=GOOGLE_STORAGE_COLLECTION)
        for batch in batches:
            valid = True
            batch_ids = BatchIdentifiers.objects.get(batch=batch)

            # Retrieve only images that aren't in PHI folder
            images = batch.get_finished()

            # Stop if no images pass filters
            if len(images) == 0:
                change_status(batch, "EMPTY")
                message = "batch %s has no images for processing, stopping upload" % (
                    batch.id)
                batch = add_batch_warning(message, batch)
                batch.save()
                continue

            # IR0001fa6_20160525_IR661B54.tar.gz
            # (coded MRN?)_jittereddate_studycode
            required_fields = ['AccessionNumber', 'PatientID']
            for required_field in required_fields:
                if required_field not in batch_ids.shared:
                    change_status(batch, "ERROR")
                    message = "batch ids %s do not have shared PatientID or AccessionNumber, stopping upload" % (
                        batch.id)
                    batch = add_batch_warning(message, batch)
                    batch.save()
                    valid = False

            # Skip this batch entirely if a required field is missing
            if valid is False:
                continue

            studycode = batch_ids.shared['AccessionNumber']
            coded_mrn = batch_ids.shared['PatientID']
            timestamp = get_timestamp(batch_ids.shared['StudyDate'],
                                      format="%Y%m%d")

            compressed_filename = "%s/%s_%s_%s.tar.gz" % (
                batch.get_path(), coded_mrn, timestamp, studycode)
            compressed_file = generate_compressed_file(
                files=images,  # mode="w:gz"
                filename=compressed_filename)

            # File will be None if no files added
            if compressed_file is None:
                change_status(batch, "ERROR")
                message = "batch %s problem compressing file, stopping upload" % (
                    bid)
                batch = add_batch_error(message, batch)
                batch.save()
                valid = False
                continue

            # We prepare shared metadata for one item
            batch_ids.shared['IMAGE_COUNT'] = len(images)
            batch.logs['IMAGE_COUNT'] = len(images)
            batch_ids.save()
            batch.save()
            if valid is True:
                items_metadata = batch_ids.shared
                items = {compressed_file: items_metadata}
                cleaned = deepcopy(batch_ids.cleaned)
                metadata = prepare_entity_metadata(cleaned_ids=cleaned)
                bot.log("Uploading %s with %s images to Google Storage %s" %
                        (os.path.basename(compressed_file), len(images),
                         GOOGLE_CLOUD_STORAGE))
                # We only expect to have one entity per batch
                uid = list(metadata.keys())[0]
                kwargs = {
                    "images": [compressed_file],
                    "collection": collection,
                    "uid": uid,
                    "entity_metadata": metadata[uid],
                    "images_metadata": items
                }

                # Batch metadata
                upload_dataset(client=client, k=kwargs)

                # Clean up compressed file
                if os.path.exists(compressed_file):
                    os.remove(compressed_file)

                # Finish and record time elapsed
                change_status(batch, "DONE")

            batch.qa['FinishTime'] = time.time()
            total_time = batch.qa['FinishTime'] - batch.qa['StartTime']
            bot.info("Total time for %s: %s images is %f min" %
                     (batch.uid, batch.image_set.count(), total_time / 60))
            batch.qa['ElapsedTime'] = total_time
            batch.save()
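Both versions of upload_storage take an optional list of batch ids; with batch_ids=None every batch currently in DONEPROCESSING status is picked up. A short usage sketch (the Celery variant assumes the function is also registered as a task in the source project, which is not shown on this page):

# Upload every batch currently marked DONEPROCESSING
upload_storage()

# Restrict the upload to specific batches
upload_storage(batch_ids=[12, 13])

# If upload_storage is registered as a Celery task (an assumption here),
# it can also be queued asynchronously:
# upload_storage.apply_async(kwargs={"batch_ids": [12, 13]})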