def create_chunks(): """ Creates gets unaugmented positive chunks and saves them to positive_no_aug. :return: """ client = cloud.authenticate() bucket = client.get_bucket('elvos') # loop through every positive array on GCS -- no need to loop through # negatives, as those are fine in their current state for in_blob in bucket.list_blobs(prefix='chunk_data/normal/positive'): # get the file id file_id = in_blob.name.split('/')[3] file_id = file_id.split('.')[0] logging.info(f'getting {file_id}') # copy region if it's the original image, not a rotation/reflection if file_id.endswith('_1'): arr = cloud.download_array(in_blob) logging.info(f'downloading {file_id}') cloud.save_chunks_to_cloud(arr, 'normal', 'positive_no_aug', file_id)
def inspect_rois(annotations_df):
    """
    Sanity-check function to make sure that the ROIs we're getting actually
    contain occlusions in them.
    :param annotations_df: annotations
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # loop through every array on GCS
    for in_blob in bucket.list_blobs(prefix='airflow/npy'):
        # if in_blob.name != 'airflow/npy/ZZX0ZNWG6Q9I18GK.npy':
        #     continue

        # blacklist
        if in_blob.name == 'airflow/npy/LAUIHISOEZIM5ILF.npy':
            continue

        # get the file id
        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]
        logging.info(f'inspecting {file_id}')

        # copy ROI if there's a positive match in the ROI annotations
        roi_df = annotations_df[
            annotations_df['patient_id'].str.match(file_id)]

        # if it's empty, this brain is ELVO negative
        if roi_df.empty:
            elvo_positive = False
        else:
            elvo_positive = True

        arr = cloud.download_array(in_blob)

        # if it's elvo positive
        if elvo_positive:
            chunks = []

            # get ROI location
            blue = int(len(arr) - roi_df['blue2'].iloc[0])
            green = int(roi_df['green1'].iloc[0])
            red = int(roi_df['red1'].iloc[0])

            chunks.append(arr[blue:blue + 32,
                              green:green + 50,
                              red:red + 50])
            chunks.append(arr[blue:blue + 32,
                              red:red + 50,
                              green:green + 50])

            # Loop through all relevant chunks and show the axial, coronal,
            # and sagittal views to make sure there's an occlusion
            for chunk in chunks:
                axial = np.max(chunk, axis=0)
                coronal = np.max(chunk, axis=1)
                sag = np.max(chunk, axis=2)
                fig, ax = plt.subplots(1, 3, figsize=(6, 4))
                ax[0].imshow(axial, interpolation='none')
                ax[1].imshow(coronal, interpolation='none')
                ax[2].imshow(sag, interpolation='none')
                plt.show()

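# Minimal usage sketch for the sanity check above (not part of the original
# pipeline). The path 'annotations.csv' is a hypothetical example; the file
# only needs the columns referenced above (patient_id, red1/red2,
# green1/green2, blue1/blue2).
def _example_inspect_rois(csv_path: str = 'annotations.csv'):
    annotations = pd.read_csv(csv_path)
    inspect_rois(annotations)
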
def normal_mip():
    configure_logger()
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # iterate through every source directory...
    for location in WHENCE:
        prefix = location + '/'
        logging.info(f"MIPing images from {prefix}")

        # get every blob
        for in_blob in bucket.list_blobs(prefix=prefix):
            # blacklist
            if in_blob.name == prefix + 'LAUIHISOEZIM5ILF.npy':
                continue

            file_id = in_blob.name.split('/')[2]
            file_id = file_id.split('.')[0]

            # perform the normal MIPing procedure
            logging.info(f'downloading {in_blob.name}')
            input_arr = cloud.download_array(in_blob)
            logging.info(f"blob shape: {input_arr.shape}")

            # if it's a failure analysis scan, do the failure analysis MIP
            if file_id in FAILURE_ANALYSIS:
                if location == 'numpy/axial':
                    cropped_arr = transforms.crop_normal_axial_fa(input_arr,
                                                                  location)
                else:
                    # non-axial failure-analysis scans fall back to the
                    # standard coronal crop
                    cropped_arr = transforms.crop_normal_coronal(input_arr,
                                                                 location)
            # otherwise just do a normal MIP
            else:
                if location == 'numpy/axial':
                    cropped_arr = transforms.crop_normal_axial(input_arr,
                                                               location)
                else:
                    cropped_arr = transforms.crop_normal_coronal(input_arr,
                                                                 location)

            # remove extremes
            not_extreme_arr = transforms.remove_extremes(cropped_arr)
            logging.info('removed array extremes')

            # MIP array
            mip_arr = transforms.mip_normal(not_extreme_arr)

            # OPTIONAL: visualize MIP
            # plt.figure(figsize=(6, 6))
            # plt.imshow(mip_arr, interpolation='none')
            # plt.show()

            # save to cloud
            cloud.save_npy_to_cloud(mip_arr, file_id, location, 'normal')

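# normal_mip() above and multichannel_mip() below read two module-level
# constants that are defined elsewhere in the repo. A plausible shape for
# them, inferred from the branching above (the patient IDs are placeholders,
# not real values):
#
#     WHENCE = ['numpy/axial', 'numpy/coronal']
#     FAILURE_ANALYSIS = ['<patient_id_1>', '<patient_id_2>', ...]
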
def clean_data():
    """
    Deletes everything in chunk_data/normal/positive_no_aug
    :return:
    """
    configure_logger()
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    prefix = "chunk_data/normal/positive_no_aug"
    logging.info(f"cleaning: deleting positive chunks from {prefix}")

    # delete everything
    for in_blob in bucket.list_blobs(prefix=prefix):
        in_blob.delete()

def multichannel_mip():
    configure_logger()
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # iterate through every source directory...
    for location in WHENCE:
        prefix = location + '/'
        logging.info(f"MIPing images from {prefix}")

        for in_blob in bucket.list_blobs(prefix=prefix):
            # blacklist
            if in_blob.name == prefix + 'LAUIHISOEZIM5ILF.npy':
                continue

            file_id = in_blob.name.split('/')[2]
            file_id = file_id.split('.')[0]

            # perform the normal MIPing procedure
            logging.info(f'downloading {in_blob.name}')
            input_arr = cloud.download_array(in_blob)
            logging.info(f"blob shape: {input_arr.shape}")

            # if it's a failure analysis scan, do the failure analysis crop
            if file_id in FAILURE_ANALYSIS:
                if location == 'numpy/axial':
                    cropped_arr = \
                        transforms.crop_multichannel_axial_fa(input_arr,
                                                              location)
                else:
                    # non-axial failure-analysis scans fall back to the
                    # standard coronal crop
                    cropped_arr = transforms.crop_multichannel_coronal(
                        input_arr)
            # otherwise just do a normal multichannel crop
            else:
                if location == 'numpy/axial':
                    cropped_arr = transforms.crop_multichannel_axial(
                        input_arr, location)
                else:
                    cropped_arr = transforms.crop_multichannel_coronal(
                        input_arr)

            not_extreme_arr = transforms.segment_vessels(cropped_arr)
            logging.info('segmented vessels')

            mip_arr = transforms.mip_multichannel(not_extreme_arr)

            # OPTIONAL: visualize MIP
            # plt.figure(figsize=(6, 6))
            # plt.imshow(mip_arr[1], interpolation='none')
            # plt.show()

            # save to the numpy generator source directory
            cloud.save_segmented_npy_to_cloud(mip_arr, file_id, location,
                                              'multichannel')

def axial_to_coronal_and_sagittal():
    configure_logger()
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # for every axial scan
    for in_blob in bucket.list_blobs(prefix='numpy/axial'):
        # blacklist
        if in_blob.name == 'numpy/axial/LAUIHISOEZIM5ILF.npy':
            continue
        elif in_blob.name == 'numpy/axial/ALOUY4SF3BQKXQCZ.npy':
            continue
        elif in_blob.name == 'numpy/axial/ABPO2BORDNF3OVL3.npy':
            continue

        # download, then transpose, then flip it to orient it correctly
        logging.info(f'downloading {in_blob.name}')
        axial = cloud.download_array(in_blob)
        coronal = np.transpose(axial, (1, 0, 2))
        coronal = np.fliplr(coronal)
        sagittal = np.transpose(axial, (2, 0, 1))
        sagittal = np.fliplr(sagittal)

        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]

        try:
            # save files to GCS
            coronal_io = file_io.FileIO(f'gs://elvos/numpy/coronal/'
                                        f'{file_id}.npy', 'w')
            np.save(coronal_io, coronal)
            sagittal_io = file_io.FileIO(f'gs://elvos/numpy/sagittal/'
                                         f'{file_id}.npy', 'w')
            np.save(sagittal_io, sagittal)
            coronal_io.close()
            sagittal_io.close()
        except Exception as e:
            logging.error(f'for patient ID: {file_id} {e}')
            break

        logging.info(f'saved .npy file to cloud')

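# Quick shape sanity check for the reorientation above, on a tiny synthetic
# volume (purely illustrative; real scans are far larger than 4x3x2):
def _example_reorientation_shapes():
    axial = np.zeros((4, 3, 2))                       # (slices, height, width)
    coronal = np.fliplr(np.transpose(axial, (1, 0, 2)))
    sagittal = np.fliplr(np.transpose(axial, (2, 0, 1)))
    print(coronal.shape)    # (3, 4, 2) -- axial rows become the first axis
    print(sagittal.shape)   # (2, 4, 3) -- axial columns become the first axis
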
def clean_old_data():
    """
    Removes old upsampled positives.
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    prefix = "chunk_data/normal/positive"

    # iterate through all blobs in the bucket
    for in_blob in bucket.list_blobs(prefix=prefix):
        logging.info(f'checking {in_blob.name}')
        file_id = in_blob.name.split('/')[-1]
        file_id = file_id.split('.')[0]

        # delete it if it has an underscore in it
        if '_' in file_id:
            in_blob.delete()

def clean_new_data():
    """
    Removes non-upsampled positives.
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    prefix = "chunk_data/filtered/positive"
    logging.info(f"cleaning: deleting non-upsampled positive chunks "
                 f"from {prefix}")

    # iterate through all blobs in the bucket
    for in_blob in bucket.list_blobs(prefix=prefix):
        logging.info(f'checking {in_blob.name}')
        file_id = in_blob.name.split('/')[-1]
        file_id = file_id.split('.')[0]

        # skip upsampled chunks (those with an underscore in the id);
        # delete everything else
        if '_' in file_id:
            continue
        in_blob.delete()

def transform_positives():
    """
    Script that actually transforms and upsamples all the positives.
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    prefix = "chunk_data/filtered/positive"
    logging.info(f"transforming positive chunks from {prefix}")

    # for each blob in chunk_data/filtered/positive
    for in_blob in bucket.list_blobs(prefix=prefix):
        file_id = in_blob.name.split('/')[3]
        file_id = file_id.split('.')[0]

        # download chunk
        logging.info(f'downloading {in_blob.name}')
        input_arr = cloud.download_array(in_blob)
        logging.info(f"blob shape: {input_arr.shape}")

        # upsample chunk
        transform_one(input_arr, file_id)

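# transform_one() is defined elsewhere in the repo. As a rough, hypothetical
# illustration of the kind of rotation/reflection upsampling applied to a
# 32x32x32 chunk (this is NOT the repo's actual implementation):
def _example_augment_chunk(chunk: np.ndarray) -> list:
    augmented = []
    for k in range(4):                                # four in-plane rotations
        rotated = np.rot90(chunk, k=k, axes=(1, 2))
        augmented.append(rotated)
        augmented.append(np.flip(rotated, axis=2))    # plus a reflection
    return augmented
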
def create_chunks(annotations_df: pd.DataFrame):
    """
    Process and save actual chunks based off of the previously derived
    annotations.
    :param annotations_df: annotations with where the actual occlusion is
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')

    # loop through every array on GCS
    for in_blob in bucket.list_blobs(prefix='airflow/npy'):
        # blacklist
        if in_blob.name == 'airflow/npy/LAUIHISOEZIM5ILF.npy':
            continue

        # get the file id
        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]
        logging.info(f'chunking {file_id}')

        # copy ROI if there's a positive match in the ROI annotations
        roi_df = annotations_df[
            annotations_df['patient_id'].str.match(file_id)]

        # if it's empty, this brain is ELVO negative
        if roi_df.empty:
            elvo_positive = False
        else:
            elvo_positive = True

        arr = cloud.download_array(in_blob)
        rois = []
        centers = []

        # if it's elvo positive
        if elvo_positive:
            # iterate through every occlusion this patient has
            for row in roi_df.itertuples():
                """
                row[0] = index
                row[1] = patient ID
                row[2] = red1
                row[3] = red2
                row[4] = green1
                row[5] = green2
                row[6] = blue1
                row[7] = blue2
                """
                # append the lowest-valued corner of the ROI to rois
                rois.append((int(len(arr) - row[7]),
                             int(row[4]),
                             int(row[2])))
                # append the center of the ROI to centers
                centers.append(
                    (int(((len(arr) - row[6]) + (len(arr) - row[7])) / 2),
                     int((row[4] + row[5]) / 2),
                     int((row[2] + row[3]) / 2)))
            logging.info(f'rois: {rois}, centers: {centers}')

        h = 0

        # loop through every chunk
        for i in range(0, len(arr), 32):
            for j in range(0, len(arr[0]), 32):
                for k in range(0, len(arr[0][0]), 32):
                    found_positive = False

                    # loop through the available ROIs and centers
                    for roi, center in zip(rois, centers):
                        # if the center lies within this chunk
                        if i <= center[0] <= i + 32 \
                                and j <= center[1] <= j + 32 \
                                and k <= center[2] <= k + 32:
                            # save the ROI and skip this block
                            chunk = arr[roi[0]:roi[0] + 32,
                                        roi[1]:roi[1] + 32,
                                        roi[2]:roi[2] + 32]
                            cloud.save_chunks_to_cloud(np.asarray(chunk),
                                                       'normal', 'positive',
                                                       file_id + str(h))
                            h += 1
                            found_positive = True

                    if found_positive:
                        continue

                    # copy the chunk
                    chunk = arr[i:(i + 32), j:(j + 32), k:(k + 32)]

                    # calculate the airspace
                    airspace = np.where(chunk < -300)

                    # if it's less than 90% airspace,
                    # save the chunk to the cloud as a negative
                    if (airspace[0].size / chunk.size) < 0.9:
                        cloud.save_chunks_to_cloud(np.asarray(chunk),
                                                   'normal', 'negative',
                                                   file_id + str(h))
                        h += 1

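# Toy illustration of the 90%-airspace filter used in create_chunks above and
# create_labels below: voxels below -300 are treated as air, and a chunk is
# kept only if less than 90% of its voxels are air (values here are made up
# for the example):
def _example_airspace_filter():
    chunk = np.full((32, 32, 32), -1000.0)    # start as pure air
    chunk[:8] = 40.0                          # fill 25% with soft tissue
    airspace = np.where(chunk < -300)
    keep = (airspace[0].size / chunk.size) < 0.9
    print(keep)    # True -- only 75% of the chunk is air
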
def create_labels(annotations_df: pd.DataFrame):
    """
    Process and save labels for the chunks based off of previously-derived
    annotations. Very similar to create_chunks in methodology.
    :param annotations_df: annotations to get labels from
    :return:
    """
    client = cloud.authenticate()
    bucket = client.get_bucket('elvos')
    label_dict = {}

    # loop through every array on GCS
    for in_blob in bucket.list_blobs(prefix='airflow/npy'):
        # blacklist
        if in_blob.name == 'airflow/npy/LAUIHISOEZIM5ILF.npy':
            continue

        # get the file id
        file_id = in_blob.name.split('/')[2]
        file_id = file_id.split('.')[0]
        logging.info(f'labeling {file_id}')

        # copy ROI if there's a positive match in the ROI annotations
        roi_df = annotations_df[
            annotations_df['patient_id'].str.match(file_id)]

        # if it's empty, this brain is ELVO negative
        if roi_df.empty:
            elvo_positive = False
        else:
            elvo_positive = True

        arr = cloud.download_array(in_blob)
        rois = []
        centers = []

        # if it's elvo positive
        if elvo_positive:
            # go through each occlusion this patient has
            for row in roi_df.itertuples():
                """
                row[0] = index
                row[1] = patient ID
                row[2] = red1
                row[3] = red2
                row[4] = green1
                row[5] = green2
                row[6] = blue1
                row[7] = blue2
                """
                # append ROI to rois
                rois.append((int(len(arr) - row[7]),
                             int(row[4]),
                             int(row[2])))
                # append center to centers
                centers.append(
                    (int(((len(arr) - row[6]) + (len(arr) - row[7])) / 2),
                     int((row[4] + row[5]) / 2),
                     int((row[2] + row[3]) / 2)))

        h = 0

        # loop through every chunk
        for i in range(0, len(arr), 32):
            for j in range(0, len(arr[0]), 32):
                for k in range(0, len(arr[0][0]), 32):
                    found_positive = False

                    # loop through the available ROIs and centers
                    for roi, center in zip(rois, centers):
                        # if the center lies within this chunk
                        if i <= center[0] <= i + 32 \
                                and j <= center[1] <= j + 32 \
                                and k <= center[2] <= k + 32:
                            # label this chunk as positive and skip the block
                            label_dict[file_id + str(h)] = 1
                            h += 1
                            found_positive = True

                    if found_positive:
                        continue

                    # copy the chunk
                    chunk = arr[i:(i + 32), j:(j + 32), k:(k + 32)]

                    # calculate the airspace
                    airspace = np.where(chunk < -300)

                    # if it's less than 90% airspace, label it as 0
                    if (airspace[0].size / chunk.size) < 0.9:
                        label_dict[file_id + str(h)] = 0
                        h += 1

    # convert the labels to a df
    labels_df = pd.DataFrame.from_dict(label_dict, orient='index',
                                       columns=['label'])
    labels_df.to_csv('annotated_labels.csv')

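# Hedged sketch of reading the saved labels back (e.g. for training); the
# index column holds the chunk ids (file_id + chunk number) written above:
def _example_load_labels(csv_path: str = 'annotated_labels.csv') -> dict:
    labels_df = pd.read_csv(csv_path, index_col=0)
    return labels_df['label'].to_dict()
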