def all_labels(dataset: List[str]) -> Dict[bool, int]:
    """Count the CT directories in *dataset* per ``is_fractured`` label.

    For every directory the label is read from the metadata of the first
    file that ``os.walk`` lists directly inside it.

    Args:
        dataset: Paths of the CT directories to inspect.

    Returns:
        A dict with the keys ``True``, ``False`` and ``None`` (note that the
        annotated return type does not mention the ``None`` key). ``None``
        counts the directories whose metadata could not be loaded because
        ``metadata`` raised ``NotImplementedError``, plus any directory whose
        stored label is ``None``.

    Raises:
        IndexError: If a directory contains no files.
    """
    counts = dict.fromkeys((True, False, None), 0)
    for ct_dir in dataset:
        _, _, file_names = next(os.walk(ct_dir))
        first_image = os.path.join(ct_dir, file_names[0])
        try:
            label = metadata(first_image)['is_fractured']
        except NotImplementedError:
            # Missing JPEG plugin
            label = None
        counts[label] += 1
    return counts
def negatives_and_positives(ct_dirs):
    """Split *ct_dirs* into non-fractured and fractured CT directories.

    The ``is_fractured`` flag is taken from the metadata of the first image
    file of each CT. Directories whose metadata cannot be loaded
    (``NotImplementedError``) are left out of both lists.

    Args:
        ct_dirs: CT directories to classify.

    Returns:
        A ``(negatives, positives)`` tuple of lists of CT directories, each
        in the order of *ct_dirs*.

    Raises:
        AssertionError: If a CT has ``is_fractured`` set to ``None``.
    """
    files_by_ct = image_files_for_ct(ct_dirs)
    negatives, positives = [], []
    for ct_dir in ct_dirs:
        try:
            info = metadata(files_by_ct[ct_dir][0])
        except NotImplementedError:
            # missing jpeg plugin for example
            continue
        fractured = info['is_fractured']
        if fractured is None:
            # No annotation available :(
            raise AssertionError('This should not happen anymore after the metadata rewrite')
        target = positives if fractured else negatives
        target.append(ct_dir)
    return negatives, positives
def negative_and_positive_scores(ct_dirs):
    """Split *ct_dirs* by the sign of their ``sum_score`` metadata entry.

    The score is read from the metadata of the first image file of each CT.
    CTs whose metadata cannot be loaded (``NotImplementedError``) and CTs
    whose ``sum_score`` is NaN are left out of both lists.

    Args:
        ct_dirs: CT directories to classify.

    Returns:
        A ``(negatives, positives)`` tuple of lists: ``positives`` holds the
        CTs with ``sum_score > 0``, ``negatives`` all remaining scored CTs.
    """
    files_by_ct = image_files_for_ct(ct_dirs)
    negatives, positives = [], []
    for ct_dir in ct_dirs:
        try:
            info = metadata(
                files_by_ct[ct_dir][0],
                ignore=['PixelData', 'pixel_array', 'crf', '3dcpm', 'tiff'])
        except NotImplementedError:
            # missing jpeg plugin for example
            continue
        sum_score = info['sum_score']
        if math.isnan(sum_score):
            continue
        target = positives if sum_score > 0 else negatives
        target.append(ct_dir)
    return negatives, positives
def negatives_and_positives_vertebrae(ct_dirs: List[str], ignore_missing_annotations=True):
    """Split all ``(ct_dir, vertebra)`` pairs by their ``SQ Score``.

    For every CT the metadata of its first image file is loaded once and
    then every entry of ``VERTEBRAE`` is classified from the
    ``per_vertebra_annotations`` of that metadata.

    Args:
        ct_dirs: CT directories to inspect.
        ignore_missing_annotations: When true, vertebrae that have no
            annotation entry or a NaN ``SQ Score`` are skipped. When false,
            a missing entry raises ``KeyError`` and a NaN score ends up in
            the positives (NaN does not compare equal to zero).

    Returns:
        A ``(negatives, positives)`` tuple of lists of ``(ct_dir, vertebra)``
        tuples; ``negatives`` are the pairs with a score of exactly ``0``.
    """
    images_files_by_ct = image_files_for_ct(ct_dirs)
    positives = []
    negatives = []
    for ct_dir in ct_dirs:
        # The metadata only depends on the CT, not on the vertebra, so it is
        # read once per CT instead of once per (CT, vertebra) pair.
        try:
            image_metadata = metadata(
                images_files_by_ct[ct_dir][0],
                ignore=['PixelData', 'pixel_array', 'crf', '3dcpm', 'tiff'])
        except NotImplementedError:
            # missing jpeg plugin for example: no vertebra of this CT is usable
            continue
        annotations = image_metadata['per_vertebra_annotations']
        for vertebra in VERTEBRAE:
            if ignore_missing_annotations and (
                    vertebra not in annotations
                    or math.isnan(annotations[vertebra]['SQ Score'])):
                # No annotation available :(
                continue
            score = float(annotations[vertebra]['SQ Score'])
            if score == 0.:
                negatives.append((ct_dir, vertebra))
            else:
                positives.append((ct_dir, vertebra))
    return negatives, positives
def __next__(self):
    """Assemble and return the next batch of vertebra samples.

    Samples are drawn until ``self.batch_size`` slots are filled. With
    ``self.random_order`` set, every draw picks from ``self.positives`` or
    ``self.negatives`` with equal probability; otherwise the sample is
    taken from ``self.all_vertebrae`` at the running sample index modulo the
    list length. A sample that cannot be used (more than 1000 image files,
    unreadable metadata, missing annotation, ``MemoryError`` or
    ``MissingTiffError`` while loading the volume) is removed from
    ``self.all_vertebrae`` and the slot is retried with another draw.

    Side effects: fills ``self.metadata_cache``, lazily allocates
    ``self.volumes``, overwrites the per-batch buffers (``self.volumes``,
    ``self.relevant_metadata``, ``self.y1``, ``self.y2``,
    ``self.label_weights``, ``self.names``) and increments
    ``self.step_count``.

    Returns:
        A tuple ``(x, y, weights)`` with ``x = [volumes, relevant_metadata]``,
        ``y = [y1, y2]`` and ``weights = [label_weights, label_weights]``.
        If ``self.with_names`` is set, ``self.names`` is appended as a
        fourth element. The lists reference the instance's own buffers.
    """
    batch_idx = 0
    while batch_idx < self.batch_size:
        if self.random_order:
            if random.random() < 0.5:
                ct_dir, vertebra = random.choice(self.positives)
                if (ct_dir, vertebra) not in self.all_vertebrae:
                    # if we removed it from the dataset already
                    self.positives.remove((ct_dir, vertebra))
                    continue
            else:
                ct_dir, vertebra = random.choice(self.negatives)
                if (ct_dir, vertebra) not in self.all_vertebrae:
                    # if we removed it from the dataset already
                    self.negatives.remove((ct_dir, vertebra))
                    continue
        else:
            ct_dir, vertebra = self.all_vertebrae[
                (self.step_count * self.batch_size + batch_idx)
                % len(self.all_vertebrae)]

        image_files = self.image_files_for_ct[ct_dir]
        if (ct_dir, vertebra) not in self.metadata_cache:
            if len(image_files) > 1000:
                print('(skipping) volume too large: ', ct_dir, vertebra)
                self.all_vertebrae.remove((ct_dir, vertebra))
                continue
            one_image_file = image_files[0]
            try:
                image_metadata = metadata(one_image_file, ignore=[
                    'PixelData', 'pixel_array', 'crf', '3dcpm', 'tiff'
                ])
            except NotImplementedError:
                # missing jpeg plugin for example
                print('(skipping) unable to load metadata for ', ct_dir, vertebra)
                # logging %-formats the message with its arguments, so the
                # placeholders are required for the record to be emitted.
                logger.debug('unable to load metadata for %s %s', ct_dir, vertebra)
                self.all_vertebrae.remove((ct_dir, vertebra))
                continue
            if (vertebra not in image_metadata['per_vertebra_annotations']
                    or math.isnan(
                        image_metadata['per_vertebra_annotations']
                        [vertebra]['SQ Score'])):
                print('(skipping) no annotation available for ', ct_dir, vertebra)
                logger.debug('no annotation available for %s %s', ct_dir, vertebra)
                self.all_vertebrae.remove((ct_dir, vertebra))
                continue
            # if patient_number_from_long_string(ct_dir) != '3009':
            #     print('(skipping) wrong patient ', ct_dir, vertebra)
            #     all_vertebrae.remove((ct_dir, vertebra))
            #     continue
            try:
                assert image_metadata is not None
                # Only the first three characters of the age entry are
                # parsed; a missing entry yields '000' and thus 0.0.
                age = float(image_metadata.get('PatientAge', '000Y')[:3])
            except ValueError:
                # Fallback age used when the entry cannot be parsed.
                age = 70
            self.metadata_cache[(ct_dir, vertebra)] = {
                'PatientAge': age,
                # 'PatientSex': image_metadata['PatientSex'],
                **{
                    key: image_metadata['per_vertebra_annotations'][vertebra][key]
                    for key in self.label_names
                },
            }
            for key in self.label_names:
                assert not math.isnan(
                    float(self.metadata_cache[(ct_dir, vertebra)][key]))
        # From here on only the reduced, cached metadata is used.
        image_metadata = self.metadata_cache[(ct_dir, vertebra)]

        try:
            volume = vertebra_volume(
                ct_dir,
                vertebra,
                desired_size_mm=self.required_size_mm,
                pixel_scaling=self.pixel_scaling,
                coordinates_from=self.coordinates_from)
            # coordinate order is now UD, FB, LR
            # NOTE(review): the rest of this method indexes the volume with
            # two axes only; flipping axis 2 looks like a leftover from a
            # 3-D version and would fail on a 2-D array - confirm.
            if self.random_flip_lr and random.random() < 0.5:
                volume = np.flip(volume, axis=2)
            # Adding the offset turns the shift from [-s, s] into [0, 2s].
            offset = self.random_shift_px
            actual_shift = [
                random.randint(-self.random_shift_px, self.random_shift_px) + offset
                for _ in volume.shape
            ]
            volume = volume[
                # actual_shift[2]:actual_shift[2] + math.ceil(self.desired_size_mm[2] / MADER_SPACING[2]),
                actual_shift[1]:actual_shift[1] + math.ceil(self.desired_size_mm[1] / MADER_SPACING[1]),
                actual_shift[0]:actual_shift[0] + math.ceil(self.desired_size_mm[0] / MADER_SPACING[0]),
            ]  # cut volume
        except MemoryError:
            print('(skipping) memory error for ', ct_dir)
            logger.debug('memory error for %s %s', ct_dir, vertebra)
            self.all_vertebrae.remove((ct_dir, vertebra))
            continue
        except MissingTiffError:
            print('(skipping) tiff not available for ', ct_dir, vertebra)
            logger.debug('tiff not available for %s %s', ct_dir, vertebra)
            self.all_vertebrae.remove((ct_dir, vertebra))
            continue

        if self.random_noise_percent != 0:
            volume = multiplicative_gaussian_noise(
                volume, self.random_noise_percent)

        # add the data to the batch
        if self.volumes is None:
            # The buffer shape is taken from the first volume that loads.
            self.volumes = np.zeros((self.batch_size, *volume.shape, 1),
                                    dtype=self.floatx())
        self.volumes[batch_idx, :, :, 0] = volume
        age = image_metadata['PatientAge'] if self.include_age else 0
        sex = 0  # 1 if image_metadata['PatientSex'] == 'M' and self.include_sex else 0
        self.relevant_metadata[batch_idx, :] = [age / 100, sex]

        # the label
        is_fractured = bool(image_metadata['SQ Score'])
        self.y1[batch_idx] = [float(is_fractured)]
        self.y2[batch_idx] = [
            float(image_metadata[label]) for label in self.label_names
        ]
        self.label_weights[batch_idx] = float(self.weights[is_fractured])
        self.names[batch_idx] = (ct_dir, vertebra)
        batch_idx += 1

    assert self.volumes is not None
    assert self.volumes.shape[0] == self.batch_size
    assert self.relevant_metadata.shape[0] == self.batch_size
    assert self.y1.shape[0] == self.batch_size
    assert self.y2.shape[0] == self.batch_size
    x = [self.volumes, self.relevant_metadata]
    y = [self.y1, self.y2]
    assert not np.isnan(np.sum(self.volumes))
    assert not np.isnan(np.sum(self.y1))
    assert not np.isnan(np.sum(self.y2))
    assert not np.isnan(np.sum(self.relevant_metadata))
    assert not np.isnan(np.sum(self.label_weights))
    batch = [x, y, [self.label_weights, self.label_weights]]
    if self.mixup_rate > 0:
        apply_mixup(batch, self.mixup_rate, self.batch_size)
    self.step_count += 1
    if self.with_names:
        return (*batch, self.names)
    else:
        return tuple(batch)
def vertebra_volume(
        ct_dir: str,
        vertebra,
        desired_size_mm,
        interpolator=MADER_INTERPOLATION,  # mader uses b_spline
        pixel_scaling='divide_by_2k',
        coordinates_from='tiff_2d_center'):
    """Cut a patch around one vertebra out of the resampled image of a CT.

    The image is loaded with ``spaced_ct_volume`` and rotated with
    ``numpy.rot90(volume, 3)``. The patch centre is the midpoint of the
    annotated morphometry points 1 and 4 of the vertebra, mirrored when the
    annotation's ``Flip LR`` / ``Flip UD`` flags are set and converted to
    pixels of the resampled image. The patch is moved back inside the image
    where it would otherwise stick out.

    As a side effect the full (uncropped) image is written once per patient
    to ``img/generated/full_spine_center/<patient number>.png``.

    Args:
        ct_dir: Directory of the CT.
        vertebra: Key of the vertebra in ``per_vertebra_annotations``.
        desired_size_mm: Patch size per axis; divided by ``MADER_SPACING``
            to get the size in pixels.
        interpolator: Passed through to ``spaced_ct_volume``.
        pixel_scaling: Passed through to ``spaced_ct_volume``.
        coordinates_from: Source of the vertebra position; only
            ``'tiff_2d_center'`` is supported.

    Returns:
        The cropped array; its shape equals the desired size in pixels.

    Raises:
        NotImplementedError: For any other value of *coordinates_from*.
        AssertionError: If the centre lies outside the image, a flip flag is
            not an ``int``, or the cropped shape is not the desired one.
    """
    spacings = list(MADER_SPACING)
    image_files = image_files_for_ct([ct_dir])[ct_dir]
    volume: numpy.ndarray = spaced_ct_volume(
        image_files,
        desired_spacings=MADER_SPACING,
        interpolator=interpolator,
        pixel_scaling=pixel_scaling)
    volume = numpy.rot90(volume, 3)
    first_image_file = image_files[0]
    patient_number = patient_number_from_long_string(first_image_file)

    if coordinates_from != 'tiff_2d_center':
        raise NotImplementedError()

    image_metadata = metadata(
        first_image_file,
        ignore=['PixelData', 'pixel_array', 'crf', '3dcpm'])
    annotation = image_metadata['per_vertebra_annotations'][vertebra]
    tiff_metadata = annotation['tiff_metadata']
    # Midpoint between morphometry points 1 and 4, in TIFF pixels.
    tiff_center_x_px = (annotation['Morphometry Point1X']
                        + annotation['Morphometry Point4X']) / 2
    tiff_center_y_px = (annotation['Morphometry Point1Y']
                        + annotation['Morphometry Point4Y']) / 2
    flip_lr = annotation['Flip LR']
    assert isinstance(flip_lr, int)
    flip_ud = annotation['Flip UD']
    assert isinstance(flip_ud, int)
    # Mirror the centre when the annotation says the TIFF was flipped.
    if flip_lr:
        tiff_center_x_px = tiff_metadata['ImageWidth'][0] - tiff_center_x_px
    if flip_ud:
        tiff_center_y_px = tiff_metadata['ImageLength'][0] - tiff_center_y_px
    # NOTE(review): the 1e6 factor presumably converts the stored TIFF
    # resolution into the unit of MADER_SPACING - confirm against the data.
    center_px = (
        round(tiff_center_x_px / (tiff_metadata['XResolution'][0][0] / 1e6) / spacings[0]),
        round(tiff_center_y_px / (tiff_metadata['YResolution'][0][0] / 1e6) / spacings[1]),
    )

    # mader uses reverse coordinate order from ours; the reversal of
    # desired_size_mm, spacings and center_px is intentionally not applied.
    assert all(
        0 <= center < current_length
        for center, current_length in zip(center_px, volume.shape))
    desired_size_px = tuple(
        round(size / spacing)
        for size, spacing in zip(desired_size_mm, spacings))

    # patch around center, clamped so that it starts inside the image
    slices = []
    for center, desired, current_length in zip(
            center_px, desired_size_px, volume.shape):
        start = max(0, min(center - floor(desired / 2), current_length - desired))
        slices.append(slice(start, start + desired))

    filename = 'img/generated/full_spine_center/{0}.png'.format(patient_number)
    if not os.path.isfile(filename):
        os.makedirs('img/generated/full_spine_center/', exist_ok=True)
        imageio.imwrite(filename, volume[:, :])

    volume = volume[slices[0], slices[1]]
    assert volume.shape == desired_size_px
    return volume


# vertebra_volume(ALL_CT_DIRS[15], 'T4',(50, 50), coordinates_from='tiff_2d_center')
def spaced_ct_volume(
        image_files,
        desired_spacings=MADER_SPACING,  # Spacing used by O. Mader
        interpolator=MADER_INTERPOLATION,
        swap_axes=None,
        pixel_scaling='divide'):
    """Load a CT volume, resample it with SimpleITK and scale its values.

    Args:
        image_files: Image files of one CT, passed on to ``ct_volume``.
        desired_spacings: Output spacing handed to the resampler.
        interpolator: One of ``'b_spline'``, ``'nn'``, ``'linear'`` or
            ``'lanczos_windowed_sinc'``.
        swap_axes: Pairs of axes to swap before resampling; ``None`` selects
            ``SWAP_AXES`` (to get the format used by O. Mader).
        pixel_scaling: ``'range'`` maps the values to [-1, 1], ``'range01'``
            to [0, 1], ``'divide'`` divides by 256 and ``'divide_by_2k'``
            divides by 2048.

    Returns:
        The resampled and scaled array.

    Raises:
        NotImplementedError: For an unknown *interpolator* or
            *pixel_scaling*.
        AssertionError: If the resampled image does not report
            *desired_spacings*, or the scaled range is not the expected one.
    """
    axis_pairs = SWAP_AXES if swap_axes is None else swap_axes

    volume = ct_volume(shape='original',
                       image_files=image_files,
                       shape_mode='original')
    volume = volume.astype(backend.floatx())

    sitk_interpolators = {
        'b_spline': SimpleITK.sitkBSpline,
        'nn': SimpleITK.sitkNearestNeighbor,
        'linear': SimpleITK.sitkLinear,
        'lanczos_windowed_sinc': SimpleITK.sitkLanczosWindowedSinc,
    }
    try:
        sitk_interpolator = sitk_interpolators[interpolator]
    except (KeyError, TypeError):
        raise NotImplementedError('Unknown interpolation method') from None

    # The metadata is still loaded although its result is unused: the
    # source spacing (PixelSpacing / SpacingBetweenSlices) is not applied to
    # the image. The call is kept because it may raise.
    metadata(image_files[0])

    # Output size in pixels, taken from the shape before the axes are swapped.
    output_size: numpy.ndarray = numpy.ceil(
        numpy.array(volume.shape)).astype(int)

    for first_axis, second_axis in axis_pairs:
        volume = numpy.swapaxes(volume, first_axis, second_axis)

    source_image: SimpleITK.Image = SimpleITK.GetImageFromArray(volume)
    resampler = SimpleITK.ResampleImageFilter()
    resampler.SetSize(output_size.tolist())
    resampler.SetInterpolator(sitk_interpolator)
    resampler.SetOutputSpacing(desired_spacings)
    resampled_image = resampler.Execute(source_image)
    assert resampled_image.GetSpacing() == desired_spacings
    volume = SimpleITK.GetArrayFromImage(resampled_image)

    if pixel_scaling in ('range', 'range01'):
        volume -= numpy.min(volume)  # minimum is 0 now
        volume /= numpy.max(volume)  # maximum is 1 now
        if pixel_scaling == 'range':
            volume -= 0.5  # range is -0.5 to 0.5 now
            volume *= 2  # range is -1 to 1 now
            assert numpy.min(volume) == -1
            assert numpy.max(volume) == 1
        else:
            assert numpy.min(volume) == 0
            assert numpy.max(volume) == 1
    elif pixel_scaling == 'divide':
        volume /= 256
    elif pixel_scaling == 'divide_by_2k':
        volume /= 2048
    else:
        raise NotImplementedError()
    return volume
] if names: image_files = [ os.path.sep.join([os.path.sep.join(n_split), names[-1]]) ] else: image_files = [image_files[0]] image_file = image_files[0] img = Image.open(image_file) # np_im = np.asarray(img) patient_number = patient_number_from_long_string(image_file) image_metadata = metadata( image_file, ignore=['PixelData', 'pixel_array', 'crf', '3dcpm']) image_metadata = image_metadata['per_vertebra_annotations'] for v in image_metadata.keys(): tiff_metadata = image_metadata[v]['tiff_metadata'] cen_x_px = (image_metadata[v]['Morphometry Point1X'] + image_metadata[v]['Morphometry Point4X']) / 2 cen_y_px = (image_metadata[v]['Morphometry Point1Y'] + image_metadata[v]['Morphometry Point4Y']) / 2 assert isinstance(image_metadata[v]['Flip LR'], int) assert isinstance(image_metadata[v]['Flip UD'], int) if image_metadata[v]['Flip LR']: cen_x_px = (tiff_metadata['ImageWidth'][0] - cen_x_px) if image_metadata[v]['Flip UD']: cen_y_px = (tiff_metadata['ImageLength'][0] - cen_y_px)