def load_without_size_preprocessing(input_folder, cv_fold_num, train_test,
                                    idx):

    image_folders_list, label_folders_list, patient_id_list = get_patient_folders(
        input_folder, cv_fold_num)

    # ==============================
    # First, get ed_es_diff for this subject
    # ==============================
    for item in os.listdir(image_folders_list[train_test][idx // 2][:-8]):
        if 'list.txt' in item:
            text_file = open(
                image_folders_list[train_test][idx // 2][:-8] + item, "r")
            slice_ids_with_annotations = []
            # the slice id sits in a fixed-width field of each line
            for line in text_file.readlines():
                slice_ids_with_annotations.append(int(float(line[-25:-21])))
            text_file.close()

    # slice ids that are not multiples of 20 encode the ED-ES frame offset
    for slice_id in np.unique(slice_ids_with_annotations):
        if slice_id % 20 != 0:
            ed_es_diff = int(slice_id % 20)

    # ==============================
    # read image and label
    # ==============================
    image_ED, image_ES, px, py, pz = read_image(
        image_folders_list[train_test][idx // 2], ed_es_diff)
    label_ED, label_ES = read_label(label_folders_list[train_test][idx // 2],
                                    image_ED.shape, ed_es_diff)

    img_ED = image_ED.copy()
    img_ES = image_ES.copy()
    lab_ED = label_ED.copy()
    lab_ES = label_ES.copy()

    # ============
    # normalize the image to be between 0 and 1
    # ============
    img_ED = utils.normalise_image(img_ED, norm_type='div_by_max')
    img_ES = utils.normalise_image(img_ES, norm_type='div_by_max')

    # ============
    # decide if ES or ED needs to be returned
    # ============
    if idx % 2 == 0:
        image = img_ED
        label = lab_ED
    else:
        image = img_ES
        label = lab_ES

    return image, label
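
# ===============================================================
# Note: utils.normalise_image(..., norm_type='div_by_max') is used
# throughout these examples but never defined in them. Below is a
# minimal sketch of what such a helper plausibly does (min-shift,
# then divide by max); the real utils implementation may differ.
# ===============================================================
import numpy as np

def normalise_image_div_by_max_sketch(image, eps=1e-8):
    # shift so the minimum becomes 0, then scale so the maximum becomes 1
    image = image.astype(np.float32)
    image = image - np.min(image)
    return image / (np.max(image) + eps)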
Example #2
def load_without_size_preprocessing(preproc_folder,
                                    patient_id):
                    
    # ==================
    # read bias corrected image and ground truth segmentation
    # ==================
    filepath_bias_corrected_nii_format = preproc_folder + 'Case' + patient_id + '_n4.nii.gz'
    filepath_seg_nii_format = preproc_folder + 'Case' + patient_id + '_segmentation.nii.gz'
    
    # ================================    
    # read bias corrected image
    # ================================    
    image = utils.load_nii(filepath_bias_corrected_nii_format)[0]

    # ================================    
    # normalize the image
    # ================================    
    image = utils.normalise_image(image, norm_type='div_by_max')

    # ================================    
    # read the labels
    # ================================    
    label = utils.load_nii(filepath_seg_nii_format)[0]            
    
    # ================================    
    # skimage io with the SimpleITK plugin was used to read the images in the convert_to_nii_and_correct_bias_field function.
    # this led to the arrays being read as z-x-y.
    # move the axes appropriately, so that the resolution read above is correct for the corresponding axes.
    # ================================    
    image = np.swapaxes(np.swapaxes(image, 0, 1), 1, 2)
    label = np.swapaxes(np.swapaxes(label, 0, 1), 1, 2)
    
    return image, label
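
# ===============================================================
# Aside: the two chained swapaxes calls above turn a z-x-y array into
# x-y-z. The same reordering can be written as a single np.moveaxis
# call; this small shape check (not part of the original code)
# confirms the equivalence.
# ===============================================================
import numpy as np

zxy = np.zeros((20, 256, 256))                 # z-x-y, as read from disk
via_swaps = np.swapaxes(np.swapaxes(zxy, 0, 1), 1, 2)
via_moveaxis = np.moveaxis(zxy, 0, 2)          # move axis 0 (z) to the end
assert via_swaps.shape == via_moveaxis.shape == (256, 256, 20)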
Example #3
def load_without_size_preprocessing(input_folder,
                                    cv_fold_num,
                                    train_test,
                                    idx):
    
    file_list = get_file_list(input_folder,
                              cv_fold_num)
    
    image_file = file_list[train_test][idx]
    
    # ============
    # read image and normalize it to be between 0 and 1
    # ============
    image_dat = utils.load_nii(image_file)
    image = image_dat[0].copy()
    image = utils.normalise_image(image, norm_type='div_by_max')
    
    # ============
    # read label and set RV label to 1, others to 0
    # ============
    label_file = image_file.split('_n4.nii.gz')[0] + '_gt.nii.gz'
    label_dat = utils.load_nii(label_file)
    label = label_dat[0].copy()
    label[label!=1] = 0
        
    return image, label
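
# ===============================================================
# The label handling above keeps only the right-ventricle class. A
# tiny self-contained demo of the same masking idiom on a toy array:
# ===============================================================
import numpy as np

lbl = np.array([0, 1, 2, 3, 1])  # toy labels: background, RV, myocardium, LV
lbl[lbl != 1] = 0                # keep RV (class 1), zero out everything else
assert lbl.tolist() == [0, 1, 0, 0, 1]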
Example #4
def load_without_size_preprocessing(input_folder, site_name, idx, depth):

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + site_name + '/*/'))

    # ==================
    # get file paths
    # ==================
    patient_name, image_path, label_path = get_image_and_label_paths(
        filenames[idx])

    # ============
    # read the image and normalize it to be between 0 and 1
    # ============
    image, _, image_hdr = utils.load_nii(image_path)
    image = np.swapaxes(
        image, 1, 2
    )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets

    # ==================
    # read the label file
    # ==================
    label, _, _ = utils.load_nii(label_path)
    label = np.swapaxes(
        label, 1, 2
    )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
    label = utils.group_segmentation_classes(
        label)  # group the segmentation classes as required

    # ============
    # create a segmentation mask and use it to get rid of the skull in the image
    # ============
    label_mask = np.copy(label)
    label_mask[label > 0] = 1
    image = image * label_mask

    # ==================
    # crop out some portion of the image, which are all zeros (rough registration via visual inspection)
    # ==================
    if site_name == 'CALTECH':
        image = image[:, 80:, :]
        label = label[:, 80:, :]
    elif site_name == 'STANFORD':
        image, label = center_image_and_label(image, label)

    # ==================
    # crop volume along z axis (as there are several zeros towards the ends)
    # ==================
    image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
    label = utils.crop_or_pad_volume_to_size_along_z(label, depth)

    # ==================
    # normalize the image
    # ==================
    image = utils.normalise_image(image, norm_type='div_by_max')

    return image, label
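
# ===============================================================
# utils.crop_or_pad_volume_to_size_along_z is called by several of
# these loaders but not shown. A minimal sketch, under the assumption
# that it centre-crops or zero-pads the third axis to a target number
# of slices (the real utils implementation may differ):
# ===============================================================
import numpy as np

def crop_or_pad_volume_to_size_along_z_sketch(vol, nz_target):
    nz = vol.shape[2]
    if nz >= nz_target:                     # centre-crop along z
        start = (nz - nz_target) // 2
        return vol[:, :, start:start + nz_target]
    out = np.zeros(vol.shape[:2] + (nz_target,), dtype=vol.dtype)
    start = (nz_target - nz) // 2           # centre-pad with zeros
    out[:, :, start:start + nz] = vol
    return out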
Example #5
    def log(self, writer, inputs, outputs, losses):
        print('logging')
        writer.add_scalar('lr', self.lr, self.step)

        # write to tensorboard
        for loss_type, loss in losses.items():
            writer.add_scalar('{}'.format(loss_type), loss, self.step)

        for i in range(min(4, len(inputs['image']))):

            writer.add_image('image_l/{}'.format(i),
                             normalise_image(inputs['image'][i]), self.step)
            writer.add_image('image_r/{}'.format(i),
                             normalise_image(inputs['stereo_image'][i]),
                             self.step)

            if inputs.get('disparity') is not None:
                writer.add_image('disp_target/{}'.format(i),
                                 normalise_image(inputs['disparity'][i]),
                                 self.step)

                warped_image = self.warp_stereo_image(
                    inputs['stereo_image'][i].cpu(),
                    inputs['disparity'][i].cpu())
                writer.add_image('warped_gt_image/{}'.format(i),
                                 normalise_image(warped_image), self.step)

            if inputs.get('mono_disparity') is not None:
                writer.add_image('mono_disparity/{}'.format(i),
                                 normalise_image(inputs['mono_disparity'][i]),
                                 self.step)

            if inputs.get('occlusion_mask') is not None:
                writer.add_image('occlusion_mask/{}'.format(i),
                                 normalise_image(inputs['occlusion_mask'][i]),
                                 self.step)

            writer.add_image('disp_pred/{}'.format(i),
                             normalise_image(outputs[('disp', 0)][i]),
                             self.step)

            warped_image = self.warp_stereo_image(
                inputs['stereo_image'][i].cpu(), outputs[('disp', 0)][i].cpu())
            writer.add_image('warped_image/{}'.format(i),
                             normalise_image(warped_image), self.step)
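
# ===============================================================
# The normalise_image used in the logging method above rescales a
# tensor for TensorBoard display. A plausible sketch (per-tensor
# min-max scaling to [0, 1]); the helper actually used may differ:
# ===============================================================
import torch

def normalise_image_for_display_sketch(t, eps=1e-8):
    t = t.detach().float()
    return (t - t.min()) / (t.max() - t.min() + eps)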
Example #6
def load_without_size_preprocessing(input_folder, idx, labeller):

    # ===============================
    # read all the patient folders from the base input folder
    # ===============================
    folder_list = []
    for folder in os.listdir(input_folder):
        folder_path = os.path.join(input_folder, folder)
        if os.path.isdir(folder_path) and 't2_tse_tra.nii.gz' in os.listdir(
                folder_path):
            if 'segmentation_' + labeller + '.nii.gz' in os.listdir(
                    folder_path
            ) or 'segmentation_tra_' + labeller + '.nii.gz' in os.listdir(
                    folder_path):
                folder_list.append(folder_path)

    # ==================
    # read the image file
    # ==================
    image, _, _ = utils.load_nii(folder_list[idx] + '/t2_tse_tra_n4.nii.gz')
    # ============
    # normalize the image to be between 0 and 1
    # ============
    image = utils.normalise_image(image, norm_type='div_by_max')

    # ==================
    # read the label file
    # ==================
    if 'segmentation_' + labeller + '.nii.gz' in os.listdir(folder_list[idx]):
        label, _, _ = utils.load_nii(folder_list[idx] + '/segmentation_' +
                                     labeller + '.nii.gz')
    elif 'segmentation_tra_' + labeller + '.nii.gz' in os.listdir(
            folder_list[idx]):
        label, _, _ = utils.load_nii(folder_list[idx] + '/segmentation_tra_' +
                                     labeller + '.nii.gz')
    # ==================
    # remove extra label from some images
    # ==================
    label[label > 2] = 0

    return image, label


Example #7
def load_without_size_preprocessing(input_folder,
                                    cv_fold_num,
                                    train_test,
                                    idx):
    
    # ===============================
    # read all the patient folders from the base input folder
    # ===============================
    image_folder = os.path.join(input_folder, 'Prostate-3T')
    label_folder = os.path.join(input_folder, 'NCI_ISBI_Challenge-Prostate3T_Training_Segmentations')
    folder_list = get_patient_folders(image_folder,
                                      folder_base='Prostate3T-01',
                                      cv_fold_number = cv_fold_num)
    folder = folder_list[train_test][idx]

    # ==================
    # make a list of all dcm images for this subject
    # ==================                        
    lstFilesDCM = []  # create an empty list
    for dirName, subdirList, fileList in os.walk(folder):
        for filename in fileList:
            if ".dcm" in filename.lower():  # check whether the file's DICOM
                lstFilesDCM.append(os.path.join(dirName, filename))
                
    # ==================
    # read bias corrected image
    # ==================
    nifti_img_path = lstFilesDCM[0][:lstFilesDCM[0].rfind('/')+1]
    image = utils.load_nii(img_path = nifti_img_path + 'img_n4.nii.gz')[0]

    # ============
    # normalize the image to be between 0 and 1
    # ============
    image = utils.normalise_image(image, norm_type='div_by_max')

    # ==================
    # read the label file
    # ==================        
    label = utils.load_nii(img_path = nifti_img_path + 'lbl.nii.gz')[0]
    
    return image, label
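
# ===============================================================
# Aside: the rfind('/') slicing above recovers the directory of the
# first DICOM file. os.path.dirname expresses the same thing more
# portably; a quick check with a made-up (hypothetical) path:
# ===============================================================
import os

_p = '/data/Prostate-3T/Prostate3T-01-0001/series/slice_000.dcm'
assert _p[:_p.rfind('/') + 1] == os.path.dirname(_p) + '/'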
Example #8
def load_without_size_preprocessing(input_folder, idx, protocol,
                                    preprocessing_folder, depth):

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))

    # ==================
    # get file paths
    # ==================
    patient_name, image_path, label_path = get_image_and_label_paths(
        filenames[idx], protocol, preprocessing_folder)

    # ============
    # read the image and normalize it to be between 0 and 1
    # ============
    image, _, image_hdr = utils.load_nii(image_path)
    image = np.swapaxes(
        image, 1, 2
    )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
    image = utils.normalise_image(image, norm_type='div_by_max')

    # ==================
    # read the label file
    # ==================
    label, _, _ = utils.load_nii(label_path)
    label = np.swapaxes(
        label, 1, 2
    )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
    label = utils.group_segmentation_classes(
        label)  # group the segmentation classes as required

    # ==================
    # crop volume along z axis (as there are several zeros towards the ends)
    # ==================
    image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
    label = utils.crop_or_pad_volume_to_size_along_z(label, depth)

    return image, label
Example #9
def prepare_data(input_folder,
                 output_file,
                 idx_start,
                 idx_end,
                 protocol,
                 size,
                 depth,
                 target_resolution,
                 preprocessing_folder):

    # ========================    
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # =======================
    # create a new hdf5 file
    # =======================
    hdf5_file = h5py.File(output_file, "w")

    # ===============================
    # Create datasets for images and labels
    # ===============================
    data = {}
    num_subjects = idx_end - idx_start
    
    data['images'] = hdf5_file.create_dataset("images", [num_subjects] + list(size), dtype=np.float32)
    data['labels'] = hdf5_file.create_dataset("labels", [num_subjects] + list(size), dtype=np.uint8)
    
    # ===============================
    # initialize lists
    # ===============================        
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []
    
    # ===============================        
    # initiate counter
    # ===============================        
    patient_counter = 0
    
    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):
        
        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(filenames[idx],
                                                                         protocol,
                                                                         preprocessing_folder)
        
        # ============
        # read the image and normalize it to be between 0 and 1
        # ============
        image, _, image_hdr = utils.load_nii(image_path)
        image = np.swapaxes(image, 1, 2) # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        
        # ==================
        # read the label file
        # ==================        
        label, _, _ = utils.load_nii(label_path)        
        label = np.swapaxes(label, 1, 2) # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        label = utils.group_segmentation_classes(label) # group the segmentation classes as required
        
        # ==================
        # collect some header info.
        # ==================
        px_list.append(float(image_hdr.get_zooms()[0]))
        py_list.append(float(image_hdr.get_zooms()[2])) # since axes 1 and 2 have been swapped
        pz_list.append(float(image_hdr.get_zooms()[1]))
        nx_list.append(image.shape[0]) 
        ny_list.append(image.shape[2]) # since axes 1 and 2 have been swapped
        nz_list.append(image.shape[1])
        pat_names_list.append(patient_name)
        
        # ==================
        # crop volume along z axis (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
        label = utils.crop_or_pad_volume_to_size_along_z(label, depth)     
        
        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')
                        
        # ======================================================  
        # rescale, crop / pad to make all images of the required size and resolution
        # ======================================================
        scale_vector = [image_hdr.get_zooms()[0] / target_resolution[0],
                        image_hdr.get_zooms()[2] / target_resolution[1],
                        image_hdr.get_zooms()[1] / target_resolution[2]] # since axes 1 and 2 have been swapped
        
        image_rescaled = transform.rescale(image_normalized,
                                           scale_vector,
                                           order=1,
                                           preserve_range=True,
                                           multichannel=False,
                                           mode = 'constant')

        label_onehot = utils.make_onehot_(label, nlabels=15)

        label_onehot_rescaled = transform.rescale(label_onehot,
                                                  scale_vector,
                                                  order=1,
                                                  preserve_range=True,
                                                  multichannel=True,
                                                  mode='constant')
        
        label_rescaled = np.argmax(label_onehot_rescaled, axis=-1)
        
        # ==================================
        # go through each z slice, crop or pad to a constant size and then append the resized 
        # this will ensure that the axes get arranged in the same orientation as they were during the 2d preprocessing
        # ==================================
        image_rescaled_cropped = []
        label_rescaled_cropped = []
        for zz in range(image_rescaled.shape[2]):
            image_rescaled_cropped.append(utils.crop_or_pad_slice_to_size(image_rescaled[:,:,zz], size[1], size[2]))
            label_rescaled_cropped.append(utils.crop_or_pad_slice_to_size(label_rescaled[:,:,zz], size[1], size[2]))
        image_rescaled_cropped = np.array(image_rescaled_cropped)
        label_rescaled_cropped = np.array(label_rescaled_cropped)

        # ============   
        # append to list
        # ============   
        image_list.append(image_rescaled_cropped)
        label_list.append(label_rescaled_cropped)

        # ============   
        # write to file
        # ============   
        _write_range_to_hdf5(data,
                             image_list,
                             label_list,
                             patient_counter,
                             patient_counter+1)
        
        _release_tmp_memory(image_list,
                            label_list)
        
        # update counter
        patient_counter += 1

    # Write the small datasets
    hdf5_file.create_dataset('nx', data=np.asarray(nx_list, dtype=np.uint16))
    hdf5_file.create_dataset('ny', data=np.asarray(ny_list, dtype=np.uint16))
    hdf5_file.create_dataset('nz', data=np.asarray(nz_list, dtype=np.uint16))
    hdf5_file.create_dataset('px', data=np.asarray(px_list, dtype=np.float32))
    hdf5_file.create_dataset('py', data=np.asarray(py_list, dtype=np.float32))
    hdf5_file.create_dataset('pz', data=np.asarray(pz_list, dtype=np.float32))
    hdf5_file.create_dataset('patnames', data=np.asarray(pat_names_list, dtype="S10"))
    
    # close the hdf5 file
    hdf5_file.close()
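
# ===============================================================
# utils.make_onehot_ (used above so that labels can be rescaled with
# linear interpolation without mixing class ids, then recovered via
# argmax) is not shown. A minimal sketch; nlabels=15 above suggests
# class ids 0..14:
# ===============================================================
import numpy as np

def make_onehot_sketch(label, nlabels):
    # label: integer array of shape (nx, ny, nz) -> float array (nx, ny, nz, nlabels)
    onehot = np.zeros(label.shape + (nlabels,), dtype=np.float32)
    for c in range(nlabels):
        onehot[..., c] = (label == c)
    return onehot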
Example #10
def prepare_data(
        input_folder,
        output_file,
        mode,
        size,  # for 3d: (nz, nx, ny), for 2d: (nx, ny)
        target_resolution,  # for 3d: (px, py, pz), for 2d: (px, py)
        cv_fold_num):
    '''
    Main function that prepares a dataset from the raw challenge data to an hdf5 dataset
    '''

    assert (mode in ['2D', '3D']), 'Unknown mode: %s' % mode
    if mode == '2D' and not len(size) == 2:
        raise AssertionError('Inadequate number of size parameters')
    if mode == '3D' and not len(size) == 3:
        raise AssertionError('Inadequate number of size parameters')
    if mode == '2D' and not len(target_resolution) == 2:
        raise AssertionError(
            'Inadequate number of target resolution parameters')
    if mode == '3D' and not len(target_resolution) == 3:
        raise AssertionError(
            'Inadequate number of target resolution parameters')

    # ============
    # create an empty hdf5 file
    # ============
    hdf5_file = h5py.File(output_file, "w")

    # ============
    # create empty lists for filling header info
    # ============
    diag_list = {'test': [], 'train': [], 'validation': []}
    height_list = {'test': [], 'train': [], 'validation': []}
    weight_list = {'test': [], 'train': [], 'validation': []}
    patient_id_list = {'test': [], 'train': [], 'validation': []}
    cardiac_phase_list = {'test': [], 'train': [], 'validation': []}
    nx_list = {'test': [], 'train': [], 'validation': []}
    ny_list = {'test': [], 'train': [], 'validation': []}
    nz_list = {'test': [], 'train': [], 'validation': []}
    px_list = {'test': [], 'train': [], 'validation': []}
    py_list = {'test': [], 'train': [], 'validation': []}
    pz_list = {'test': [], 'train': [], 'validation': []}

    file_list = {'test': [], 'train': [], 'validation': []}
    num_slices = {'test': 0, 'train': 0, 'validation': 0}

    # ============
    # go through all images and get header info.
    # one round of parsing is done just to get all the header info. The size info is used to create empty fields for the images and labels, with the required sizes.
    # Then, another round of reading the images and labels is done, which are pre-processed and written into the hdf5 file
    # ============
    for folder in os.listdir(input_folder):

        folder_path = os.path.join(input_folder, folder)

        if os.path.isdir(folder_path):

            # ============
            # train_test_validation split
            # ============
            train_test = test_train_val_split(patient_id=int(folder[-3:]),
                                              cv_fold_number=cv_fold_num)

            infos = {}
            for line in open(os.path.join(folder_path, 'Info.cfg')):
                label, value = line.split(':')
                infos[label] = value.rstrip('\n').lstrip(' ')

            patient_id = folder.lstrip('patient')

            # ============
            # reading this patient's image and collecting header info
            # ============
            for file in glob.glob(
                    os.path.join(folder_path, 'patient???_frame??_n4.nii.gz')):

                # ============
                # list with file paths
                # ============
                file_list[train_test].append(file)

                diag_list[train_test].append(diagnosis_dict[infos['Group']])
                weight_list[train_test].append(infos['Weight'])
                height_list[train_test].append(infos['Height'])

                patient_id_list[train_test].append(patient_id)

                systole_frame = int(infos['ES'])
                diastole_frame = int(infos['ED'])

                file_base = file.split('.')[0]
                frame = int(file_base.split('frame')[-1][:-3])
                if frame == systole_frame:
                    cardiac_phase_list[train_test].append(1)  # 1 == systole
                elif frame == diastole_frame:
                    cardiac_phase_list[train_test].append(2)  # 2 == diastole
                else:
                    cardiac_phase_list[train_test].append(
                        0)  # 0 means other phase

                nifty_img = nib.load(file)
                nx_list[train_test].append(nifty_img.shape[0])
                ny_list[train_test].append(nifty_img.shape[1])
                nz_list[train_test].append(nifty_img.shape[2])
                num_slices[train_test] += nifty_img.shape[2]
                py_list[train_test].append(
                    nifty_img.header.structarr['pixdim'][2])
                px_list[train_test].append(
                    nifty_img.header.structarr['pixdim'][1])
                pz_list[train_test].append(
                    nifty_img.header.structarr['pixdim'][3])

    # ============
    # writing the small datasets
    # ============
    for tt in ['test', 'train', 'validation']:
        hdf5_file.create_dataset('diagnosis_%s' % tt,
                                 data=np.asarray(diag_list[tt],
                                                 dtype=np.uint8))
        hdf5_file.create_dataset('weight_%s' % tt,
                                 data=np.asarray(weight_list[tt],
                                                 dtype=np.float32))
        hdf5_file.create_dataset('height_%s' % tt,
                                 data=np.asarray(height_list[tt],
                                                 dtype=np.float32))
        hdf5_file.create_dataset('patient_id_%s' % tt,
                                 data=np.asarray(patient_id_list[tt],
                                                 dtype=np.uint8))
        hdf5_file.create_dataset('cardiac_phase_%s' % tt,
                                 data=np.asarray(cardiac_phase_list[tt],
                                                 dtype=np.uint8))
        hdf5_file.create_dataset('nz_%s' % tt,
                                 data=np.asarray(nz_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('ny_%s' % tt,
                                 data=np.asarray(ny_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('nx_%s' % tt,
                                 data=np.asarray(nx_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('py_%s' % tt,
                                 data=np.asarray(py_list[tt],
                                                 dtype=np.float32))
        hdf5_file.create_dataset('px_%s' % tt,
                                 data=np.asarray(px_list[tt],
                                                 dtype=np.float32))
        hdf5_file.create_dataset('pz_%s' % tt,
                                 data=np.asarray(pz_list[tt],
                                                 dtype=np.float32))

    # ============
    # setting sizes according to 2d or 3d
    # ============
    if mode == '3D':  # size [num_patients, nz, nx, ny]
        nz_max, nx, ny = size
        n_train = len(file_list['train'])  # number of patients
        n_test = len(file_list['test'])
        n_val = len(file_list['validation'])

    elif mode == '2D':  # size [num_z_slices_across_all_patients, nx, ny]
        nx, ny = size
        n_test = num_slices['test']
        n_train = num_slices['train']
        n_val = num_slices['validation']

    else:
        raise AssertionError('Wrong mode setting. This should never happen.')

    # ============
    # creating datasets for images and labels
    # ============
    data = {}
    for tt, num_points in zip(['test', 'train', 'validation'],
                              [n_test, n_train, n_val]):

        if num_points > 0:
            data['images_%s' % tt] = hdf5_file.create_dataset(
                "images_%s" % tt, [num_points] + list(size), dtype=np.float32)
            data['labels_%s' % tt] = hdf5_file.create_dataset(
                "labels_%s" % tt, [num_points] + list(size), dtype=np.uint8)

    image_list = {'test': [], 'train': [], 'validation': []}
    label_list = {'test': [], 'train': [], 'validation': []}

    for train_test in ['test', 'train', 'validation']:

        write_buffer = 0
        counter_from = 0
        patient_counter = 0

        for image_file in file_list[train_test]:

            patient_counter += 1

            logging.info('============================================')
            logging.info('Doing: %s' % image_file)

            # ============
            # read image
            # ============
            image_dat = utils.load_nii(image_file)
            image = image_dat[0].copy()

            # ============
            # normalize the image to be between 0 and 1
            # ============
            image = utils.normalise_image(image, norm_type='div_by_max')

            # ============
            # read label
            # ============
            file_base = image_file.split('_n4.nii.gz')[0]
            label_file = file_base + '_gt.nii.gz'
            label_dat = utils.load_nii(label_file)
            label = label_dat[0].copy()

            # ============
            # set RV label to 1 and other labels to 0, as the RVSC dataset only has labels for the RV
            # original labels: 0 background, 1 right ventricle, 2 myocardium, 3 left ventricle
            # ============
            # label[label!=1] = 0

            # ============
            # original pixel size (px, py, pz)
            # ============
            pixel_size = (image_dat[2].structarr['pixdim'][1],
                          image_dat[2].structarr['pixdim'][2],
                          image_dat[2].structarr['pixdim'][3])

            # ========================================================================
            # PROCESSING LOOP FOR 3D DATA
            # ========================================================================
            if mode == '3D':

                # rescaling ratio
                scale_vector = [
                    pixel_size[0] / target_resolution[0],
                    pixel_size[1] / target_resolution[1],
                    pixel_size[2] / target_resolution[2]
                ]

                # ==============================
                # rescale image and label
                # ==============================
                image_scaled = transform.rescale(image,
                                                 scale_vector,
                                                 order=1,
                                                 preserve_range=True,
                                                 multichannel=False,
                                                 mode='constant')
                label_scaled = transform.rescale(label,
                                                 scale_vector,
                                                 order=0,
                                                 preserve_range=True,
                                                 multichannel=False,
                                                 mode='constant')

                # ==============================
                # ==============================
                image_scaled = utils.crop_or_pad_volume_to_size_along_z(
                    image_scaled, nz_max)
                label_scaled = utils.crop_or_pad_volume_to_size_along_z(
                    label_scaled, nz_max)

                # ==============================
                # nz_max is the z-dimension provided in the 'size' parameter
                # ==============================
                image_vol = np.zeros((nx, ny, nz_max), dtype=np.float32)
                label_vol = np.zeros((nx, ny, nz_max), dtype=np.uint8)

                # ===============================
                # going through each z slice
                # ===============================
                for zz in range(nz_max):

                    image_slice = image_scaled[:, :, zz]
                    label_slice = label_scaled[:, :, zz]

                    # cropping / padding with zeros the x-y slice at this z location
                    image_slice_cropped = utils.crop_or_pad_slice_to_size(
                        image_slice, nx, ny)
                    label_slice_cropped = utils.crop_or_pad_slice_to_size(
                        label_slice, nx, ny)

                    image_vol[:, :, zz] = image_slice_cropped
                    label_vol[:, :, zz] = label_slice_cropped

                # ===============================
                # swap axes to maintain consistent orientation as compared to 2d pre-processing
                # ===============================
                image_vol = image_vol.swapaxes(0, 2).swapaxes(1, 2)
                label_vol = label_vol.swapaxes(0, 2).swapaxes(1, 2)

                # ===============================
                # append to list that will be written to the hdf5 file
                # ===============================
                image_list[train_test].append(image_vol)
                label_list[train_test].append(label_vol)

                write_buffer += 1

                # ===============================
                # writing the images and labels pre-processed so far to the hdf5 file
                # ===============================
                if write_buffer >= MAX_WRITE_BUFFER:

                    counter_to = counter_from + write_buffer
                    _write_range_to_hdf5(data, train_test, image_list,
                                         label_list, counter_from, counter_to)
                    _release_tmp_memory(image_list, label_list, train_test)

                    # reset stuff for next iteration
                    counter_from = counter_to
                    write_buffer = 0

            # ========================================================================
            # PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA
            # ========================================================================
            elif mode == '2D':

                scale_vector = [
                    pixel_size[0] / target_resolution[0],
                    pixel_size[1] / target_resolution[1]
                ]

                # ===============================
                # go through each z slice, rescale and crop and append.
                # in this process, the z axis will become the zeroth axis
                # ===============================
                for zz in range(image.shape[2]):

                    image_slice = np.squeeze(image[:, :, zz])
                    label_slice = np.squeeze(label[:, :, zz])

                    image_slice_rescaled = transform.rescale(
                        image_slice,
                        scale_vector,
                        order=1,
                        preserve_range=True,
                        multichannel=False,
                        mode='constant')
                    label_slice_rescaled = transform.rescale(
                        label_slice,
                        scale_vector,
                        order=0,
                        preserve_range=True,
                        multichannel=False,
                        mode='constant')

                    image_slice_cropped = utils.crop_or_pad_slice_to_size(
                        image_slice_rescaled, nx, ny)
                    label_slice_cropped = utils.crop_or_pad_slice_to_size(
                        label_slice_rescaled, nx, ny)

                    image_list[train_test].append(image_slice_cropped)
                    label_list[train_test].append(label_slice_cropped)

                    write_buffer += 1

                    # Writing needs to happen inside the loop over the slices
                    if write_buffer >= MAX_WRITE_BUFFER:

                        counter_to = counter_from + write_buffer
                        _write_range_to_hdf5(data, train_test, image_list,
                                             label_list, counter_from,
                                             counter_to)
                        _release_tmp_memory(image_list, label_list, train_test)

                        # reset stuff for next iteration
                        counter_from = counter_to
                        write_buffer = 0

        logging.info('Writing remaining data')
        counter_to = counter_from + write_buffer

        _write_range_to_hdf5(data, train_test, image_list, label_list,
                             counter_from, counter_to)
        _release_tmp_memory(image_list, label_list, train_test)

    # After test train loop:
    hdf5_file.close()
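
# ===============================================================
# _write_range_to_hdf5, _release_tmp_memory and MAX_WRITE_BUFFER are
# module-level helpers that never appear in these excerpts. A minimal
# sketch consistent with how they are called above (buffered writes of
# the accumulated slices/volumes into the pre-allocated hdf5 datasets);
# the constant's value is an assumption:
# ===============================================================
import gc
import numpy as np

MAX_WRITE_BUFFER = 5  # assumed; the original value is not shown

def _write_range_to_hdf5_sketch(data, train_test, img_list, label_list,
                                counter_from, counter_to):
    # stack the buffered arrays and write them into the open hdf5 datasets
    img_arr = np.asarray(img_list[train_test], dtype=np.float32)
    lbl_arr = np.asarray(label_list[train_test], dtype=np.uint8)
    data['images_%s' % train_test][counter_from:counter_to, ...] = img_arr
    data['labels_%s' % train_test][counter_from:counter_to, ...] = lbl_arr

def _release_tmp_memory_sketch(img_list, label_list, train_test):
    # clear the per-split buffers so memory stays bounded
    img_list[train_test].clear()
    label_list[train_test].clear()
    gc.collect()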
Example #11
def prepare_data(input_folder, output_file, idx_start, idx_end, protocol, size,
                 depth, target_resolution, preprocessing_folder):

    # ========================
    # read the filenames
    # ========================
    folders_list = sorted(glob.glob(input_folder + '/*/'))
    logging.info('Number of images in the dataset: %s' %
                 str(len(folders_list)))

    # =======================
    # create a hdf5 file
    # =======================
    hdf5_file = h5py.File(output_file, "w")

    # ===============================
    # Create datasets for images and labels
    # ===============================
    data = {}
    num_slices = count_slices(folders_list, idx_start, idx_end, depth)

    data['images'] = hdf5_file.create_dataset("images",
                                              [num_slices] + list(size),
                                              dtype=np.float32)
    data['labels'] = hdf5_file.create_dataset("labels",
                                              [num_slices] + list(size),
                                              dtype=np.uint8)

    # ===============================
    # initialize lists
    # ===============================
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []

    # ===============================
    # ===============================
    write_buffer = 0
    counter_from = 0

    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):

        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(
            folders_list[idx])

        # ============
        # read the image and normalize it to be between 0 and 1
        # ============
        image, _, image_hdr = utils.load_nii(image_path)
        image = np.swapaxes(
            image, 1, 2
        )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets

        # ==================
        # read the label file
        # ==================
        label, _, _ = utils.load_nii(label_path)
        label = np.swapaxes(
            label, 1, 2
        )  # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        # labels have already been grouped as required

        # ============
        # create a segmentation mask and use it to get rid of the skull in the image
        # ============
        label_mask = np.copy(label)
        label_mask[label > 0] = 1
        image = image * label_mask

        # ==================
        # crop out some portion of the image, which are all zeros (rough registration via visual inspection)
        # ==================
        image, label = center_image_and_label(image, label)

        # plt.figure(); plt.imshow(image[:,:,50], cmap='gray'); plt.title(patient_name); plt.show(); plt.close()

        # ==================
        # crop volume along z axis (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
        label = utils.crop_or_pad_volume_to_size_along_z(label, depth)

        # ==================
        # collect some header info.
        # ==================
        px_list.append(float(image_hdr.get_zooms()[0]))
        py_list.append(
            float(image_hdr.get_zooms()[2])
        )  # since axes 1 and 2 have been swapped. this is important when dealing with pixel dimensions
        pz_list.append(float(image_hdr.get_zooms()[1]))
        nx_list.append(image.shape[0])
        ny_list.append(
            image.shape[1]
        )  # since axes 1 and 2 have been swapped. however, only the final axis locations are relevant when dealing with shapes
        nz_list.append(image.shape[2])
        pat_names_list.append(patient_name)

        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')

        # ======================================================
        ### PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA ###################
        # ======================================================
        scale_vector = [
            image_hdr.get_zooms()[0] / target_resolution[0],
            image_hdr.get_zooms()[2] / target_resolution[1]
        ]  # since axes 1 and 2 have been swapped. this is important when dealing with pixel dimensions

        for zz in range(image.shape[2]):

            # ============
            # rescale the images and labels so that their orientation matches that of the nci dataset
            # ============
            image2d_rescaled = rescale(np.squeeze(image_normalized[:, :, zz]),
                                       scale_vector,
                                       order=1,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')

            label2d_rescaled = rescale(np.squeeze(label[:, :, zz]),
                                       scale_vector,
                                       order=0,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')

            # ============
            # rotate to align with other datasets (k=0, i.e. no rotation is actually applied for this dataset)
            # ============
            image2d_rescaled_rotated = np.rot90(image2d_rescaled, k=0)
            label2d_rescaled_rotated = np.rot90(label2d_rescaled, k=0)

            # ============
            # crop or pad to make of the same size
            # ============
            image2d_rescaled_rotated_cropped = utils.crop_or_pad_slice_to_size(
                image2d_rescaled_rotated, size[0], size[1])
            label2d_rescaled_rotated_cropped = utils.crop_or_pad_slice_to_size(
                label2d_rescaled_rotated, size[0], size[1])

            # ============
            # append to list
            # ============
            image_list.append(image2d_rescaled_rotated_cropped)
            label_list.append(label2d_rescaled_rotated_cropped)

            write_buffer += 1

            # Writing needs to happen inside the loop over the slices
            if write_buffer >= MAX_WRITE_BUFFER:

                counter_to = counter_from + write_buffer

                _write_range_to_hdf5(data, image_list, label_list,
                                     counter_from, counter_to)

                _release_tmp_memory(image_list, label_list)

                # update counters
                counter_from = counter_to
                write_buffer = 0

    logging.info('Writing remaining data')
    counter_to = counter_from + write_buffer
    _write_range_to_hdf5(data, image_list, label_list, counter_from,
                         counter_to)
    _release_tmp_memory(image_list, label_list)

    # Write the small datasets
    hdf5_file.create_dataset('nx', data=np.asarray(nx_list, dtype=np.uint16))
    hdf5_file.create_dataset('ny', data=np.asarray(ny_list, dtype=np.uint16))
    hdf5_file.create_dataset('nz', data=np.asarray(nz_list, dtype=np.uint16))
    hdf5_file.create_dataset('px', data=np.asarray(px_list, dtype=np.float32))
    hdf5_file.create_dataset('py', data=np.asarray(py_list, dtype=np.float32))
    hdf5_file.create_dataset('pz', data=np.asarray(pz_list, dtype=np.float32))
    hdf5_file.create_dataset('patnames',
                             data=np.asarray(pat_names_list, dtype="S10"))

    # close the hdf5 file
    hdf5_file.close()
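
# ===============================================================
# utils.crop_or_pad_slice_to_size is the 2D counterpart of the z-axis
# helper: it centre-crops or zero-pads a slice to (nx, ny). A minimal
# sketch (the real utils implementation may differ):
# ===============================================================
import numpy as np

def crop_or_pad_slice_to_size_sketch(sl, nx, ny):
    out = np.zeros((nx, ny), dtype=sl.dtype)
    x, y = sl.shape
    cx, cy = max((x - nx) // 2, 0), max((y - ny) // 2, 0)   # crop offsets
    ox, oy = max((nx - x) // 2, 0), max((ny - y) // 2, 0)   # pad offsets
    cropped = sl[cx:cx + min(x, nx), cy:cy + min(y, ny)]
    out[ox:ox + cropped.shape[0], oy:oy + cropped.shape[1]] = cropped
    return out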
Example #12
def prepare_data(input_image_folder, input_mask_folder, output_file, size,
                 target_resolution):
    '''
    Main function that prepares a dataset from the raw challenge data to an hdf5 dataset
    '''

    hdf5_file = h5py.File(output_file, "w")

    expert_list = [
        'Readings_AH', 'Readings_EK', 'Readings_KC', 'Readings_KS',
        'Readings_OD', 'Readings_UM'
    ]
    num_annotators = len(expert_list)

    logging.info('Counting files and parsing meta data...')
    patient_id_list = {'test': [], 'train': [], 'validation': []}

    image_file_list = {'test': [], 'train': [], 'validation': []}
    mask_file_list = {'test': [], 'train': [], 'validation': []}

    num_slices = {'test': 0, 'train': 0, 'validation': 0}


    for folder in os.listdir(input_image_folder):

        folder_path = os.path.join(input_image_folder, folder)
        if os.path.isdir(folder_path) and folder.startswith('888'):

            patient_id = int(folder.lstrip('888'))  # note: lstrip strips all leading '8' characters, not just a literal '888' prefix

            if patient_id == 9:
                logging.info(
                    'WARNING: Skipping case 9, because one annotation has wrong dimensions...'
                )
                continue

            if patient_id % 5 == 0:
                train_test = 'test'
            elif patient_id % 4 == 0:
                train_test = 'validation'
            else:
                train_test = 'train'

            file_path = os.path.join(folder_path, 't2_tse_tra.nii.gz')

            annotator_mask_list = []
            for exp in expert_list:
                mask_folder = os.path.join(input_mask_folder, exp)
                file = glob.glob(
                    os.path.join(mask_folder,
                                 '*' + str(patient_id).zfill(4) + '_*.nii.gz'))
                # for ii in range(len(file)):
                #     if 'NCI' in file[ii]:
                #         del file[ii]
                assert len(
                    file
                ) == 1, 'more or less than one file matches the glob pattern %s' % (
                    '*' + str(patient_id).zfill(4) + '_*.nii.gz')
                annotator_mask_list.append(file[0])

            mask_file_list[train_test].append(annotator_mask_list)
            image_file_list[train_test].append(file_path)

            patient_id_list[train_test].append(patient_id)

            nifty_img = nib.load(file_path)
            num_slices[train_test] += nifty_img.shape[2]

    # Write the small datasets
    for tt in ['test', 'train', 'validation']:
        hdf5_file.create_dataset('patient_id_%s' % tt,
                                 data=np.asarray(patient_id_list[tt],
                                                 dtype=np.uint8))

    nx, ny = size
    n_test = num_slices['test']
    n_train = num_slices['train']
    n_val = num_slices['validation']

    print('Debug: Check if sets add up to correct value:')
    print(n_train, n_val, n_test, n_train + n_val + n_test)

    # Create datasets for images and masks
    data = {}
    for tt, num_points in zip(['test', 'train', 'validation'],
                              [n_test, n_train, n_val]):

        if num_points > 0:
            data['images_%s' % tt] = hdf5_file.create_dataset(
                "images_%s" % tt, [num_points] + list(size), dtype=np.float32)
            data['masks_%s' % tt] = hdf5_file.create_dataset(
                "masks_%s" % tt, [num_points] + list(size) + [num_annotators],
                dtype=np.uint8)

    mask_list = {'test': [], 'train': [], 'validation': []}
    img_list = {'test': [], 'train': [], 'validation': []}

    logging.info('Parsing image files')

    for train_test in ['test', 'train', 'validation']:

        write_buffer = 0
        counter_from = 0

        patient_counter = 0
        for img_file, mask_files in zip(image_file_list[train_test],
                                        mask_file_list[train_test]):

            patient_counter += 1

            logging.info(
                '-----------------------------------------------------------')
            logging.info('Doing: %s' % img_file)

            img_dat = utils.load_nii(img_file)
            img = img_dat[0]

            masks = []
            for mf in mask_files:
                mask_dat = utils.load_nii(mf)
                masks.append(mask_dat[0])
            masks_arr = np.asarray(masks)  # annotator, size_x, size_y, size_z
            masks_arr = masks_arr.transpose(
                (1, 2, 3, 0))  # size_x, size_y, size_z, annotator

            img = utils.normalise_image(img)

            pixel_size = (img_dat[2].structarr['pixdim'][1],
                          img_dat[2].structarr['pixdim'][2],
                          img_dat[2].structarr['pixdim'][3])

            logging.info('Pixel size:')
            logging.info(pixel_size)

            scale_vector = [
                pixel_size[0] / target_resolution[0],
                pixel_size[1] / target_resolution[1]
            ]

            for zz in range(img.shape[2]):

                slice_img = np.squeeze(img[:, :, zz])
                slice_rescaled = transform.rescale(slice_img,
                                                   scale_vector,
                                                   order=1,
                                                   preserve_range=True,
                                                   multichannel=False,
                                                   mode='constant')

                slice_mask = np.squeeze(masks_arr[:, :, zz, :])
                mask_rescaled = transform.rescale(slice_mask,
                                                  scale_vector,
                                                  order=0,
                                                  preserve_range=True,
                                                  multichannel=True,
                                                  mode='constant')

                slice_cropped = crop_or_pad_slice_to_size(
                    slice_rescaled, nx, ny)
                mask_cropped = crop_or_pad_slice_to_size(mask_rescaled, nx, ny)

                # REMOVE SEMINAL VESICLES
                mask_cropped[mask_cropped == 3] = 0

                # DEBUG
                # import matplotlib.pyplot as plt
                # plt.figure()
                # plt.imshow(slice_img)
                #
                # plt.figure()
                # plt.imshow(slice_rescaled)
                #
                # plt.figure()
                # plt.imshow(slice_cropped)
                #
                # plt.show()
                # END DEBUG

                img_list[train_test].append(slice_cropped)
                mask_list[train_test].append(mask_cropped)

                write_buffer += 1

                # Writing needs to happen inside the loop over the slices
                if write_buffer >= MAX_WRITE_BUFFER:
                    counter_to = counter_from + write_buffer
                    _write_range_to_hdf5(data, train_test, img_list, mask_list,
                                         counter_from, counter_to)
                    _release_tmp_memory(img_list, mask_list, train_test)

                    # reset stuff for next iteration
                    counter_from = counter_to
                    write_buffer = 0

        # after file loop: Write the remaining data

        logging.info('Writing remaining data')
        counter_to = counter_from + write_buffer

        _write_range_to_hdf5(data, train_test, img_list, mask_list,
                             counter_from, counter_to)
        _release_tmp_memory(img_list, mask_list, train_test)

    # After test train loop:
    hdf5_file.close()
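
# ===============================================================
# The inline modulo rule above (id % 5 -> test, id % 4 -> validation,
# else train) mirrors what the test_train_val_split helper used in
# other examples presumably does. A sketch; the cv_fold_number logic
# is not shown in these excerpts, so it is ignored here:
# ===============================================================
def test_train_val_split_sketch(patient_id, cv_fold_number=1):
    if patient_id % 5 == 0:
        return 'test'
    if patient_id % 4 == 0:
        return 'validation'
    return 'train'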
Example #13
def prepare_data(input_folder,
                 output_file,
                 idx_start,
                 idx_end,
                 protocol,
                 size,
                 depth,
                 target_resolution,
                 preprocessing_folder):

    # ========================    
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # =======================
    # create a new hdf5 file
    # =======================
    hdf5_file = h5py.File(output_file, "w")

    # ===============================
    # Create datasets for images and labels
    # ===============================
    data = {}
    num_slices = count_slices(filenames,
                              idx_start,
                              idx_end,
                              protocol,
                              preprocessing_folder,
                              depth)
    
    # ===============================
    # the sizes of the image and label arrays are set as: [(number of coronal slices per subject*number of subjects), size of coronal slices]
    # ===============================
    data['images'] = hdf5_file.create_dataset("images", [num_slices] + list(size), dtype=np.float32)
    data['labels'] = hdf5_file.create_dataset("labels", [num_slices] + list(size), dtype=np.uint8)
    
    # ===============================
    # initialize lists
    # ===============================        
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []
    
    # ===============================      
    # initialize counters
    # ===============================        
    write_buffer = 0
    counter_from = 0
    
    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):
        
        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(filenames[idx],
                                                                         protocol,
                                                                         preprocessing_folder)
        
        # ============
        # read the image and normalize it to be between 0 and 1
        # ============
        image, _, image_hdr = utils.load_nii(image_path)
        image = np.swapaxes(image, 1, 2) # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        
        # ==================
        # read the label file
        # ==================        
        label, _, _ = utils.load_nii(label_path)        
        label = np.swapaxes(label, 1, 2) # swap axes 1 and 2 -> this allows appending along axis 2, as in other datasets
        label = utils.group_segmentation_classes(label) # group the segmentation classes as required
                
        # ==================
        # crop volume along z axis (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_z(image, depth)
        label = utils.crop_or_pad_volume_to_size_along_z(label, depth)     

        # ==================
        # collect some header info.
        # ==================
        px_list.append(float(image_hdr.get_zooms()[0]))
        py_list.append(float(image_hdr.get_zooms()[2])) # since axes 1 and 2 have been swapped
        pz_list.append(float(image_hdr.get_zooms()[1]))
        nx_list.append(image.shape[0]) 
        ny_list.append(image.shape[1]) # since axes 1 and 2 have been swapped
        nz_list.append(image.shape[2])
        pat_names_list.append(patient_name)
        
        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')
                        
        # ======================================================  
        ### PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA ###################
        # ======================================================
        scale_vector = [image_hdr.get_zooms()[0] / target_resolution[0],
                        image_hdr.get_zooms()[2] / target_resolution[1]] # since axes 1 and 2 have been swapped

        for zz in range(image.shape[2]):

            # ============
            # rescale the images and labels so that their in-plane resolution matches the target resolution (that of the nci dataset)
            # ============
            image2d_rescaled = rescale(np.squeeze(image_normalized[:, :, zz]),
                                       scale_vector,
                                       order=1,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')
 
            label2d_rescaled = rescale(np.squeeze(label[:, :, zz]),
                                       scale_vector,
                                       order=0,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')
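            # note: order=1 (bilinear) interpolates the image intensities,
            # while order=0 (nearest neighbour) keeps the label values as
            # valid integer class ids after rescaling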
            
            # ============
            # crop or pad to the desired size
            # ============
            image2d_rescaled_cropped = utils.crop_or_pad_slice_to_size(image2d_rescaled, size[0], size[1])
            label2d_rescaled_cropped = utils.crop_or_pad_slice_to_size(label2d_rescaled, size[0], size[1])

            # ============
            # append to list
            # ============
            image_list.append(image2d_rescaled_cropped)
            label_list.append(label2d_rescaled_cropped)

            # ============   
            # increment counter
            # ============   
            write_buffer += 1

            # ============   
            # Writing needs to happen inside the loop over the slices
            # ============   
            if write_buffer >= MAX_WRITE_BUFFER:

                counter_to = counter_from + write_buffer

                _write_range_to_hdf5(data,
                                     image_list,
                                     label_list,
                                     counter_from,
                                     counter_to)
                
                _release_tmp_memory(image_list,
                                    label_list)

                # ============   
                # update counters 
                # ============   
                counter_from = counter_to
                write_buffer = 0
        
    # ============   
    # write leftover data
    # ============   
    logging.info('Writing remaining data')
    counter_to = counter_from + write_buffer
    _write_range_to_hdf5(data,
                         image_list,
                         label_list,
                         counter_from,
                         counter_to)
    _release_tmp_memory(image_list,
                        label_list)

    # ============   
    # Write the small datasets - image resolutions, sizes, patient ids
    # ============   
    hdf5_file.create_dataset('nx', data=np.asarray(nx_list, dtype=np.uint16))
    hdf5_file.create_dataset('ny', data=np.asarray(ny_list, dtype=np.uint16))
    hdf5_file.create_dataset('nz', data=np.asarray(nz_list, dtype=np.uint16))
    hdf5_file.create_dataset('px', data=np.asarray(px_list, dtype=np.float32))
    hdf5_file.create_dataset('py', data=np.asarray(py_list, dtype=np.float32))
    hdf5_file.create_dataset('pz', data=np.asarray(pz_list, dtype=np.float32))
    hdf5_file.create_dataset('patnames', data=np.asarray(pat_names_list, dtype="S10"))
    
    # ============   
    # close the hdf5 file
    # ============   
    hdf5_file.close()
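
# ==============================
# (hedged sketch) _write_range_to_hdf5 and _release_tmp_memory are called
# above but not defined in this snippet; minimal versions consistent with
# the 5-argument calls used here might look like the following. Other
# examples in this file call variants keyed by the train/test split, whose
# signatures differ.
# ==============================
import gc  # assumed available; np is imported at the top of this file

def _write_range_to_hdf5(data, image_list, label_list, counter_from,
                         counter_to):
    # stack the buffered 2d slices and write them into the pre-allocated
    # hdf5 datasets at rows [counter_from, counter_to)
    data['images'][counter_from:counter_to, ...] = np.asarray(
        image_list, dtype=np.float32)
    data['labels'][counter_from:counter_to, ...] = np.asarray(
        label_list, dtype=np.uint8)

def _release_tmp_memory(image_list, label_list):
    # empty the in-memory buffers in place once their contents are on disk
    image_list.clear()
    label_list.clear()
    gc.collect()
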
def prepare_data(input_folder, output_file, mode, size, target_resolution,
                 cv_fold_num):
    '''
    Main function that prepares a dataset from the raw challenge data to an hdf5 dataset
    '''

    hdf5_file = h5py.File(output_file, "w")

    cardiac_phase_list = {'test': [], 'train': [], 'validation': []}
    nx_list = {'test': [], 'train': [], 'validation': []}
    ny_list = {'test': [], 'train': [], 'validation': []}
    nz_list = {'test': [], 'train': [], 'validation': []}
    px_list = {'test': [], 'train': [], 'validation': []}
    py_list = {'test': [], 'train': [], 'validation': []}
    pz_list = {'test': [], 'train': [], 'validation': []}

    num_slices = {'test': 0, 'train': 0, 'validation': 0}

    # ==============================
    # read all images and save header info
    # ==============================
    image_folders_list, label_folders_list, patient_id_list = get_patient_folders(
        input_folder, cv_fold_num)

    for tt in ['train', 'test', 'validation']:

        for sub_id in range(len(image_folders_list[tt])):

            image_details = get_image_details(image_folders_list[tt][sub_id])

            for _ in range(2):  # append details for 2 volumes - ED and ES

                px_list[tt].append(image_details[0])
                py_list[tt].append(image_details[1])
                pz_list[tt].append(image_details[2])

                nx_list[tt].append(image_details[3])
                ny_list[tt].append(image_details[4])
                nz_list[tt].append(image_details[5])

                num_slices[tt] += image_details[5]

            cardiac_phase_list[tt].append(1)  # 'ES'
            cardiac_phase_list[tt].append(2)  # 'ED'

    # ==============================
    # Write the small datasets
    # ==============================
    for tt in ['test', 'train', 'validation']:
        hdf5_file.create_dataset('patient_id_%s' % tt,
                                 data=np.asarray(patient_id_list[tt],
                                                 dtype=np.uint8))
        hdf5_file.create_dataset('cardiac_phase_%s' % tt,
                                 data=np.asarray(cardiac_phase_list[tt],
                                                 dtype=np.uint8))
        hdf5_file.create_dataset('nz_%s' % tt,
                                 data=np.asarray(nz_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('ny_%s' % tt,
                                 data=np.asarray(ny_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('nx_%s' % tt,
                                 data=np.asarray(nx_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('py_%s' % tt,
                                 data=np.asarray(py_list[tt],
                                                 dtype=np.float32))
        hdf5_file.create_dataset('px_%s' % tt,
                                 data=np.asarray(px_list[tt],
                                                 dtype=np.float32))
        hdf5_file.create_dataset('pz_%s' % tt,
                                 data=np.asarray(pz_list[tt],
                                                 dtype=np.float32))

    # ==============================
    # set dimensions for the hdf5 file
    # ==============================
    nx, ny = size
    n_test = num_slices['test']
    n_train = num_slices['train']
    n_val = num_slices['validation']

    # ==============================
    # Create datasets for images and labels
    # ==============================
    data = {}
    for tt, num_points in zip(['test', 'train', 'validation'],
                              [n_test, n_train, n_val]):

        if num_points > 0:
            data['images_%s' % tt] = hdf5_file.create_dataset(
                "images_%s" % tt, [num_points] + list(size), dtype=np.float32)
            data['labels_%s' % tt] = hdf5_file.create_dataset(
                "labels_%s" % tt, [num_points] + list(size), dtype=np.uint8)

    label_list = {'test': [], 'train': [], 'validation': []}
    image_list = {'test': [], 'train': [], 'validation': []}

    # ==============================
    # read each image and label
    # ==============================
    logging.info('Parsing image files')

    for train_test in ['test', 'train', 'validation']:

        write_buffer = 0
        counter_from = 0
        patient_counter = 0

        for sub_num in range(len(image_folders_list[train_test])):

            patient_counter += 1

            logging.info('============================================')
            logging.info('Doing: %s' % image_folders_list[train_test][sub_num])

            # ==============================
            # First, get ed_es_diff for this subject
            # ==============================
            for item in os.listdir(
                    image_folders_list[train_test][sub_num][:-8]):
                if 'list.txt' in item:
                    slice_ids_with_annotations = []
                    with open(
                            image_folders_list[train_test][sub_num][:-8] +
                            item, "r") as text_file:
                        for line in text_file.readlines():
                            slice_ids_with_annotations.append(
                                int(float(line[-25:-21])))

            for slice_id in np.unique(slice_ids_with_annotations):
                if slice_id % 20 != 0:
                    ed_es_diff = int(slice_id % 20)
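            # note: ed_es_diff is thus set from the last annotated slice id
            # that is not a multiple of 20 -- presumably the frame offset
            # between the ED and ES phases, as the variable name suggests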

            # ==============================
            # read image and label
            # ==============================
            image_ED, image_ES, px, py, pz = read_image(
                image_folders_list[train_test][sub_num],
                ed_es_diff,
                nifti_available=False)
            label_ED, label_ES = read_label(
                label_folders_list[train_test][sub_num],
                image_ED.shape,
                ed_es_diff,
                nifti_available=False)

            img_ED = image_ED.copy()
            img_ES = image_ES.copy()
            lab_ED = label_ED.copy()
            lab_ES = label_ES.copy()

            # ============
            # normalize the image to be between 0 and 1
            # ============
            img_ED = utils.normalise_image(img_ED, norm_type='div_by_max')
            img_ES = utils.normalise_image(img_ES, norm_type='div_by_max')

            pixel_size = (px, py, pz)
            scale_vector = [
                pixel_size[0] / target_resolution[0],
                pixel_size[1] / target_resolution[1]
            ]
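            # e.g. (illustrative numbers) an in-plane pixel size of 1.25 mm
            # with a target resolution of 1.0 mm gives a scale factor of
            # 1.25, i.e. each slice is upsampled before cropping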

            # ============
            # rescale and write to hdf5 the ED image and label
            # ============
            for zz in range(img_ED.shape[2]):

                slice_rescaled_ED = transform.rescale(
                    np.squeeze(img_ED[:, :, zz]),
                    scale_vector,
                    order=1,
                    preserve_range=True,
                    multichannel=False,
                    mode='constant')

                label_rescaled_ED = transform.rescale(
                    np.squeeze(lab_ED[:, :, zz]),
                    scale_vector,
                    order=0,
                    preserve_range=True,
                    multichannel=False,
                    mode='constant')

                slice_cropped_ED = utils.crop_or_pad_slice_to_size(
                    slice_rescaled_ED, nx, ny)
                label_cropped_ED = utils.crop_or_pad_slice_to_size(
                    label_rescaled_ED, nx, ny)

                # ============
                # rotation by 90 degrees
                # ============
                slice_cropped_ED = np.rot90(slice_cropped_ED, k=-1)
                label_cropped_ED = np.rot90(label_cropped_ED, k=-1)

                image_list[train_test].append(slice_cropped_ED)
                label_list[train_test].append(label_cropped_ED)

                write_buffer += 1

                # Writing needs to happen inside the loop over the slices
                if write_buffer >= MAX_WRITE_BUFFER:

                    counter_to = counter_from + write_buffer
                    _write_range_to_hdf5(data, train_test, image_list,
                                         label_list, counter_from, counter_to)
                    _release_tmp_memory(image_list, label_list, train_test)

                    # reset stuff for next iteration
                    counter_from = counter_to
                    write_buffer = 0

            # ============
            # rescale and write to hdf5 the ES image and label
            # ============
            for zz in range(img_ES.shape[2]):

                slice_rescaled_ES = transform.rescale(
                    np.squeeze(img_ES[:, :, zz]),
                    scale_vector,
                    order=1,
                    preserve_range=True,
                    multichannel=False,
                    mode='constant')

                label_rescaled_ES = transform.rescale(
                    np.squeeze(lab_ES[:, :, zz]),
                    scale_vector,
                    order=0,
                    preserve_range=True,
                    multichannel=False,
                    mode='constant')

                slice_cropped_ES = utils.crop_or_pad_slice_to_size(
                    slice_rescaled_ES, nx, ny)
                label_cropped_ES = utils.crop_or_pad_slice_to_size(
                    label_rescaled_ES, nx, ny)

                # ============
                # rotation by 90 degrees
                # ============
                slice_cropped_ES = np.rot90(slice_cropped_ES, k=-1)
                label_cropped_ES = np.rot90(label_cropped_ES, k=-1)

                image_list[train_test].append(slice_cropped_ES)
                label_list[train_test].append(label_cropped_ES)

                write_buffer += 1

                # Writing needs to happen inside the loop over the slices
                if write_buffer >= MAX_WRITE_BUFFER:

                    counter_to = counter_from + write_buffer
                    _write_range_to_hdf5(data, train_test, image_list,
                                         label_list, counter_from, counter_to)
                    _release_tmp_memory(image_list, label_list, train_test)

                    # reset stuff for next iteration
                    counter_from = counter_to
                    write_buffer = 0

        # ============
        # write remaining data
        # ============
        logging.info('Writing remaining data')
        counter_to = counter_from + write_buffer

        _write_range_to_hdf5(data, train_test, image_list, label_list,
                             counter_from, counter_to)
        _release_tmp_memory(image_list, label_list, train_test)

    # After test train loop:
    hdf5_file.close()
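
# ==============================
# (hedged usage sketch) illustrative call only -- the paths, size and
# resolution values are assumptions, not taken from the original code
# ==============================
# prepare_data(input_folder='/data/cardiac/',
#              output_file='data_2d_size_212_212_res_1.37_1.37_cv1.hdf5',
#              mode='2D',
#              size=(212, 212),
#              target_resolution=(1.37, 1.37),
#              cv_fold_num=1)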
Example #15
def prepare_data(input_folder, output_filepath, idx_start, idx_end, size,
                 target_resolution, labeller):

    # ===============================
    # create a hdf5 file
    # ===============================
    hdf5_file = h5py.File(output_filepath, "w")

    # ===============================
    # read all the patient folders from the base input folder
    # ===============================
    folder_list = []
    for folder in os.listdir(input_folder):
        folder_path = os.path.join(input_folder, folder)
        if os.path.isdir(folder_path) and 't2_tse_tra.nii.gz' in os.listdir(
                folder_path):
            if 'segmentation_' + labeller + '.nii.gz' in os.listdir(
                    folder_path
            ) or 'segmentation_tra_' + labeller + '.nii.gz' in os.listdir(
                    folder_path):
                folder_list.append(folder_path)

    # ===============================
    # Create datasets for images and labels
    # ===============================
    data = {}
    num_slices = count_slices(folder_list, idx_start, idx_end)
    data['images'] = hdf5_file.create_dataset("images",
                                              [num_slices] + list(size),
                                              dtype=np.float32)
    data['labels'] = hdf5_file.create_dataset("labels",
                                              [num_slices] + list(size),
                                              dtype=np.float32)

    # ===============================
    # initialize lists
    # ===============================
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []

    # ===============================
    # initialize counters
    # ===============================
    write_buffer = 0
    counter_from = 0
    patient_counter = 0

    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):

        patient_counter += 1

        # ==================
        # read the image file
        # ==================
        image, _, image_hdr = utils.load_nii(folder_list[idx] +
                                             '/t2_tse_tra_n4.nii.gz')

        # ============
        # normalize the image to be between 0 and 1
        # ============
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')

        # ==================
        # collect some header info.
        # ==================
        px_list.append(float(image_hdr.get_zooms()[0]))
        py_list.append(float(image_hdr.get_zooms()[1]))
        pz_list.append(float(image_hdr.get_zooms()[2]))
        nx_list.append(image.shape[0])
        ny_list.append(image.shape[1])
        nz_list.append(image.shape[2])
        pat_names_list.append(folder_list[idx][folder_list[idx].rfind('/') +
                                               1:])

        # ==================
        # read the label file
        # ==================
        if 'segmentation_' + labeller + '.nii.gz' in os.listdir(
                folder_list[idx]):
            label, _, _ = utils.load_nii(folder_list[idx] + '/segmentation_' +
                                         labeller + '.nii.gz')
        elif 'segmentation_tra_' + labeller + '.nii.gz' in os.listdir(
                folder_list[idx]):
            label, _, _ = utils.load_nii(folder_list[idx] +
                                         '/segmentation_tra_' + labeller +
                                         '.nii.gz')

        # ==================
        # remove extra label from some images
        # ==================
        label[label > 2] = 0
        logging.info('Unique labels after removing extras: %s' % np.unique(label))

        # ======================================================
        ### PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA ###################
        # ======================================================
        scale_vector = [
            image_hdr.get_zooms()[0] / target_resolution[0],
            image_hdr.get_zooms()[1] / target_resolution[1]
        ]

        for zz in range(image.shape[2]):

            # ============
            # rescale the images and labels so that their in-plane resolution matches the target resolution (that of the nci dataset)
            # ============
            image2d_rescaled = rescale(np.squeeze(image_normalized[:, :, zz]),
                                       scale_vector,
                                       order=1,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')

            label2d_rescaled = rescale(np.squeeze(label[:, :, zz]),
                                       scale_vector,
                                       order=0,
                                       preserve_range=True,
                                       multichannel=False,
                                       mode='constant')

            # ============
            # rotate the images and labels so that their orientation matches that of the nci dataset
            # ============
            image2d_rescaled_rotated = np.rot90(image2d_rescaled, k=3)
            label2d_rescaled_rotated = np.rot90(label2d_rescaled, k=3)

            # ============
            # crop or pad to make of the same size
            # ============
            image2d_rescaled_rotated_cropped = crop_or_pad_slice_to_size(
                image2d_rescaled_rotated, size[0], size[1])
            label2d_rescaled_rotated_cropped = crop_or_pad_slice_to_size(
                label2d_rescaled_rotated, size[0], size[1])

            image_list.append(image2d_rescaled_rotated_cropped)
            label_list.append(label2d_rescaled_rotated_cropped)

            write_buffer += 1

            # Writing needs to happen inside the loop over the slices
            if write_buffer >= MAX_WRITE_BUFFER:

                counter_to = counter_from + write_buffer

                _write_range_to_hdf5(data, image_list, label_list,
                                     counter_from, counter_to)

                _release_tmp_memory(image_list, label_list)

                # update counters
                counter_from = counter_to
                write_buffer = 0

    logging.info('Writing remaining data')
    counter_to = counter_from + write_buffer
    _write_range_to_hdf5(data, image_list, label_list, counter_from,
                         counter_to)
    _release_tmp_memory(image_list, label_list)

    # Write the small datasets
    hdf5_file.create_dataset('nx', data=np.asarray(nx_list, dtype=np.uint16))
    hdf5_file.create_dataset('ny', data=np.asarray(ny_list, dtype=np.uint16))
    hdf5_file.create_dataset('nz', data=np.asarray(nz_list, dtype=np.uint16))
    hdf5_file.create_dataset('px', data=np.asarray(px_list, dtype=np.float32))
    hdf5_file.create_dataset('py', data=np.asarray(py_list, dtype=np.float32))
    hdf5_file.create_dataset('pz', data=np.asarray(pz_list, dtype=np.float32))
    hdf5_file.create_dataset('patnames',
                             data=np.asarray(pat_names_list, dtype="S10"))

    # After test train loop:
    hdf5_file.close()
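
# ==============================
# (hedged sketch) crop_or_pad_slice_to_size is called above without the
# utils. prefix but is not defined in this snippet; a minimal centre-crop /
# zero-pad version consistent with the 2d calls above could look like this.
# (the next example calls a variant with a different signature, taking a
# full size tuple and an offset.)
# ==============================
def crop_or_pad_slice_to_size(slice_2d, nx, ny):
    x, y = slice_2d.shape
    x_s, y_s = (x - nx) // 2, (y - ny) // 2
    x_c, y_c = (nx - x) // 2, (ny - y) // 2
    slice_cropped = np.zeros((nx, ny), dtype=slice_2d.dtype)
    if x > nx and y > ny:
        # input larger in both dimensions: crop centrally
        slice_cropped = slice_2d[x_s:x_s + nx, y_s:y_s + ny]
    elif x <= nx and y > ny:
        # pad rows, crop columns
        slice_cropped[x_c:x_c + x, :] = slice_2d[:, y_s:y_s + ny]
    elif x > nx and y <= ny:
        # crop rows, pad columns
        slice_cropped[:, y_c:y_c + y] = slice_2d[x_s:x_s + nx, :]
    else:
        # input smaller in both dimensions: pad centrally
        slice_cropped[x_c:x_c + x, y_c:y_c + y] = slice_2d
    return slice_cropped
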
def prepare_data(input_folder, output_file, size, target_resolution,
                 labels_list, rescale_to_one, offset=None,
                 image_postfix='.nii.gz'):
    '''
    Main function that prepares a dataset from the raw challenge data to an hdf5 dataset
    '''

    csv_summary_file = os.path.join(input_folder, 'summary_alldata.csv')

    summary = pd.read_csv(csv_summary_file)
    # Use only cases that have imaging data (obs)
    summary = summary.loc[summary['image_exists'] == True]
    # Don't use images with unknown diagnosis
    summary = summary.loc[~(summary['diagnosis_3cat'] == 'unknown')]

    # Get list of unique rids
    rids = summary.rid.unique()

    # Get initial diagnosis for rough stratification
    diagnoses = []
    for rid in rids:
        diagnoses.append(
            summary.loc[summary['rid'] == rid]['diagnosis_3cat'].values[0])

    train_and_val_rids, test_rids, train_and_val_diagnoses, _ = train_test_split(
        rids, diagnoses, test_size=0.2, stratify=diagnoses)
    train_rids, val_rids = train_test_split(
        train_and_val_rids, test_size=0.2, stratify=train_and_val_diagnoses)

    logging.info('Number of train / test / val subjects: %d / %d / %d' %
                 (len(train_rids), len(test_rids), len(val_rids)))

    # n_images_train = len(summary.loc[summary['rid'].isin(train_rids)])
    # n_images_test = len(summary.loc[summary['rid'].isin(test_rids)])
    # n_images_val = len(summary.loc[summary['rid'].isin(val_rids)])

    hdf5_file = h5py.File(output_file, "w")

    diag_list = {'test': [], 'train': [], 'val': []}
    weight_list = {'test': [], 'train': [], 'val': []}
    age_list = {'test': [], 'train': [], 'val': []}
    gender_list = {'test': [], 'train': [], 'val': []}
    rid_list = {'test': [], 'train': [], 'val': []}
    viscode_list = {'test': [], 'train': [], 'val': []}
    adas13_list = {'test': [], 'train': [], 'val': []}
    mmse_list = {'test': [], 'train': [], 'val': []}
    field_strength_list = {'test': [], 'train': [], 'val': []}

    file_list = {'test': [], 'train': [], 'val': []}

    logging.info('Counting files and parsing meta data...')

    for train_test, set_rids in zip(['train', 'test', 'val'], [train_rids, test_rids, val_rids]):

        for ii, row in summary.iterrows():

            rid = row['rid']
            if rid not in set_rids:
                continue

            diagnosis_str = row['diagnosis_3cat']
            diagnosis = diagnosis_dict[diagnosis_str]
            if diagnosis not in labels_list:
                continue

            rid_list[train_test].append(rid)
            diag_list[train_test].append(diagnosis)

            viscode = row['viscode']
            viscode_list[train_test].append(viscode_dict[viscode])
            weight_list[train_test].append(row['weight'])
            age_list[train_test].append(row['age'])
            gender_list[train_test].append(gender_dict[row['gender']])
            adas13_list[train_test].append(fix_nan_and_unknown(
                row['adas13'], target_data_format=np.float32))
            mmse_list[train_test].append(fix_nan_and_unknown(
                row['mmse'], target_data_format=np.uint8))

            field_strength = row['field_strength']
            field_strength_list[train_test].append(field_strength)

            phase = row['phase']

            file_name = 'rid_%s/%s_%sT_%s_rid%s_%s%s' % (str(rid).zfill(4),
                                                         phase.lower(),
                                                         str(field_strength),
                                                         diagnosis_str,
                                                         str(rid).zfill(4),
                                                         viscode,
                                                         image_postfix)
            file_list[train_test].append(os.path.join(input_folder, file_name))

    # Write the small datasets
    for tt in ['test', 'train', 'val']:

        hdf5_file.create_dataset(
            'rid_%s' % tt, data=np.asarray(rid_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('viscode_%s' % tt, data=np.asarray(
            viscode_list[tt], dtype=np.uint8))
        hdf5_file.create_dataset(
            'diagnosis_%s' % tt, data=np.asarray(diag_list[tt], dtype=np.uint8))
        hdf5_file.create_dataset('age_%s' % tt, data=np.asarray(
            age_list[tt], dtype=np.float32))
        hdf5_file.create_dataset('weight_%s' % tt, data=np.asarray(
            weight_list[tt], dtype=np.float32))
        hdf5_file.create_dataset('gender_%s' % tt, data=np.asarray(
            gender_list[tt], dtype=np.uint8))
        hdf5_file.create_dataset('adas13_%s' % tt, data=np.asarray(
            adas13_list[tt], dtype=np.float32))
        hdf5_file.create_dataset(
            'mmse_%s' % tt, data=np.asarray(mmse_list[tt], dtype=np.uint8))
        hdf5_file.create_dataset('field_strength_%s' % tt, data=np.asarray(
            field_strength_list[tt], dtype=np.float16))

    n_train = len(file_list['train'])
    n_test = len(file_list['test'])
    n_val = len(file_list['val'])

    # assert n_train == n_images_train, 'Mismatch in data sizes, %d not == %d' % (n_train, n_images_train)
    # assert n_test == n_images_test, 'Mismatch in data sizes, %d not == %d' % (n_test, n_images_test)
    # assert n_val == n_images_val, 'Mismatch in data sizes, %d not == %d' % (n_val, n_images_val)

    # Create datasets for images and masks
    data = {}
    for tt, num_points in zip(['test', 'train', 'val'], [n_test, n_train, n_val]):
        data['images_%s' % tt] = hdf5_file.create_dataset(
            "images_%s" % tt, [num_points] + list(size), dtype=np.float32)

    img_list = {'test': [], 'train': [], 'val': []}

    logging.info('Parsing image files')

    for train_test in ['test', 'train', 'val']:

        write_buffer = 0
        counter_from = 0

        for file in file_list[train_test]:

            logging.info(
                '-----------------------------------------------------------')
            logging.info('Doing: %s' % file)

            img_dat = utils.load_nii(file)
            img = img_dat[0].copy()

            pixel_size = (img_dat[2].structarr['pixdim'][1],
                          img_dat[2].structarr['pixdim'][2],
                          img_dat[2].structarr['pixdim'][3])

            logging.info('Pixel size:')
            logging.info(pixel_size)

            scale_vector = [pixel_size[0] / target_resolution[0],
                            pixel_size[1] / target_resolution[1],
                            pixel_size[2] / target_resolution[2]]

            img_scaled = transform.rescale(img,
                                           scale_vector,
                                           order=1,
                                           preserve_range=True,
                                           multichannel=False,
                                           mode='constant')

            img_resized = crop_or_pad_slice_to_size(
                img_scaled, size, offset=offset)

            if rescale_to_one:
                img_resized = utils.map_image_to_intensity_range(
                    img_resized, -1, 1, percentiles=5)
            else:
                img_resized = utils.normalise_image(img_resized)

            ### DEBUGGING ############################################
            # utils.create_and_save_nii(img_resized, 'debug.nii.gz')
            # exit()
            #########################################################

            img_list[train_test].append(img_resized)

            write_buffer += 1

            if write_buffer >= MAX_WRITE_BUFFER:

                counter_to = counter_from + write_buffer
                _write_range_to_hdf5(
                    data, train_test, img_list, counter_from, counter_to)
                _release_tmp_memory(img_list, train_test)

                # reset stuff for next iteration
                counter_from = counter_to
                write_buffer = 0

        # after file loop: Write the remaining data

        logging.info('Writing remaining data')
        counter_to = counter_from + write_buffer

        _write_range_to_hdf5(data, train_test, img_list,
                             counter_from, counter_to)
        _release_tmp_memory(img_list, train_test)

    # After test train loop:
    hdf5_file.close()
Example #17
def prepare_data(input_folder, output_file, mode, size, target_resolution):
    '''
    Main function that prepares a dataset from the raw challenge data to an hdf5 dataset
    '''

    assert (mode in ['2D', '3D']), 'Unknown mode: %s' % mode
    if mode == '2D' and not len(size) == 2:
        raise AssertionError('Inadequate number of size parameters')
    if mode == '3D' and not len(size) == 3:
        raise AssertionError('Inadequate number of size parameters')
    if mode == '2D' and not len(target_resolution) == 2:
        raise AssertionError(
            'Inadequate number of target resolution parameters')
    if mode == '3D' and not len(target_resolution) == 3:
        raise AssertionError(
            'Inadequate number of target resolution parameters')

    hdf5_file = h5py.File(output_file, "w")

    diag_list = {'test': [], 'train': [], 'validation': []}
    height_list = {'test': [], 'train': [], 'validation': []}
    weight_list = {'test': [], 'train': [], 'validation': []}
    patient_id_list = {'test': [], 'train': [], 'validation': []}
    cardiac_phase_list = {'test': [], 'train': [], 'validation': []}

    file_list = {'test': [], 'train': [], 'validation': []}
    num_slices = {'test': 0, 'train': 0, 'validation': 0}

    logging.info('Counting files and parsing meta data...')

    for folder in os.listdir(input_folder):

        folder_path = os.path.join(input_folder, folder)

        if os.path.isdir(folder_path):

            if int(folder[-3:]) % 5 == 0:
                train_test = 'test'
            elif int(folder[-3:]) % 4 == 0:
                train_test = 'validation'
            else:
                train_test = 'train'

            infos = {}
            for line in open(os.path.join(folder_path, 'Info.cfg')):
                label, value = line.split(':')
                infos[label] = value.rstrip('\n').lstrip(' ')

            patient_id = folder[len('patient'):]  # note: lstrip('patient') would strip characters, not the prefix

            for file in glob.glob(
                    os.path.join(folder_path, 'patient???_frame??.nii.gz')):

                file_list[train_test].append(file)

                # diag_list[train_test].append(diagnosis_to_int(infos['Group']))
                diag_list[train_test].append(diagnosis_dict[infos['Group']])
                weight_list[train_test].append(infos['Weight'])
                height_list[train_test].append(infos['Height'])

                patient_id_list[train_test].append(patient_id)

                systole_frame = int(infos['ES'])
                diastole_frame = int(infos['ED'])

                file_base = file.split('.nii.gz')[0]
                frame = int(file_base.split('frame')[-1])
                if frame == systole_frame:
                    cardiac_phase_list[train_test].append(1)  # 1 == systole
                elif frame == diastole_frame:
                    cardiac_phase_list[train_test].append(2)  # 2 == diastole
                else:
                    cardiac_phase_list[train_test].append(
                        0)  # 0 means other phase

                nifty_img = nib.load(file)
                num_slices[train_test] += nifty_img.shape[2]

    # Write the small datasets
    for tt in ['test', 'train', 'validation']:
        hdf5_file.create_dataset('diagnosis_%s' % tt,
                                 data=np.asarray(diag_list[tt],
                                                 dtype=np.uint8))
        hdf5_file.create_dataset('weight_%s' % tt,
                                 data=np.asarray(weight_list[tt],
                                                 dtype=np.float32))
        hdf5_file.create_dataset('height_%s' % tt,
                                 data=np.asarray(height_list[tt],
                                                 dtype=np.float32))
        hdf5_file.create_dataset('patient_id_%s' % tt,
                                 data=np.asarray(patient_id_list[tt],
                                                 dtype=np.uint8))
        hdf5_file.create_dataset('cardiac_phase_%s' % tt,
                                 data=np.asarray(cardiac_phase_list[tt],
                                                 dtype=np.uint8))

    if mode == '3D':
        nx, ny, nz_max = size
        n_train = len(file_list['train'])
        n_test = len(file_list['test'])
        n_val = len(file_list['validation'])

    elif mode == '2D':
        nx, ny = size
        n_test = num_slices['test']
        n_train = num_slices['train']
        n_val = num_slices['validation']

    else:
        raise AssertionError('Wrong mode setting. This should never happen.')

    # print('Debug: Check if sets add up to correct value:')
    # print(n_train, n_val, n_test, n_train + n_val + n_test)

    # Create datasets for images and masks
    data = {}
    for tt, num_points in zip(['test', 'train', 'validation'],
                              [n_test, n_train, n_val]):

        if num_points > 0:
            data['images_%s' % tt] = hdf5_file.create_dataset(
                "images_%s" % tt, [num_points] + list(size), dtype=np.float32)
            data['masks_%s' % tt] = hdf5_file.create_dataset(
                "masks_%s" % tt, [num_points] + list(size), dtype=np.uint8)

    mask_list = {'test': [], 'train': [], 'validation': []}
    img_list = {'test': [], 'train': [], 'validation': []}

    logging.info('Parsing image files')

    for train_test in ['test', 'train', 'validation']:

        write_buffer = 0
        counter_from = 0

        full_mask_list = []
        patient_counter = 0
        for file in file_list[train_test]:

            patient_counter += 1

            logging.info(
                '-----------------------------------------------------------')
            logging.info('Doing: %s' % file)

            file_base = file.split('.nii.gz')[0]
            file_mask = file_base + '_gt.nii.gz'

            # patient_id = int(file_base.split('/')[-1].lstrip('patient').split('_')[0])

            img_dat = utils.load_nii(file)
            mask_dat = utils.load_nii(file_mask)

            img = img_dat[0].copy()
            mask = mask_dat[0].copy()

            img = utils.normalise_image(img)

            pixel_size = (img_dat[2].structarr['pixdim'][1],
                          img_dat[2].structarr['pixdim'][2],
                          img_dat[2].structarr['pixdim'][3])

            logging.info('Pixel size:')
            logging.info(pixel_size)

            ### PROCESSING LOOP FOR 3D DATA ################################
            if mode == '3D':

                scale_vector = [
                    pixel_size[0] / target_resolution[0],
                    pixel_size[1] / target_resolution[1],
                    pixel_size[2] / target_resolution[2]
                ]

                img_scaled = transform.rescale(img,
                                               scale_vector,
                                               order=1,
                                               preserve_range=True,
                                               multichannel=False,
                                               mode='constant')
                mask_scaled = transform.rescale(mask,
                                                scale_vector,
                                                order=0,
                                                preserve_range=True,
                                                multichannel=False,
                                                mode='constant')

                slice_vol = np.zeros((nx, ny, nz_max), dtype=np.float32)
                mask_vol = np.zeros((nx, ny, nz_max), dtype=np.uint8)

                nz_curr = img_scaled.shape[2]
                stack_from = (nz_max - nz_curr) // 2

                if stack_from < 0:
                    raise AssertionError(
                        'nz_max is too small for the chosen through-plane resolution. Consider changing '
                        'the size or the target resolution in the through-plane.'
                    )

                for zz in range(nz_curr):

                    slice_rescaled = img_scaled[:, :, zz]
                    mask_rescaled = mask_scaled[:, :, zz]

                    slice_cropped = crop_or_pad_slice_to_size(
                        slice_rescaled, nx, ny)
                    mask_cropped = crop_or_pad_slice_to_size(
                        mask_rescaled, nx, ny)

                    slice_vol[:, :, stack_from] = slice_cropped
                    mask_vol[:, :, stack_from] = mask_cropped

                    stack_from += 1

                img_list[train_test].append(slice_vol)
                mask_list[train_test].append(mask_vol)

                write_buffer += 1

                if write_buffer >= MAX_WRITE_BUFFER:

                    counter_to = counter_from + write_buffer
                    _write_range_to_hdf5(data, train_test, img_list, mask_list,
                                         counter_from, counter_to)
                    _release_tmp_memory(img_list, mask_list, train_test)

                    # reset stuff for next iteration
                    counter_from = counter_to
                    write_buffer = 0

            ### PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA ###################
            elif mode == '2D':

                scale_vector = [
                    pixel_size[0] / target_resolution[0],
                    pixel_size[1] / target_resolution[1]
                ]

                for zz in range(img.shape[2]):

                    slice_img = np.squeeze(img[:, :, zz])
                    slice_rescaled = transform.rescale(slice_img,
                                                       scale_vector,
                                                       order=1,
                                                       preserve_range=True,
                                                       multichannel=False,
                                                       mode='constant')

                    slice_mask = np.squeeze(mask[:, :, zz])
                    mask_rescaled = transform.rescale(slice_mask,
                                                      scale_vector,
                                                      order=0,
                                                      preserve_range=True,
                                                      multichannel=False,
                                                      mode='constant')

                    slice_cropped = crop_or_pad_slice_to_size(
                        slice_rescaled, nx, ny)
                    mask_cropped = crop_or_pad_slice_to_size(
                        mask_rescaled, nx, ny)

                    img_list[train_test].append(slice_cropped)
                    mask_list[train_test].append(mask_cropped)

                    write_buffer += 1

                    # Writing needs to happen inside the loop over the slices
                    if write_buffer >= MAX_WRITE_BUFFER:

                        counter_to = counter_from + write_buffer
                        _write_range_to_hdf5(data, train_test, img_list,
                                             mask_list, counter_from,
                                             counter_to)
                        _release_tmp_memory(img_list, mask_list, train_test)

                        # reset stuff for next iteration
                        counter_from = counter_to
                        write_buffer = 0

        # hdf5_file.create_dataset('full_mask_available_%s' % train_test, data=np.asarray(full_mask_list, dtype=np.uint8))

        # after file loop: Write the remaining data

        logging.info('Writing remaining data')
        counter_to = counter_from + write_buffer

        _write_range_to_hdf5(data, train_test, img_list, mask_list,
                             counter_from, counter_to)
        _release_tmp_memory(img_list, mask_list, train_test)

    # After test train loop:
    hdf5_file.close()
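
# ==============================
# (hedged usage sketch) illustrative parameter values only -- the actual
# size / resolution settings used with this function are not shown here
# ==============================
# prepare_data('/data/acdc/train/', 'acdc_2d.hdf5', mode='2D',
#              size=(212, 212), target_resolution=(1.37, 1.37))
# prepare_data('/data/acdc/train/', 'acdc_3d.hdf5', mode='3D',
#              size=(212, 212, 28), target_resolution=(1.37, 1.37, 6.5))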
def main(input_folder,
         output_folder,
         model_path,
         exp_config,
         do_postprocessing=False,
         gt_exists=True):

    # Get Data
    data_loader = data_switch(exp_config.data_identifier)
    data = data_loader(exp_config)

    # Make and restore vagan model
    segmenter_model = segmenter(
        exp_config=exp_config, data=data,
        fixed_batch_size=1)  # CRF model requires fixed batch size
    segmenter_model.load_weights(model_path, type='best_dice')

    total_time = 0
    total_volumes = 0

    dice_list = []
    assd_list = []
    hd_list = []

    for folder in os.listdir(input_folder):

        folder_path = os.path.join(input_folder, folder)

        if os.path.isdir(folder_path):

            infos = {}
            for line in open(os.path.join(folder_path, 'Info.cfg')):
                label, value = line.split(':')
                infos[label] = value.rstrip('\n').lstrip(' ')

            patient_id = folder[len('patient'):]  # strip the 'patient' prefix

            if not int(patient_id) % 5 == 0:
                continue

            ED_frame = int(infos['ED'])
            ES_frame = int(infos['ES'])

            for file in glob.glob(
                    os.path.join(folder_path, 'patient???_frame??.nii.gz')):

                logging.info(' ----- Doing image: -------------------------')
                logging.info('Doing: %s' % file)
                logging.info(' --------------------------------------------')

                file_base = file.split('.nii.gz')[0]

                frame = int(file_base.split('frame')[-1])
                img, img_affine, img_header = utils.load_nii(file)
                img = utils.normalise_image(img)
                zooms = img_header.get_zooms()

                if gt_exists:
                    file_mask = file_base + '_gt.nii.gz'
                    mask, mask_affine, mask_header = utils.load_nii(file_mask)

                start_time = time.time()

                if exp_config.dimensionality_mode == '2D':

                    pixel_size = (img_header.structarr['pixdim'][1],
                                  img_header.structarr['pixdim'][2])
                    scale_vector = (pixel_size[0] /
                                    exp_config.target_resolution[0],
                                    pixel_size[1] /
                                    exp_config.target_resolution[1])

                    predictions = []

                    nx, ny = exp_config.image_size

                    for zz in range(img.shape[2]):

                        slice_img = np.squeeze(img[:, :, zz])
                        slice_rescaled = transform.rescale(slice_img,
                                                           scale_vector,
                                                           order=1,
                                                           preserve_range=True,
                                                           multichannel=False,
                                                           mode='constant')

                        x, y = slice_rescaled.shape

                        x_s = (x - nx) // 2
                        y_s = (y - ny) // 2
                        x_c = (nx - x) // 2
                        y_c = (ny - y) // 2

                        # Crop section of image for prediction
                        if x > nx and y > ny:
                            slice_cropped = slice_rescaled[x_s:x_s + nx,
                                                           y_s:y_s + ny]
                        else:
                            slice_cropped = np.zeros((nx, ny))
                            if x <= nx and y > ny:
                                slice_cropped[x_c:x_c +
                                              x, :] = slice_rescaled[:,
                                                                     y_s:y_s +
                                                                     ny]
                            elif x > nx and y <= ny:
                                slice_cropped[:, y_c:y_c +
                                              y] = slice_rescaled[x_s:x_s +
                                                                  nx, :]
                            else:
                                slice_cropped[x_c:x_c + x, y_c:y_c +
                                              y] = slice_rescaled[:, :]

                        # GET PREDICTION
                        network_input = np.float32(
                            np.tile(np.reshape(slice_cropped, (nx, ny, 1)),
                                    (1, 1, 1, 1)))
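                        # network_input has shape (1, nx, ny, 1): a batch of
                        # one slice with a single channel, matching the fixed
                        # batch size the segmenter was built with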
                        mask_out, softmax = segmenter_model.predict(
                            network_input)

                        prediction_cropped = np.squeeze(softmax[0, ...])

                        # ASSEMBLE BACK THE SLICES
                        slice_predictions = np.zeros(
                            (x, y, exp_config.nlabels))
                        # insert cropped region into original image again
                        if x > nx and y > ny:
                            slice_predictions[x_s:x_s + nx, y_s:y_s +
                                              ny, :] = prediction_cropped
                        else:
                            if x <= nx and y > ny:
                                slice_predictions[:, y_s:y_s +
                                                  ny, :] = prediction_cropped[
                                                      x_c:x_c + x, :, :]
                            elif x > nx and y <= ny:
                                slice_predictions[
                                    x_s:x_s +
                                    nx, :, :] = prediction_cropped[:, y_c:y_c +
                                                                   y, :]
                            else:
                                slice_predictions[:, :, :] = prediction_cropped[
                                    x_c:x_c + x, y_c:y_c + y, :]

                        # RESCALING ON THE LOGITS
                        if gt_exists:
                            prediction = transform.resize(
                                slice_predictions,
                                (mask.shape[0], mask.shape[1],
                                 exp_config.nlabels),
                                order=1,
                                preserve_range=True,
                                mode='constant')
                        else:  # This can occasionally lead to wrong volume size, therefore if gt_exists
                            # we use the gt mask size for resizing.
                            prediction = transform.rescale(
                                slice_predictions, (1.0 / scale_vector[0],
                                                    1.0 / scale_vector[1], 1),
                                order=1,
                                preserve_range=True,
                                multichannel=False,
                                mode='constant')

                        prediction = np.uint8(np.argmax(prediction, axis=-1))
                        # import matplotlib.pyplot as plt
                        # fig = plt.Figure()
                        # for ii in range(3):
                        #     plt.subplot(1, 3, ii + 1)
                        #     plt.imshow(np.squeeze(prediction))
                        # plt.show()

                        predictions.append(prediction)

                    prediction_arr = np.transpose(
                        np.asarray(predictions, dtype=np.uint8), (1, 2, 0))

                elif exp_config.dimensionality_mode == '3D':

                    nx, ny, nz = exp_config.image_size

                    pixel_size = (img_header.structarr['pixdim'][1],
                                  img_header.structarr['pixdim'][2],
                                  img_header.structarr['pixdim'][3])

                    scale_vector = (pixel_size[0] /
                                    exp_config.target_resolution[0],
                                    pixel_size[1] /
                                    exp_config.target_resolution[1],
                                    pixel_size[2] /
                                    exp_config.target_resolution[2])

                    vol_scaled = transform.rescale(img,
                                                   scale_vector,
                                                   order=1,
                                                   preserve_range=True,
                                                   multichannel=False,
                                                   mode='constant')

                    nz_max = exp_config.image_size[2]
                    slice_vol = np.zeros((nx, ny, nz_max), dtype=np.float32)

                    nz_curr = vol_scaled.shape[2]
                    stack_from = (nz_max - nz_curr) // 2
                    stack_counter = stack_from

                    x, y, z = vol_scaled.shape

                    x_s = (x - nx) // 2
                    y_s = (y - ny) // 2
                    x_c = (nx - x) // 2
                    y_c = (ny - y) // 2

                    for zz in range(nz_curr):

                        slice_rescaled = vol_scaled[:, :, zz]

                        if x > nx and y > ny:
                            slice_cropped = slice_rescaled[x_s:x_s + nx,
                                                           y_s:y_s + ny]
                        else:
                            slice_cropped = np.zeros((nx, ny))
                            if x <= nx and y > ny:
                                slice_cropped[x_c:x_c +
                                              x, :] = slice_rescaled[:,
                                                                     y_s:y_s +
                                                                     ny]
                            elif x > nx and y <= ny:
                                slice_cropped[:, y_c:y_c +
                                              y] = slice_rescaled[x_s:x_s +
                                                                  nx, :]

                            else:
                                slice_cropped[x_c:x_c + x, y_c:y_c +
                                              y] = slice_rescaled[:, :]

                        slice_vol[:, :, stack_counter] = slice_cropped
                        stack_counter += 1

                    stack_to = stack_counter

                    network_input = np.float32(
                        np.reshape(slice_vol, (1, nx, ny, nz_max, 1)))
                    start_time = time.time()
                    mask_out, softmax = segmenter_model.predict(network_input)
                    logging.info('Classified 3D: %f secs' %
                                 (time.time() - start_time))

                    prediction_nzs = mask_out[0, :, :, stack_from:
                                              stack_to]  # non-zero-slices

                    if not prediction_nzs.shape[2] == nz_curr:
                        raise ValueError('sizes mismatch')

                    # ASSEMBLE BACK THE SLICES
                    prediction_scaled = np.zeros(
                        vol_scaled.shape)  # last dim is for logits classes

                    # insert cropped region into original image again
                    if x > nx and y > ny:
                        prediction_scaled[x_s:x_s + nx,
                                          y_s:y_s + ny, :] = prediction_nzs
                    else:
                        if x <= nx and y > ny:
                            prediction_scaled[:, y_s:y_s + ny, :] = \
                                prediction_nzs[x_c:x_c + x, :, :]
                        elif x > nx and y <= ny:
                            prediction_scaled[x_s:x_s + nx, :, :] = \
                                prediction_nzs[:, y_c:y_c + y, :]
                        else:
                            prediction_scaled[:, :, :] = \
                                prediction_nzs[x_c:x_c + x, y_c:y_c + y, :]

                    logging.info('Prediction_scaled mean %f' %
                                 (np.mean(prediction_scaled)))

                    prediction = transform.resize(
                        prediction_scaled,
                        (mask.shape[0], mask.shape[1], mask.shape[2], 1),
                        order=1,
                        preserve_range=True,
                        mode='constant')
                    prediction = np.argmax(prediction, axis=-1)
                    prediction_arr = np.asarray(prediction, dtype=np.uint8)

                # This is the same for 2D and 3D again
                if do_postprocessing:
                    prediction_arr = utils.keep_largest_connected_components(
                        prediction_arr)

                elapsed_time = time.time() - start_time
                total_time += elapsed_time
                total_volumes += 1

                logging.info('Evaluation of volume took %f secs.' %
                             elapsed_time)

                if frame == ED_frame:
                    frame_suffix = '_ED'
                elif frame == ES_frame:
                    frame_suffix = '_ES'
                else:
                    raise ValueError(
                        'Frame does not correspond to ED or ES. frame = %d, ED = %d, ES = %d'
                        % (frame, ED_frame, ES_frame))

                # Save predicted mask
                out_file_name = os.path.join(
                    output_folder, 'prediction',
                    'patient' + patient_id + frame_suffix + '.nii.gz')
                if gt_exists:
                    out_affine = mask_affine
                    out_header = mask_header
                else:
                    out_affine = img_affine
                    out_header = img_header

                logging.info('saving to: %s' % out_file_name)
                utils.save_nii(out_file_name, prediction_arr, out_affine,
                               out_header)

                # Save image data to the same folder for convenience
                image_file_name = os.path.join(
                    output_folder, 'image',
                    'patient' + patient_id + frame_suffix + '.nii.gz')
                logging.info('saving to: %s' % image_file_name)
                utils.save_nii(image_file_name, img, out_affine, out_header)

                if gt_exists:

                    # Save GT image
                    gt_file_name = os.path.join(
                        output_folder, 'ground_truth',
                        'patient' + patient_id + frame_suffix + '.nii.gz')
                    logging.info('saving to: %s' % gt_file_name)
                    utils.save_nii(gt_file_name, mask, out_affine, out_header)

                    # Save difference mask between predictions and ground truth
                    difference_mask = np.where(
                        np.abs(prediction_arr - mask) > 0, 1, 0)
                    difference_mask = np.asarray(difference_mask,
                                                 dtype=np.uint8)
                    diff_file_name = os.path.join(
                        output_folder, 'difference',
                        'patient' + patient_id + frame_suffix + '.nii.gz')
                    logging.info('saving to: %s' % diff_file_name)
                    utils.save_nii(diff_file_name, difference_mask, out_affine,
                                   out_header)

                # calculate metrics
                y_ = prediction_arr
                y = mask

                per_lbl_dice = []
                per_lbl_assd = []
                per_lbl_hd = []

                for lbl in [3, 1, 2]:  # instead of range(exp_config.nlabels)

                    binary_pred = (y_ == lbl) * 1
                    binary_gt = (y == lbl) * 1

                    if np.sum(binary_gt) == 0 and np.sum(binary_pred) == 0:
                        per_lbl_dice.append(1)
                        per_lbl_assd.append(0)
                        per_lbl_hd.append(0)
                    elif (np.sum(binary_pred) > 0 and np.sum(binary_gt) == 0) \
                            or (np.sum(binary_pred) == 0 and np.sum(binary_gt) > 0):
                        logging.warning(
                            'Structure missing in either GT or prediction (but not both). ASSD and HD will not be accurate.'
                        )
                        per_lbl_dice.append(0)
                        per_lbl_assd.append(1)
                        per_lbl_hd.append(1)
                    else:
                        per_lbl_dice.append(dc(binary_pred, binary_gt))
                        per_lbl_assd.append(
                            assd(binary_pred, binary_gt, voxelspacing=zooms))
                        per_lbl_hd.append(
                            hd(binary_pred, binary_gt, voxelspacing=zooms))

                dice_list.append(per_lbl_dice)
                assd_list.append(per_lbl_assd)
                hd_list.append(per_lbl_hd)

    logging.info('Average time per volume: %f' % (total_time / total_volumes))

    dice_arr = np.asarray(dice_list)
    assd_arr = np.asarray(assd_list)
    hd_arr = np.asarray(hd_list)

    mean_per_lbl_dice = dice_arr.mean(axis=0)
    mean_per_lbl_assd = assd_arr.mean(axis=0)
    mean_per_lbl_hd = hd_arr.mean(axis=0)

    logging.info('Dice')
    logging.info(mean_per_lbl_dice)
    logging.info(np.mean(mean_per_lbl_dice))
    logging.info('ASSD')
    logging.info(mean_per_lbl_assd)
    logging.info(np.mean(mean_per_lbl_assd))
    logging.info('HD')
    logging.info(mean_per_lbl_hd)
    logging.info(np.mean(mean_per_lbl_hd))
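

# ====================================================================
# For reference: a minimal sketch of the Dice coefficient computed by
# medpy's dc() in the evaluation loop above, assuming binary numpy
# arrays of equal shape. This is illustrative only; the evaluation
# itself keeps using the medpy implementation.
# ====================================================================
import numpy as np

def dice_coefficient_sketch(binary_pred, binary_gt):
    # Dice = 2 * |A intersection B| / (|A| + |B|)
    intersection = np.sum(binary_pred * binary_gt)
    denominator = np.sum(binary_pred) + np.sum(binary_gt)
    if denominator == 0:
        # both masks empty: treated as a perfect match, as in the loop above
        return 1.0
    return 2.0 * intersection / denominator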
Example #19
def prepare_data(input_folder, preproc_folder, protocol, idx_start, idx_end):

    images = []
    affines = []
    patnames = []
    masks = []

    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # ========================
    # iterate through the requested indices
    # ========================
    for idx in range(idx_start, idx_end):

        logging.info(
            '============================================================')

        # ========================
        # get the file name for this subject
        # ========================
        filename = filenames[idx]

        # ========================
        # define how much of the image can be cropped out as it consists of zeros
        # ========================
        x_start = 18
        x_end = -18
        y_start = 28
        y_end = -27
        z_start = 2
        z_end = -34
        # original images are 260 * 311 * 260
        # cropping them down to 224 * 256 * 224

        # ========================
        # read the contents inside the top-level subject directory
        # ========================
        with zipfile.ZipFile(filename, 'r') as zfile:

            # ========================
            # search for the relevant files
            # ========================
            for name in zfile.namelist():

                # ========================
                # search for files inside the T1w directory
                # ========================
                if re.search(r'/T1w/', name) is not None:

                    # ========================
                    # search for .gz files inside the T1w directory
                    # ========================
                    if re.search(r'\.gz$', name) is not None:

                        # ========================
                        # get the protocol image
                        # ========================
                        if re.search(protocol + 'acpc_dc_restore_brain',
                                     name) is not None:

                            logging.info('reading image: %s' % name)

                            _filepath = zfile.extract(
                                name, sys_config.preproc_folder_hcp
                            )  # extract the image filepath

                            _patname = name[:name.find(
                                '/')]  # extract the patient name

                            _img_data, _img_affine, _img_header = utils.load_nii(
                                _filepath)  # read the 3d image

                            _img_data = _img_data[
                                x_start:x_end, y_start:y_end, z_start:
                                z_end]  # discard some pixels as they are always zero.

                            _img_data = utils.normalise_image(
                                _img_data, norm_type='div_by_max'
                            )  # normalise the image (volume wise)

                            savepath = sys_config.preproc_folder_hcp + _patname + '/preprocessed_image' + protocol + '.nii'  # save the pre-processed image
                            utils.save_nii(savepath, _img_data, _img_affine,
                                           _img_header)

                            images.append(
                                _img_data
                            )  # append to the list of all images, affines and patient names
                            affines.append(_img_affine)
                            patnames.append(_patname)

                        # ========================
                        # get the segmentation mask
                        # ========================
                        if re.search('aparc.aseg', name) is not None:  # segmentation mask with ~100 classes

                            if re.search('T1wDividedByT2w_', name) is None:

                                logging.info('reading mask: %s' % name)

                                _segpath = zfile.extract(
                                    name, sys_config.preproc_folder_hcp
                                )  # extract the segmentation mask

                                _patname = name[:name.find(
                                    '/')]  # extract the patient name

                                _seg_data, _seg_affine, _seg_header = utils.load_nii(
                                    _segpath)  # read the segmentation mask

                                _seg_data = _seg_data[
                                    x_start:x_end, y_start:y_end, z_start:
                                    z_end]  # discard some pixels as they are always zero.

                                _seg_data = utils.group_segmentation_classes(
                                    _seg_data
                                )  # group the segmentation classes as required

                                savepath = sys_config.preproc_folder_hcp + _patname + '/preprocessed_gt15.nii'  # save the pre-processed segmentation ground truth
                                utils.save_nii(savepath, _seg_data,
                                               _seg_affine, _seg_header)

                                masks.append(
                                    _seg_data
                                )  # append to the list of all masks

    # ========================
    # convert the lists to arrays
    # ========================
    images = np.array(images)
    affines = np.array(affines)
    patnames = np.array(patnames)
    masks = np.array(masks, dtype='uint8')

    # ========================
    # merge along the y-axis to get a stack of x-z slices, for the images as well as the masks
    # ========================
    images = images.swapaxes(1, 2)
    images = images.reshape(-1, images.shape[2], images.shape[3])
    masks = masks.swapaxes(1, 2)
    masks = masks.reshape(-1, masks.shape[2], masks.shape[3])

    # ========================
    # save the processed images and masks so that they can be directly read the next time
    # make appropriate filenames according to the requested indices of training, validation and test images
    # ========================
    logging.info('Saving pre-processed files...')
    config_details = '%sfrom%dto%d_' % (protocol, idx_start, idx_end)
    filepath_images = preproc_folder + config_details + 'images_2d.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15_2d.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'
    np.save(filepath_images, images)
    np.save(filepath_masks, masks)
    np.save(filepath_affine, affines)
    np.save(filepath_patnames, patnames)

    return images, masks, affines, patnames
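

# ====================================================================
# A minimal sketch of the axis merge performed at the end of the
# function above: swapping axes 1 and 2 turns a stack of (N, x, y, z)
# volumes into (N, y, x, z), and the reshape then stacks all x-z
# slices along the first axis. The toy shapes below are assumptions
# chosen for illustration only.
# ====================================================================
import numpy as np

volumes = np.zeros((2, 224, 256, 224), dtype=np.float32)  # (N, x, y, z)
slices = volumes.swapaxes(1, 2)  # (N, y, x, z)
slices = slices.reshape(-1, slices.shape[2], slices.shape[3])
print(slices.shape)  # (2 * 256, 224, 224): one x-z slice per y position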
def prepare_data(input_folder,
                 output_file,
                 size,
                 target_resolution,
                 cv_fold_num):

    # =======================
    # define the paths to the images and the segmentation masks
    # =======================
    image_folder = os.path.join(input_folder, 'Prostate-3T')
    mask_folder = os.path.join(input_folder, 'NCI_ISBI_Challenge-Prostate3T_Training_Segmentations')

    # =======================
    # create the hdf5 file where everything will be written
    # =======================
    hdf5_file = h5py.File(output_file, "w")

    # =======================
    # =======================
    logging.info('Counting files and parsing meta data...')
    folder_list = get_patient_folders(image_folder,
                                      folder_base='Prostate3T-01',
                                      cv_fold_number = cv_fold_num)
    
    num_slices = count_slices(image_folder,
                              folder_base='Prostate3T-01',
                              cv_fold_number = cv_fold_num)
    
    nx, ny = size
    n_test = num_slices['test']
    n_train = num_slices['train']
    n_val = num_slices['validation']

    # =======================
    # =======================
    print('Debug: Check if sets add up to correct value:')
    print(n_train, n_val, n_test, n_train + n_val + n_test)

    # =======================
    # Create datasets for images and masks
    # =======================
    data = {}
    for tt, num_points in zip(['test', 'train', 'validation'], [n_test, n_train, n_val]):

        if num_points > 0:
            data['images_%s' % tt] = hdf5_file.create_dataset("images_%s" % tt, [num_points] + list(size), dtype=np.float32)
            data['masks_%s' % tt] = hdf5_file.create_dataset("masks_%s" % tt, [num_points] + list(size), dtype=np.uint8)

    mask_list = {'test': [], 'train': [], 'validation': []}
    img_list = {'test': [], 'train': [], 'validation': []}
    nx_list = {'test': [], 'train': [], 'validation': []}
    ny_list = {'test': [], 'train': [], 'validation': []}
    nz_list = {'test': [], 'train': [], 'validation': []}
    px_list = {'test': [], 'train': [], 'validation': []}
    py_list = {'test': [], 'train': [], 'validation': []}
    pz_list = {'test': [], 'train': [], 'validation': []}
    pat_names_list = {'test': [], 'train': [], 'validation': []}

    # =======================
    # =======================
    logging.info('Parsing image files')
    for train_test in ['test', 'train', 'validation']:

        write_buffer = 0
        counter_from = 0

        patient_counter = 0

        for folder in folder_list[train_test]:

            patient_counter += 1

            logging.info('================================')
            logging.info('Doing: %s' % folder)
            pat_names_list[train_test].append(str(folder.split('-')[-1]))

            lstFilesDCM = []  # create an empty list
            
            for dirName, subdirList, fileList in os.walk(folder):
            
                # fileList.sort()
                for filename in fileList:
                
                    if ".dcm" in filename.lower():  # check whether the file's DICOM
                        lstFilesDCM.append(os.path.join(dirName, filename))

            # Get ref file
            RefDs = dicom.read_file(lstFilesDCM[0])

            # Load dimensions based on the number of rows, columns, and slices (along the Z axis)
            ConstPixelDims = (int(RefDs.Rows), int(RefDs.Columns), len(lstFilesDCM))

            # Load spacing values (in mm)
            pixel_size = (float(RefDs.PixelSpacing[0]), float(RefDs.PixelSpacing[1]), float(RefDs.SliceThickness))
            px_list[train_test].append(float(RefDs.PixelSpacing[0]))
            py_list[train_test].append(float(RefDs.PixelSpacing[1]))
            pz_list[train_test].append(float(RefDs.SliceThickness))

            print('PixelDims')
            print(ConstPixelDims)
            print('PixelSpacing')
            print(pixel_size)

            # The array is sized based on 'ConstPixelDims'
            img = np.zeros(ConstPixelDims, dtype=RefDs.pixel_array.dtype)

            # loop through all the DICOM files
            for filenameDCM in lstFilesDCM:

                # read the file
                ds = dicom.read_file(filenameDCM)

                # ======
                # store the raw image data
                # img[:, :, lstFilesDCM.index(filenameDCM)] = ds.pixel_array
                # the index number field is not set correctly!
                # the InstanceNumber attribute gives the slice number instead.
                # ======
                img[:, :, ds.InstanceNumber - 1] = ds.pixel_array
                
            # ================================
            # save as nifti, this sets the affine transformation as an identity matrix
            # ================================    
            nifti_img_path = lstFilesDCM[0][:lstFilesDCM[0].rfind('/')+1]
            utils.save_nii(img_path = nifti_img_path + 'img.nii.gz', data = img, affine = np.eye(4))
    
            # ================================
            # do bias field correction
            # ================================
            input_img = nifti_img_path + 'img.nii.gz'
            output_img = nifti_img_path + 'img_n4.nii.gz'
            subprocess.call(["/usr/bmicnas01/data-biwi-01/bmicdatasets/Sharing/N4_th", input_img, output_img])
    
            # ================================    
            # read bias corrected image
            # ================================    
            img = utils.load_nii(img_path = nifti_img_path + 'img_n4.nii.gz')[0]

            # ================================    
            # normalize the image
            # ================================    
            img = utils.normalise_image(img, norm_type='div_by_max')

            # ================================    
            # read the labels
            # ================================    
            mask_path = os.path.join(mask_folder, folder.split('/')[-1] + '.nrrd')
            mask, options = nrrd.read(mask_path)

            # fix the axis order by swapping x and y
            mask = np.swapaxes(mask, 0, 1)
            
            # ================================
            # save as nifti, this sets the affine transformation as an identity matrix
            # ================================    
            utils.save_nii(img_path = nifti_img_path + 'lbl.nii.gz', data = mask, affine = np.eye(4))
            
            nx_list[train_test].append(mask.shape[0])
            ny_list[train_test].append(mask.shape[1])
            nz_list[train_test].append(mask.shape[2])

            print('mask.shape')
            print(mask.shape)
            print('img.shape')
            print(img.shape)

            ### PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA ###################
            scale_vector = [pixel_size[0] / target_resolution[0],
                            pixel_size[1] / target_resolution[1]]

            for zz in range(img.shape[2]):

                slice_img = np.squeeze(img[:, :, zz])
                slice_rescaled = transform.rescale(slice_img,
                                                   scale_vector,
                                                   order=1,
                                                   preserve_range=True,
                                                   multichannel=False,
                                                   mode = 'constant')

                slice_mask = np.squeeze(mask[:, :, zz])
                mask_rescaled = transform.rescale(slice_mask,
                                                  scale_vector,
                                                  order=0,
                                                  preserve_range=True,
                                                  multichannel=False,
                                                  mode='constant')

                slice_cropped = utils.crop_or_pad_slice_to_size(slice_rescaled, nx, ny)
                mask_cropped = utils.crop_or_pad_slice_to_size(mask_rescaled, nx, ny)

                img_list[train_test].append(slice_cropped)
                mask_list[train_test].append(mask_cropped)

                write_buffer += 1

                # Writing needs to happen inside the loop over the slices
                if write_buffer >= MAX_WRITE_BUFFER:

                    counter_to = counter_from + write_buffer
                    _write_range_to_hdf5(data, train_test, img_list, mask_list, counter_from, counter_to)
                    _release_tmp_memory(img_list, mask_list, train_test)

                    # reset stuff for next iteration
                    counter_from = counter_to
                    write_buffer = 0


        logging.info('Writing remaining data')
        counter_to = counter_from + write_buffer

        _write_range_to_hdf5(data, train_test, img_list, mask_list, counter_from, counter_to)
        _release_tmp_memory(img_list, mask_list, train_test)

    # Write the small datasets
    for tt in ['test', 'train', 'validation']:
        hdf5_file.create_dataset('nx_%s' % tt, data=np.asarray(nx_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('ny_%s' % tt, data=np.asarray(ny_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('nz_%s' % tt, data=np.asarray(nz_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('px_%s' % tt, data=np.asarray(px_list[tt], dtype=np.float32))
        hdf5_file.create_dataset('py_%s' % tt, data=np.asarray(py_list[tt], dtype=np.float32))
        hdf5_file.create_dataset('pz_%s' % tt, data=np.asarray(pz_list[tt], dtype=np.float32))
        hdf5_file.create_dataset('patnames_%s' % tt, data=np.asarray(pat_names_list[tt], dtype="S10"))
    
    # After test train loop:
    hdf5_file.close()
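

# ====================================================================
# The helpers _write_range_to_hdf5 and _release_tmp_memory are called
# above but defined elsewhere in the repository. A minimal sketch that
# is consistent with how they are used (an assumption, not the exact
# repository code) could look like this:
# ====================================================================
import numpy as np

def _write_range_to_hdf5_sketch(data, train_test, img_list, mask_list,
                                counter_from, counter_to):
    # flush the buffered slices into the pre-allocated hdf5 datasets
    img_arr = np.asarray(img_list[train_test], dtype=np.float32)
    mask_arr = np.asarray(mask_list[train_test], dtype=np.uint8)
    data['images_%s' % train_test][counter_from:counter_to, ...] = img_arr
    data['masks_%s' % train_test][counter_from:counter_to, ...] = mask_arr

def _release_tmp_memory_sketch(img_list, mask_list, train_test):
    # clear the per-split buffers so that memory does not grow unboundedly
    img_list[train_test].clear()
    mask_list[train_test].clear()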
Example #21
def prepare_data(input_folder,
                 preproc_folder, # bias corrected images will be saved here already
                 output_file,
                 size,
                 target_resolution,
                 cv_fold_num):

    # =======================
    # create the hdf5 file where everything will be written
    # =======================
    hdf5_file = h5py.File(output_file, "w")

    # =======================
    # read all the images and count the number of slices along the append axis (the one with the lowest resolution)
    # =======================
    logging.info('Counting files and parsing meta data...')    
    # using the bias corrected images in the preproc folder for this step
    num_slices, patient_ids_list = count_slices_and_patient_ids_list(preproc_folder,
                                                                     cv_fold_number = cv_fold_num)
        
    # =======================
    # set the number of slices according to what has been found from the previous function
    # =======================
    nx, ny = size
    n_test = num_slices['test']
    n_train = num_slices['train']
    n_val = num_slices['validation']

    # =======================
    # Create datasets for images and masks
    # =======================
    data = {}
    for tt, num_points in zip(['test', 'train', 'validation'], [n_test, n_train, n_val]):

        if num_points > 0:
            data['images_%s' % tt] = hdf5_file.create_dataset("images_%s" % tt, [num_points] + list(size), dtype=np.float32)
            data['masks_%s' % tt] = hdf5_file.create_dataset("masks_%s" % tt, [num_points] + list(size), dtype=np.uint8)

    mask_list = {'test': [], 'train': [], 'validation': []}
    img_list = {'test': [], 'train': [], 'validation': []}
    nx_list = {'test': [], 'train': [], 'validation': []}
    ny_list = {'test': [], 'train': [], 'validation': []}
    nz_list = {'test': [], 'train': [], 'validation': []}
    px_list = {'test': [], 'train': [], 'validation': []}
    py_list = {'test': [], 'train': [], 'validation': []}
    pz_list = {'test': [], 'train': [], 'validation': []}
    pat_names_list = {'test': [], 'train': [], 'validation': []}              
                
    # =======================
    # read data of each subject, preprocess it and write to the hdf5 file
    # =======================
    logging.info('Parsing image files')
    for train_test in ['test', 'train', 'validation']:

        write_buffer = 0
        counter_from = 0
        patient_counter = 0
        
        for patient_id in patient_ids_list[train_test]:
            
            filepath_orig_mhd_format = input_folder + 'Case' + patient_id + '.mhd'
            filepath_orig_nii_format = preproc_folder + 'Case' + patient_id + '.nii.gz'
            filepath_bias_corrected_nii_format = preproc_folder + 'Case' + patient_id + '_n4.nii.gz'
            filepath_seg_nii_format = preproc_folder + 'Case' + patient_id + '_segmentation.nii.gz'

            patient_counter += 1
            pat_names_list[train_test].append('case' + patient_id)

            logging.info('================================')
            logging.info('Doing: %s' % filepath_orig_mhd_format)
            
            # ================================    
            # read the original mhd image, in order to extract pixel resolution information
            # ================================    
            img_mhd = sitk.ReadImage(filepath_orig_mhd_format)
            pixel_size = img_mhd.GetSpacing()
            px_list[train_test].append(float(pixel_size[0]))
            py_list[train_test].append(float(pixel_size[1]))
            pz_list[train_test].append(float(pixel_size[2]))

            # ================================    
            # read bias corrected image
            # ================================    
            img = utils.load_nii(filepath_bias_corrected_nii_format)[0]

            # ================================    
            # normalize the image
            # ================================    
            img = utils.normalise_image(img, norm_type='div_by_max')

            # ================================    
            # read the labels
            # ================================    
            mask = utils.load_nii(filepath_seg_nii_format)[0]            
            
            # ================================    
            # skimage io with the SimpleITK plugin was used to read the images in the convert_to_nii_and_correct_bias_field function.
            # this led to the arrays being read as z-x-y
            # move the axes appropriately, so that the resolution read above is correct for the corresponding axes.
            # ================================    
            img = np.swapaxes(np.swapaxes(img, 0, 1), 1, 2)
            mask = np.swapaxes(np.swapaxes(mask, 0, 1), 1, 2)
            
            # ================================    
            # write to the dimensions now
            # ================================    
            nx_list[train_test].append(mask.shape[0])
            ny_list[train_test].append(mask.shape[1])
            nz_list[train_test].append(mask.shape[2])

            print('mask.shape')
            print(mask.shape)
            print('img.shape')
            print(img.shape)
            
            ### PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA ###################
            scale_vector = [pixel_size[0] / target_resolution[0],
                            pixel_size[1] / target_resolution[1]]

            for zz in range(img.shape[2]):

                slice_img = np.squeeze(img[:, :, zz])
                slice_rescaled = transform.rescale(slice_img,
                                                   scale_vector,
                                                   order=1,
                                                   preserve_range=True,
                                                   multichannel=False,
                                                   mode = 'constant')

                slice_mask = np.squeeze(mask[:, :, zz])
                mask_rescaled = transform.rescale(slice_mask,
                                                  scale_vector,
                                                  order=0,
                                                  preserve_range=True,
                                                  multichannel=False,
                                                  mode='constant')

                slice_cropped = utils.crop_or_pad_slice_to_size(slice_rescaled, nx, ny)
                mask_cropped = utils.crop_or_pad_slice_to_size(mask_rescaled, nx, ny)

                img_list[train_test].append(slice_cropped)
                mask_list[train_test].append(mask_cropped)

                write_buffer += 1

                # Writing needs to happen inside the loop over the slices
                if write_buffer >= MAX_WRITE_BUFFER:

                    counter_to = counter_from + write_buffer
                    _write_range_to_hdf5(data, train_test, img_list, mask_list, counter_from, counter_to)
                    _release_tmp_memory(img_list, mask_list, train_test)

                    # reset stuff for next iteration
                    counter_from = counter_to
                    write_buffer = 0


        logging.info('Writing remaining data')
        counter_to = counter_from + write_buffer

        _write_range_to_hdf5(data, train_test, img_list, mask_list, counter_from, counter_to)
        _release_tmp_memory(img_list, mask_list, train_test)

    # Write the small datasets
    for tt in ['test', 'train', 'validation']:
        hdf5_file.create_dataset('nx_%s' % tt, data=np.asarray(nx_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('ny_%s' % tt, data=np.asarray(ny_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('nz_%s' % tt, data=np.asarray(nz_list[tt], dtype=np.uint16))
        hdf5_file.create_dataset('px_%s' % tt, data=np.asarray(px_list[tt], dtype=np.float32))
        hdf5_file.create_dataset('py_%s' % tt, data=np.asarray(py_list[tt], dtype=np.float32))
        hdf5_file.create_dataset('pz_%s' % tt, data=np.asarray(pz_list[tt], dtype=np.float32))
        hdf5_file.create_dataset('patnames_%s' % tt, data=np.asarray(pat_names_list[tt], dtype="S10"))
    
    # After test train loop:
    hdf5_file.close()
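

# ====================================================================
# utils.crop_or_pad_slice_to_size is used throughout the functions
# above but defined elsewhere. A minimal sketch that is consistent
# with its usage (center-crop or zero-pad a 2D slice to (nx, ny)) and
# mirrors the inline crop/pad logic of the 3D evaluation code above;
# this is an assumption, not the repository's exact implementation.
# ====================================================================
import numpy as np

def crop_or_pad_slice_to_size_sketch(slice_2d, nx, ny):
    x, y = slice_2d.shape
    x_s = (x - nx) // 2
    y_s = (y - ny) // 2
    x_c = (nx - x) // 2
    y_c = (ny - y) // 2

    if x > nx and y > ny:
        # both dimensions too large: center-crop
        return slice_2d[x_s:x_s + nx, y_s:y_s + ny]

    out = np.zeros((nx, ny), dtype=slice_2d.dtype)
    if x <= nx and y > ny:
        # pad along x, crop along y
        out[x_c:x_c + x, :] = slice_2d[:, y_s:y_s + ny]
    elif x > nx and y <= ny:
        # crop along x, pad along y
        out[:, y_c:y_c + y] = slice_2d[x_s:x_s + nx, :]
    else:
        # pad along both dimensions
        out[x_c:x_c + x, y_c:y_c + y] = slice_2d
    return out
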
def prepare_data(input_folder, preproc_folder, idx_start, idx_end,
                 bias_correction):

    images = []
    affines = []
    patnames = []
    masks = []

    # read the foldernames
    foldernames = sorted(glob.glob(input_folder + '*/'))
    logging.info('Number of images in the dataset: %s' % str(len(foldernames)))

    # iterate through all indices
    for idx in range(len(foldernames)):

        # only consider images within the indices requested
        if (idx < idx_start) or (idx >= idx_end):
            logging.info('skipping subject: %d' % idx)
            continue

        # get the file name for this subject
        foldername = foldernames[idx]

        # extract the patient name
        _patname = foldername[foldername[:-1].rfind('/') + 1:-1]
        if _patname == 'A00033264':  # this subject has images of a different size
            continue

        # ====================================================
        # search for the segmentation file
        # ====================================================
        name = foldername + 'orig_labels_aligned_with_true_image.nii.gz'  # segmentation mask with ~100 classes
        logging.info('==============================================')
        logging.info('reading segmentation mask: %s' % name)

        # read the segmentation mask
        _seg_data, _seg_affine, _seg_header = utils.load_nii(name)

        # group the segmentation classes as required
        _seg_data = utils.group_segmentation_classes(_seg_data)

        # ====================================================
        # read the image file
        # ====================================================
        if bias_correction:
            name = foldername + 'MPRAGE_n4.nii'  # read the original image
        else:
            name = foldername + 'MPRAGE.nii'  # read the original image

        # ====================================================
        # bias field correction (optional) is assumed to have been run
        # beforehand, producing the MPRAGE_n4.nii file selected above
        # ====================================================

        # read the image
        logging.info('reading image: %s' % name)
        _img_data, _img_affine, _img_header = utils.load_nii(name)
        # _img_header.get_zooms() = (1.0, 1.0, 1.0)

        # ============
        # create a segmentation mask and use it to get rid of the skull in the image
        # ============
        seg_mask = np.copy(_seg_data)
        seg_mask[_seg_data > 0] = 1
        img_masked = _img_data * seg_mask

        # normalise the image
        _img_data = utils.normalise_image(img_masked, norm_type='div_by_max')

        # ============
        # rescale the image and the segmentation mask so that their pixel size in mm matches that of the hcp images
        # ============
        img_rescaled = rescale(image=_img_data,
                               scale=10 / 7,
                               order=1,
                               preserve_range=True,
                               multichannel=False)
        seg_rescaled = rescale(image=_seg_data,
                               scale=10 / 7,
                               order=0,
                               preserve_range=True,
                               multichannel=False)

        # ============
        # A lot of the periphery is just zeros, so get rid of some of it
        # ============
        # define how much of the image can be cropped out as it consists of zeros
        x_start = 13
        x_end = -14
        y_start = 55
        y_end = -55
        z_start = 55 + 16 + 50
        z_end = -55 - 16 + 50
        # original images are 176 * 256 * 256
        # rescaling them makes them 251 * 366 * 366
        # cropping them down to 224 * 256 * 224
        img_rescaled = img_rescaled[x_start:x_end, y_start:y_end,
                                    z_start:z_end]
        seg_rescaled = seg_rescaled[x_start:x_end, y_start:y_end,
                                    z_start:z_end]

        # save the pre-processed segmentation ground truth
        utils.makefolder(preproc_folder + _patname)
        utils.save_nii(preproc_folder + _patname + '/preprocessed_gt15.nii',
                       seg_rescaled, _seg_affine)
        if bias_correction:
            utils.save_nii(
                preproc_folder + _patname + '/preprocessed_image_n4.nii',
                img_rescaled, _img_affine)
        else:
            utils.save_nii(
                preproc_folder + _patname + '/preprocessed_image.nii',
                img_rescaled, _img_affine)

        # append to lists
        images.append(img_rescaled)
        affines.append(_img_affine)
        patnames.append(_patname)
        masks.append(seg_rescaled)

    # convert the lists to arrays
    images = np.array(images)
    affines = np.array(affines)
    patnames = np.array(patnames)
    masks = np.array(masks, dtype='uint8')

    # ========================
    # merge along the y-axis to get a stack of x-z slices, for the images as well as the masks
    # ========================
    images = images.swapaxes(1, 2)
    images = images.reshape(-1, images.shape[2], images.shape[3])
    masks = masks.swapaxes(1, 2)
    masks = masks.reshape(-1, masks.shape[2], masks.shape[3])

    # save the processed images and masks so that they can be directly read the next time
    # make appropriate filenames according to the requested indices of training, validation and test images
    logging.info('Saving pre-processed files...')
    config_details = 'from%dto%d_' % (idx_start, idx_end)

    if bias_correction:
        filepath_images = preproc_folder + config_details + 'images_2d_bias_corrected.npy'
    else:
        filepath_images = preproc_folder + config_details + 'images_2d.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15_2d.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'

    np.save(filepath_images, images)
    np.save(filepath_masks, masks)
    np.save(filepath_affine, affines)
    np.save(filepath_patnames, patnames)

    return images, masks, affines, patnames
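

# ====================================================================
# The scale factor 10 / 7 used above follows from matching voxel
# sizes: these images have 1.0 mm isotropic voxels (see the
# get_zooms() note), while the target HCP resolution is 0.7 mm, so
# each axis is upsampled by 1.0 / 0.7 = 10 / 7. A minimal sketch of
# the same computation (the input shape is an assumption chosen for
# illustration):
# ====================================================================
import numpy as np
from skimage.transform import rescale

img_1mm = np.zeros((176, 256, 256), dtype=np.float32)  # 1.0 mm voxels
img_07mm = rescale(img_1mm, scale=10 / 7, order=1,
                   preserve_range=True, multichannel=False)
print(img_07mm.shape)  # roughly (251, 366, 366), as noted in the comments above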
def prepare_data(input_folder, output_file, idx_start, idx_end, protocol, size,
                 target_resolution, preprocessing_folder):
    # ========================
    # read the filenames
    # ========================
    filenames = sorted(glob.glob(input_folder + '*.zip'))
    logging.info('Number of images in the dataset: %s' % str(len(filenames)))

    # =======================
    # create a hdf5 file
    # =======================
    # hdf5_file = h5py.File(output_file, "w")
    #
    # # ===============================
    # # Create datasets for images and labels
    # # ===============================
    # data = {}
    # num_subjects = idx_end - idx_start
    #
    # data['images'] = hdf5_file.create_dataset("images", [num_subjects] + list(size), dtype=np.float32)
    # data['labels'] = hdf5_file.create_dataset("labels", [num_subjects] + list(size), dtype=np.uint8)
    #
    # ===============================
    # initialize lists
    # ===============================
    label_list = []
    image_list = []
    nx_list = []
    ny_list = []
    nz_list = []
    px_list = []
    py_list = []
    pz_list = []
    pat_names_list = []

    # ===============================
    # initiate counter
    # ===============================
    patient_counter = 0

    # ===============================
    # iterate through the requested indices
    # ===============================
    for idx in range(idx_start, idx_end):
        logging.info('Volume {} of {}...'.format(idx, idx_end))

        # ==================
        # get file paths
        # ==================
        patient_name, image_path, label_path = get_image_and_label_paths(
            filenames[idx], protocol, preprocessing_folder)

        # ============
        # read the image (it is normalized to [0, 1] further below)
        # ============
        image, _, image_hdr = utils.load_nii(image_path)

        # ==================
        # read the label file
        # ==================
        label, _, _ = utils.load_nii(label_path)
        label = utils.group_segmentation_classes(
            label)  # group the segmentation classes as required

        # # ==================
        # # collect some header info.
        # # ==================
        # px_list.append(float(image_hdr.get_zooms()[0]))
        # py_list.append(float(image_hdr.get_zooms()[1]))
        # pz_list.append(float(image_hdr.get_zooms()[2]))
        # nx_list.append(image.shape[0])
        # ny_list.append(image.shape[1])
        # nz_list.append(image.shape[2])
        # pat_names_list.append(patient_name)

        # ==================
        # crop volume along all axes from the ends (as there are several zeros towards the ends)
        # ==================
        image = utils.crop_or_pad_volume_to_size_along_x(image, 256)
        label = utils.crop_or_pad_volume_to_size_along_x(label, 256)
        image = utils.crop_or_pad_volume_to_size_along_y(image, 256)
        label = utils.crop_or_pad_volume_to_size_along_y(label, 256)
        image = utils.crop_or_pad_volume_to_size_along_z(image, 256)
        label = utils.crop_or_pad_volume_to_size_along_z(label, 256)

        # ==================
        # normalize the image
        # ==================
        image_normalized = utils.normalise_image(image, norm_type='div_by_max')

        # ======================================================
        # rescale, crop / pad to make all images of the required size and resolution
        # ======================================================
        scale_vector = [
            image_hdr.get_zooms()[0] / target_resolution[0],
            image_hdr.get_zooms()[1] / target_resolution[1],
            image_hdr.get_zooms()[2] / target_resolution[2]
        ]

        image_rescaled = transform.rescale(image_normalized,
                                           scale_vector,
                                           order=1,
                                           preserve_range=True,
                                           multichannel=False,
                                           mode='constant')

        # label_onehot = utils.make_onehot(label, nlabels=15)
        #
        # label_onehot_rescaled = transform.rescale(label_onehot,
        #                                           scale_vector,
        #                                           order=1,
        #                                           preserve_range=True,
        #                                           multichannel=True,
        #                                           mode='constant')
        #
        # label_rescaled = np.argmax(label_onehot_rescaled, axis=-1)
        #
        # # ============
        # # the images and labels have been rescaled to the desired resolution.
        # # write them to the hdf5 file now.
        # # ============
        # image_list.append(image_rescaled)
        # label_list.append(label_rescaled)

        # ============
        # write the rescaled image slices to file
        # ============
        volume_dir = os.path.join(preprocessing_folder,
                                  'volume_{:06d}'.format(idx))
        os.makedirs(volume_dir, exist_ok=True)
        for i in range(size[1]):
            slice_path = os.path.join(volume_dir,
                                      'slice_{:06d}.jpeg'.format(i))
            slice_arr = image_rescaled[:, i, :] * 255  # map [0, 1] to [0, 255]
            slice_img = Image.fromarray(slice_arr.astype(np.uint8))
            slice_img.save(slice_path)
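
# ====================================================================
# The loop above writes each normalized slice as an 8-bit JPEG by
# mapping [0, 1] to [0, 255]. A minimal sketch of reading such a slice
# back into the normalized range; the path below is hypothetical.
# ====================================================================
import os
import numpy as np
from PIL import Image

slice_path = os.path.join('volume_000000', 'slice_000000.jpeg')  # hypothetical path
slice_arr = np.asarray(Image.open(slice_path), dtype=np.float32) / 255.0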
Example #24
def prepare_data(input_folder, output_file, mode, size, target_resolution):
    '''
    Main function that prepares a dataset from the raw challenge data to an hdf5 dataset
    '''

    assert (mode in ['2D', '3D']), 'Unknown mode: %s' % mode
    if mode == '2D' and not len(size) == 2:
        raise AssertionError('Inadequate number of size parameters')
    if mode == '3D' and not len(size) == 3:
        raise AssertionError('Inadequate number of size parameters')
    if mode == '2D' and not len(target_resolution) == 2:
        raise AssertionError(
            'Inadequate number of target resolution parameters')
    if mode == '3D' and not len(target_resolution) == 3:
        raise AssertionError(
            'Inadequate number of target resolution parameters')

    image_folder = os.path.join(input_folder, 'Prostate-3T')
    mask_folder = os.path.join(
        input_folder, 'NCI_ISBI_Challenge-Prostate3T_Training_Segmentations')

    hdf5_file = h5py.File(output_file, "w")

    logging.info('Counting files and parsing meta data...')
    folder_list = get_patient_folders(image_folder,
                                      folder_base='Prostate3T-01')

    if mode == '3D':
        nx, ny, nz_max = size
        n_train = len(folder_list['train'])
        n_test = len(folder_list['test'])
        n_val = len(folder_list['validation'])
    elif mode == '2D':
        num_slices = count_slices(image_folder, folder_base='Prostate3T-01')
        nx, ny = size
        n_test = num_slices['test']
        n_train = num_slices['train']
        n_val = num_slices['validation']
    else:
        raise AssertionError('Wrong mode setting. This should never happen.')

    print('Debug: Check if sets add up to correct value:')
    print(n_train, n_val, n_test, n_train + n_val + n_test)

    # Create datasets for images and masks
    data = {}
    for tt, num_points in zip(['test', 'train', 'validation'],
                              [n_test, n_train, n_val]):

        if num_points > 0:
            data['images_%s' % tt] = hdf5_file.create_dataset(
                "images_%s" % tt, [num_points] + list(size), dtype=np.float32)
            data['masks_%s' % tt] = hdf5_file.create_dataset(
                "masks_%s" % tt, [num_points] + list(size), dtype=np.uint8)

    mask_list = {'test': [], 'train': [], 'validation': []}
    img_list = {'test': [], 'train': [], 'validation': []}

    logging.info('Parsing image files')

    for train_test in ['test', 'train', 'validation']:

        write_buffer = 0
        counter_from = 0

        patient_counter = 0

        for folder in folder_list[train_test]:

            patient_counter += 1

            logging.info(
                '-----------------------------------------------------------')
            logging.info('Doing: %s' % folder)

            lstFilesDCM = []  # create an empty list
            for dirName, subdirList, fileList in os.walk(folder):
                # fileList.sort()
                for filename in fileList:
                    if ".dcm" in filename.lower(
                    ):  # check whether the file's DICOM
                        lstFilesDCM.append(os.path.join(dirName, filename))

            # Get ref file
            RefDs = dicom.read_file(lstFilesDCM[0])

            # Load dimensions based on the number of rows, columns, and slices (along the Z axis)
            ConstPixelDims = (int(RefDs.Rows), int(RefDs.Columns),
                              len(lstFilesDCM))

            # Load spacing values (in mm)
            pixel_size = (float(RefDs.PixelSpacing[0]),
                          float(RefDs.PixelSpacing[1]),
                          float(RefDs.SliceThickness))
            # print("pixel spacing 0,1; slice thickness ",ConstPixelSpacing)

            print('PixelDims')
            print(ConstPixelDims)
            print('PixelSpacing')
            print(pixel_size)

            # The array is sized based on 'ConstPixelDims'
            img = np.zeros(ConstPixelDims, dtype=RefDs.pixel_array.dtype)

            # loop through all the DICOM files
            for filenameDCM in lstFilesDCM:
                # read the file
                ds = dicom.read_file(filenameDCM)
                # store the raw image data
                # img[:, :, lstFilesDCM.index(filenameDCM)] = ds.pixel_array
                # the index number field is not set correctly; the InstanceNumber attribute gives the slice number instead
                img[:, :, ds.InstanceNumber - 1] = ds.pixel_array

            img = utils.normalise_image(img)

            mask_path = os.path.join(mask_folder,
                                     folder.split('/')[-1] + '.nrrd')
            mask, options = nrrd.read(mask_path)

            # fix the axis order by swapping x and y
            mask = np.swapaxes(mask, 0, 1)

            print('mask.shape')
            print(mask.shape)
            print('img.shape')
            print(img.shape)

            ### PROCESSING LOOP FOR 3D DATA ##################################
            if mode == '3D':

                scale_vector = [
                    pixel_size[0] / target_resolution[0],
                    pixel_size[1] / target_resolution[1],
                    pixel_size[2] / target_resolution[2]
                ]

                img_scaled = transform.rescale(img,
                                               scale_vector,
                                               order=1,
                                               preserve_range=True,
                                               multichannel=False,
                                               mode='constant')
                mask_scaled = transform.rescale(mask,
                                                scale_vector,
                                                order=0,
                                                preserve_range=True,
                                                multichannel=False,
                                                mode='constant')

                slice_vol = np.zeros((nx, ny, nz_max), dtype=np.float32)
                mask_vol = np.zeros((nx, ny, nz_max), dtype=np.uint8)

                nz_curr = img_scaled.shape[2]
                stack_from = (nz_max - nz_curr) // 2

                if stack_from < 0:
                    raise AssertionError(
                        'nz_max is too small for the chosen through-plane resolution. '
                        'Consider changing the size or the target resolution in the through-plane.'
                    )

                for zz in range(nz_curr):

                    slice_rescaled = img_scaled[:, :, zz]
                    mask_rescaled = mask_scaled[:, :, zz]

                    slice_cropped = crop_or_pad_slice_to_size(
                        slice_rescaled, nx, ny)
                    mask_cropped = crop_or_pad_slice_to_size(
                        mask_rescaled, nx, ny)

                    slice_vol[:, :, stack_from] = slice_cropped
                    mask_vol[:, :, stack_from] = mask_cropped

                    stack_from += 1

                img_list[train_test].append(slice_vol)
                mask_list[train_test].append(mask_vol)

                write_buffer += 1

                if write_buffer >= MAX_WRITE_BUFFER:

                    counter_to = counter_from + write_buffer
                    _write_range_to_hdf5(data, train_test, img_list, mask_list,
                                         counter_from, counter_to)
                    _release_tmp_memory(img_list, mask_list, train_test)

                    # reset stuff for next iteration
                    counter_from = counter_to
                    write_buffer = 0

            ### PROCESSING LOOP FOR SLICE-BY-SLICE 2D DATA ###################
            elif mode == '2D':

                scale_vector = [
                    pixel_size[0] / target_resolution[0],
                    pixel_size[1] / target_resolution[1]
                ]

                for zz in range(img.shape[2]):

                    slice_img = np.squeeze(img[:, :, zz])
                    slice_rescaled = transform.rescale(slice_img,
                                                       scale_vector,
                                                       order=1,
                                                       preserve_range=True,
                                                       multichannel=False,
                                                       mode='constant')

                    slice_mask = np.squeeze(mask[:, :, zz])
                    mask_rescaled = transform.rescale(slice_mask,
                                                      scale_vector,
                                                      order=0,
                                                      preserve_range=True,
                                                      multichannel=False,
                                                      mode='constant')

                    slice_cropped = crop_or_pad_slice_to_size(
                        slice_rescaled, nx, ny)
                    mask_cropped = crop_or_pad_slice_to_size(
                        mask_rescaled, nx, ny)

                    img_list[train_test].append(slice_cropped)
                    mask_list[train_test].append(mask_cropped)

                    write_buffer += 1

                    # Writing needs to happen inside the loop over the slices
                    if write_buffer >= MAX_WRITE_BUFFER:

                        counter_to = counter_from + write_buffer
                        _write_range_to_hdf5(data, train_test, img_list,
                                             mask_list, counter_from,
                                             counter_to)
                        _release_tmp_memory(img_list, mask_list, train_test)

                        # reset stuff for next iteration
                        counter_from = counter_to
                        write_buffer = 0

        logging.info('Writing remaining data')
        counter_to = counter_from + write_buffer

        _write_range_to_hdf5(data, train_test, img_list, mask_list,
                             counter_from, counter_to)
        _release_tmp_memory(img_list, mask_list, train_test)

    # After test train loop:
    hdf5_file.close()
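

# ====================================================================
# A hypothetical invocation of the prepare_data function above; the
# paths, mode, size and target resolution are assumptions chosen for
# illustration, not values taken from the repository.
# ====================================================================
if __name__ == '__main__':
    prepare_data(input_folder='/data/prostate/',
                 output_file='/data/prostate/prostate_2d.hdf5',
                 mode='2D',
                 size=(256, 256),
                 target_resolution=(0.625, 0.625))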