def prepare_data(input_folder, preproc_folder, idx_start, idx_end,
                 bias_correction):
    """Pre-process one range of subjects and cache the result as .npy files.

    For every subject folder in ``input_folder`` whose index falls in
    ``[idx_start, idx_end)``: read the segmentation mask and the (optionally
    N4 bias-corrected) MPRAGE image, skull-strip the image with the mask,
    normalise it, rescale both volumes by 10/7 (to match the HCP pixel size
    in mm), crop away the zero periphery, save the per-subject NIfTI outputs
    under ``preproc_folder``, and finally stack all subjects into 2D x-z
    slices saved as .npy arrays.

    Args:
        input_folder: Root folder containing one sub-folder per subject.
        preproc_folder: Output folder for the pre-processed files.
        idx_start: First subject index (inclusive) to process.
        idx_end: Last subject index (exclusive) to process.
        bias_correction: If truthy, read the N4 bias-corrected image
            ('MPRAGE_n4.nii') instead of the raw one ('MPRAGE.nii').

    Returns:
        Tuple ``(images, masks, affines, patnames)`` where images and masks
        are stacks of 2D x-z slices (merged along the y-axis) and affines /
        patnames are per-subject arrays.
    """
    images = []
    affines = []
    patnames = []
    masks = []

    # read the foldernames (one folder per subject)
    foldernames = sorted(glob.glob(input_folder + '*/'))
    # lazy %-args: the message is only formatted if the log level is enabled
    logging.info('Number of images in the dataset: %s', str(len(foldernames)))

    # iterate through all subject folders, processing only the requested range
    for idx, foldername in enumerate(foldernames):

        # only consider images within the indices requested
        if not (idx_start <= idx < idx_end):
            logging.info('skipping subject: %d', idx)
            continue

        # extract the patient name: the last path component of the folder
        # (rfind on foldername[:-1] skips the trailing '/')
        _patname = foldername[foldername[:-1].rfind('/') + 1:-1]
        if _patname == 'A00033264':  # this subject has images of a different size
            continue

        # ====================================================
        # read the segmentation file (mask with ~100 classes)
        # ====================================================
        name = foldername + 'orig_labels_aligned_with_true_image.nii.gz'
        logging.info('==============================================')
        logging.info('reading segmentation mask: %s', name)
        _seg_data, _seg_affine, _seg_header = utils.load_nii(name)

        # group the segmentation classes as required
        _seg_data = utils.group_segmentation_classes(_seg_data)

        # ====================================================
        # read the image file; the N4 bias-corrected variant is assumed to
        # have been produced beforehand when bias_correction is requested
        # ====================================================
        if bias_correction:
            name = foldername + 'MPRAGE_n4.nii'  # read the original image
        else:
            name = foldername + 'MPRAGE.nii'  # read the original image

        logging.info('reading image: %s', name)
        _img_data, _img_affine, _img_header = utils.load_nii(name)
        # _img_header.get_zooms() = (1.0, 1.0, 1.0)

        # ============
        # create a binary mask from the segmentation and use it to strip the
        # skull from the image (everything with label 0 is zeroed out)
        # ============
        seg_mask = np.copy(_seg_data)
        seg_mask[_seg_data > 0] = 1
        img_masked = _img_data * seg_mask

        # normalise the image to [0, 1] by dividing by its maximum
        _img_data = utils.normalise_image(img_masked, norm_type='div_by_max')

        # ============
        # rescale the image and the segmentation mask so that their pixel
        # size in mm matches that of the hcp images; order=1 (linear) for the
        # image, order=0 (nearest) for the label map to keep labels integral
        # ============
        img_rescaled = rescale(image=_img_data,
                               scale=10 / 7,
                               order=1,
                               preserve_range=True,
                               multichannel=False)
        seg_rescaled = rescale(image=_seg_data,
                               scale=10 / 7,
                               order=0,
                               preserve_range=True,
                               multichannel=False)

        # ============
        # A lot of the periphery is just zeros, so get rid of some of it.
        # original images are 176 * 256 * 256
        # rescaling them makes them 251 * 366 * 366
        # cropping them down to 224 * 256 * 224
        # ============
        x_start = 13
        x_end = -14
        y_start = 55
        y_end = -55
        z_start = 55 + 16 + 50  # z crop is shifted by +50 voxels
        z_end = -55 - 16 + 50
        img_rescaled = img_rescaled[x_start:x_end, y_start:y_end,
                                    z_start:z_end]
        seg_rescaled = seg_rescaled[x_start:x_end, y_start:y_end,
                                    z_start:z_end]

        # save the pre-processed segmentation ground truth and image
        utils.makefolder(preproc_folder + _patname)
        utils.save_nii(preproc_folder + _patname + '/preprocessed_gt15.nii',
                       seg_rescaled, _seg_affine)
        if bias_correction:
            utils.save_nii(
                preproc_folder + _patname + '/preprocessed_image_n4.nii',
                img_rescaled, _img_affine)
        else:
            utils.save_nii(
                preproc_folder + _patname + '/preprocessed_image.nii',
                img_rescaled, _img_affine)

        # append to lists
        images.append(img_rescaled)
        affines.append(_img_affine)
        patnames.append(_patname)
        masks.append(seg_rescaled)

    # convert the lists to arrays (all volumes have equal shape after crop)
    images = np.array(images)
    affines = np.array(affines)
    patnames = np.array(patnames)
    masks = np.array(masks, dtype='uint8')

    # ========================
    # merge along the y-axis to get a stack of x-z slices,
    # for the images as well as the masks
    # ========================
    images = images.swapaxes(1, 2)
    images = images.reshape(-1, images.shape[2], images.shape[3])
    masks = masks.swapaxes(1, 2)
    masks = masks.reshape(-1, masks.shape[2], masks.shape[3])

    # save the processed images and masks so that they can be directly read
    # the next time; filenames encode the requested index range
    logging.info('Saving pre-processed files...')
    config_details = 'from%dto%d_' % (idx_start, idx_end)

    if bias_correction:
        filepath_images = preproc_folder + config_details + 'images_2d_bias_corrected.npy'
    else:
        filepath_images = preproc_folder + config_details + 'images_2d.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15_2d.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'

    np.save(filepath_images, images)
    np.save(filepath_masks, masks)
    np.save(filepath_affine, affines)
    np.save(filepath_patnames, patnames)

    return images, masks, affines, patnames
# --- Example #2 ---
def _export_subset(dataset, subset_name):
    """Convert every '.nii.gz' file of each patient in *dataset* to '.nii'.

    Files are normalized (except segmentations ending in 'seg.nii.gz'),
    transposed to put the slice axis first, and written under
    ``opt.output_dir/<subset_name>/<patient>/``.
    """
    with tqdm(enumerate(dataset), total=len(dataset)) as pbar:  # progress bar
        for _, item in pbar:
            for root, _dirs, files in os.walk(item['path']):
                for file in files:
                    if not file.endswith('.nii.gz'):
                        continue

                    full_file_path = os.path.join(root, file)
                    output_path = os.path.join(
                        opt.output_dir, subset_name, item['patient'],
                        file.replace('.nii.gz', '.nii'))

                    # Create output directory if not yet exists
                    # (exist_ok avoids the check-then-create race)
                    os.makedirs(os.path.dirname(output_path), exist_ok=True)

                    # Read NifTI file; affine is not needed for the output
                    data, _affine = utils.read_nii(full_file_path)

                    # Normalize intensity images; segmentation masks
                    # (ending in 'seg.nii.gz') are kept as-is
                    if not file.endswith('seg.nii.gz'):
                        data, _stats = normalize(data)

                    # Transpose so the slice axis comes first
                    data = data.transpose(2, 0, 1)

                    # Save to file
                    # NOTE(review): utils.save_nii here takes (data, path) —
                    # verify this matches the utils module actually imported
                    utils.save_nii(data, output_path)


def main():
    """Collect patients, split into train/val, and export both subsets."""
    # gather every non-hidden patient folder from all configured directories
    patients = []
    for folder in directories:
        for patient in os.listdir(folder):
            if not patient.startswith('.'):
                patients.append({
                    'patient': patient,
                    'path': os.path.join(folder, patient)
                })

    print(len(patients))

    # shuffle and split into train and validation sets
    np.random.shuffle(patients)
    print(opt.split * len(patients))
    split_index = int(opt.split * len(patients))
    training_set = patients[:split_index]
    validation_set = patients[split_index:]

    print('Training set: {} patients'.format(len(training_set)))
    print('Validation set: {} patients'.format(len(validation_set)))

    # identical processing for both subsets; only the output sub-folder differs
    _export_subset(training_set, 'train')
    _export_subset(validation_set, 'val')