Example #1
0
def preprocess_image(image, preprocess, im_size):
    """Preprocess image files before encoding in TFrecords.

    Args:
        image: numpy array holding the image.
        preprocess: container of preprocessing-op names. Membership is
            tested with `in`, so this may be a list of op names or a
            single string containing one op name.
        im_size: target [height, width, channels] size.

    Returns:
        The preprocessed image cast to float32.
    """
    # Test the most specific names first: 'crop_center' and 'resize' are
    # substrings of 'crop_center_resize' and 'pad_resize', so the previous
    # ordering misdispatched string-valued `preprocess` (e.g. 'pad_resize'
    # matched the bare 'resize' test and skipped the padding step).
    if 'crop_center_resize' in preprocess:
        # Center-crop to the largest possible square, then resize.
        im_shape = image.shape
        min_shape = np.min(im_shape[:2])
        crop_im_size = [min_shape, min_shape, im_shape[-1]]
        image = image_processing.crop_center(image, crop_im_size)
        image = image_processing.resize(image, im_size)
    elif 'pad_resize' in preprocess:
        # Pad to square first so the resize does not change aspect ratio.
        image = image_processing.pad_square(image)
        image = image_processing.resize(image, im_size)
    elif 'crop_center' in preprocess:
        image = image_processing.crop_center(image, im_size)
    elif 'resize' in preprocess:
        image = image_processing.resize(image, im_size)
    return image.astype(np.float32)
def preprocess_image(image, preprocess, im_size):
    """Preprocess image files before encoding in TFrecords.

    Dispatches on the first matching op name found in `preprocess`
    ('crop_center', 'resize', or 'pad_resize'); returns the image
    unchanged when none match.
    """
    if 'crop_center' in preprocess:
        return image_processing.crop_center(image, im_size)
    if 'resize' in preprocess:
        return image_processing.resize(image, im_size)
    if 'pad_resize' in preprocess:
        # Square-pad before resizing to preserve aspect ratio.
        squared = image_processing.pad_square(image)
        return image_processing.resize(squared, im_size)
    return image
Example #3
0
def preprocess_image(image, preprocess, im_size, process_label=False):
    """Preprocess image files before encoding in TFrecords.

    Applies each op named in `preprocess` in order. Returns the image as
    float32, or False when the 'exclude_white' check rejects the image.
    """
    for op in preprocess:
        # Geometry ops (mutually exclusive by name).
        if op == 'crop_center':
            image = image_processing.crop_center(image, im_size)
        elif op == 'crop_center_resize':
            # Crop to the largest centered square, then resize.
            side = np.min(image.shape[:2])
            image = image_processing.crop_center(
                image, [side, side, image.shape[-1]])
            image = image_processing.resize(image, im_size)
        elif op == 'resize':
            image = image_processing.resize(image, im_size)
        elif op == 'pad_resize':
            image = image_processing.resize(
                image_processing.pad_square(image), im_size)
        if op == 'hed_pad':
            image = hed_pad(image, process_label=process_label)
        if op == 'trim_extra_dims':
            # Drop surplus trailing channels; fewer than requested is fatal.
            extra = image.shape[-1] - im_size[-1]
            if extra > 0:
                image = image[..., :im_size[-1]]
            elif extra < 0:
                raise RuntimeError('Failed preproc on trim_extra_dims.')
        if op == 'rgba2rgb':
            image = image[:, :, :-1]
        if op == 'to_float32':
            image = image.astype(np.float32)
        if op == 'rgba2gray':
            image = color.rgb2gray(image[:, :, :-1])
        if op == 'exclude_white':
            # Reject images where >25% of pixels have near-zero channel
            # variance (flat/white regions).
            flat_pixels = np.sum(np.std(image, axis=-1) < 0.01)
            if flat_pixels / np.prod(image.shape[:-1]) > 0.25:
                return False
    return image.astype(np.float32)
Example #4
0
    def get_data(self):
        """Collect image files and labels, split into CV folds.

        Globs `self.kras_dir` for images, derives binary labels from the
        filename prefix ('KRAS' -> 1, else 0), optionally loads all pixel
        data and normalizes it per-slide, splits train/val by substring
        matches against `self.val_set`, and roughly balances the train
        class counts.

        Returns:
            (cv_files, cv_labels): dicts keyed by `self.folds` fold names;
            the 'val' and 'test' folds share the same data.
        """

        # Every file in the KRAS directory with the expected extension.
        all_ims = glob(
            os.path.join(
                self.kras_dir,
                '*%s' % self.im_extension))
        all_ims = np.array(all_ims)
        # Keep a copy of the file *names*; `all_ims` itself may later be
        # replaced by pixel data in the calculate_moments branch.
        all_im_names = np.copy(all_ims)
        # Label is 1 iff the basename's token before the first '_' is 'KRAS'.
        all_labels = np.array(
            [1
                if x.split(os.path.sep)[-1].split('_')[0] == 'KRAS'
                else 0 for x in all_ims])
        assert len(all_ims) == len(all_labels)

        # Load the images and then normalize per-slide
        if self.calculate_moments:
            # Slide id is the second '_'-separated token of the basename.
            all_slides = np.array(
                [x.split(os.path.sep)[-1].split('_')[1] for x in all_ims])
            means, stds = {}, {}
            unique_slides = np.unique(all_slides)

            # First load the images
            image_data = np.zeros(
                [len(all_ims)] + self.im_size, dtype=np.float32)
            # Keep-mask; entries are cleared below for mostly-flat tiles.
            exclude_idx = np.ones(len(all_ims)).astype(bool)
            # Pixels per image (H * W), used to normalize the white check.
            hw = np.prod(self.im_size[:-1])
            for idx, f in tqdm(
                    enumerate(all_ims),
                    total=len(all_ims),
                    desc='Loading images'):
                # [:, :, :-1] presumably drops an alpha channel -- TODO
                # confirm the stored images are RGBA.
                image_data[idx] = image_processing.crop_center(
                    io.imread(f)[:, :, :-1], self.im_size).astype(np.float32)
                # Drop tiles where more than 25% of pixels have near-zero
                # channel variance (treated as white/background).
                thresh = 0.25
                white_check = np.sum(
                    np.std(image_data[idx], axis=-1) < 0.01) / hw
                if white_check > thresh:
                    exclude_idx[idx] = False
            all_slides = all_slides[exclude_idx]
            all_ims = all_ims[exclude_idx]
            all_labels = all_labels[exclude_idx]
            image_data = image_data[exclude_idx]
            # NOTE(review): `all_im_names` is NOT filtered by `exclude_idx`,
            # so when any tile is dropped `val_idx` below (built over
            # `all_im_names`) no longer aligns with the filtered
            # `all_ims`/`all_labels` -- likely an indexing bug; confirm.
            for slide_idx in tqdm(
                    unique_slides,
                    desc='Per-slide moments',
                    total=len(unique_slides)):
                # Scalar mean/std over all surviving tiles of this slide.
                idx = all_slides == slide_idx
                it_files = image_data[idx]
                im_stack = np.zeros(
                    [len(it_files)] + self.im_size, dtype=np.float32)
                for fidx, f in tqdm(enumerate(it_files), total=len(it_files)):
                    try:
                        im_stack[fidx, :, :] = f
                    except Exception:
                        print 'Failed to load image'
                means[slide_idx] = np.mean(im_stack.astype(np.float32))
                stds[slide_idx] = np.std(im_stack.astype(np.float32))

            # Z-score each tile with its slide's scalar mean/std.
            for slide_idx in tqdm(
                    unique_slides,
                    desc='Applying moments',
                    total=len(unique_slides)):
                idx = all_slides == slide_idx
                image_data[idx] = (
                    image_data[idx] - means[slide_idx]) / stds[slide_idx]
            # From here on `all_ims` holds pixel data, not file names.
            all_ims = image_data

        # Split into CV sets
        # A file is validation when any `self.val_set` entry occurs as a
        # substring of its name; `+=` on a bool array acts as logical OR.
        val_idx = np.zeros(len(all_im_names), dtype=bool)
        for m in tqdm(
                self.val_set,
                desc='Processing the validation index',
                total=len(self.val_set)):
            matches = np.array(
                [True if m in x else False for x in all_im_names])
            val_idx += matches
        train_ims = all_ims[~val_idx]
        train_labels = all_labels[~val_idx]
        val_ims = all_ims[val_idx]
        val_labels = all_labels[val_idx]

        # Balance +/- train sizes
        # NOTE(review): this appends one full copy of the minority class,
        # which yields exact balance only when one class is twice the size
        # of the other -- approximate rebalancing; confirm it is intended.
        print 'Began with %s train images and %s val images' % (
            len(train_ims), len(val_ims))
        pos_examples = train_labels.sum()
        imbalance = pos_examples - (len(train_labels) - pos_examples)
        if imbalance > 0:
            # More positives: duplicate every negative example once.
            neg_files = train_ims[train_labels == 0]
            neg_labels = train_labels[train_labels == 0]
            train_ims = np.concatenate((train_ims, neg_files))
            train_labels = np.concatenate((train_labels, neg_labels))
        else:
            # More (or equal) negatives: duplicate every positive once.
            pos_files = train_ims[train_labels == 1]
            pos_labels = train_labels[train_labels == 1]
            train_ims = np.concatenate((train_ims, pos_files))
            train_labels = np.concatenate((train_labels, pos_labels))
        print 'Balanced the train set to %s images' % len(train_ims)
        if self.shuffle:
            def shuffle_set(ims, labels):
                """Apply random shuffle to ims and labels."""
                # Same permutation for both arrays keeps pairs aligned.
                rand_idx = np.random.permutation(len(ims))
                assert len(ims) == len(labels)
                ims = ims[rand_idx]
                labels = labels[rand_idx]
                return ims, labels
            train_ims, train_labels = shuffle_set(train_ims, train_labels)
            val_ims, val_labels = shuffle_set(val_ims, val_labels)

        # Build CV dict
        # The validation split doubles as the test fold.
        cv_files, cv_labels = {}, {}
        cv_files[self.folds['train']] = train_ims
        cv_files[self.folds['val']] = val_ims
        cv_files[self.folds['test']] = val_ims
        cv_labels[self.folds['train']] = train_labels
        cv_labels[self.folds['val']] = val_labels
        cv_labels[self.folds['test']] = val_labels
        return cv_files, cv_labels