def main():
    print('\nBuilding SUPERVISED sets.')
    build_sup_sets(source_dir)
    build_scribble_sets(source_dir)

    print('Building UNSUPERVISED sets.')
    build_unsup_sets(source_dir)

    print('Building DISCRIMINATOR sets.')
    build_disc_sets(source_dir)

    print_yellow_text('\nDone.\n', sep=False)
import numpy as np
import nibabel as nib
from glob import glob
from idas.utils.utils import print_yellow_text
import os
import cv2
import tensorflow as tf

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = str(1)
N_CLASSES = 4

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# data set dirs:

print_yellow_text('')
source_dir = '../DATA/ACDC'

print('\033[1;33mSource_dir = \033[0m{0}'.format(source_dir))

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# arrays with shape [None, width, height, channels]
# the mean width, height and in-plane spatial resolution for the ACDC data set:
mean_width = 227.03  # values computed after resizing the data set to the same resolution
mean_height = 254.88  # values computed after resizing the data set to the same resolution

mean_dx = 1.5117105
mean_dy = 1.5117105
img_dx = mean_dx
img_dy = mean_dy
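
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A minimal illustrative sketch (not part of the original pipeline): how a 2D slice could be
# resampled from its native pixel spacing (dx, dy) to the mean spacing (img_dx, img_dy) defined
# above. The helper name resample_slice and the choice of cv2.INTER_CUBIC are assumptions.

def resample_slice(slice_2d, dx, dy, target_dx=img_dx, target_dy=img_dy):
    """Resample a 2D slice from spacing (dx, dy), in mm/pixel, to (target_dx, target_dy)."""
    new_w = int(round(slice_2d.shape[1] * dx / target_dx))  # columns scale with the x spacing
    new_h = int(round(slice_2d.shape[0] * dy / target_dy))  # rows scale with the y spacing
    # cv2.resize expects the destination size as (width, height)
    return cv2.resize(slice_2d.astype(np.float32), (new_w, new_h),
                      interpolation=cv2.INTER_CUBIC)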
    def get_data(self,
                 b_size,
                 augment=False,
                 repeat=False,
                 num_threads=4,
                 seed=None):
        """ Returns iterators on the dataset along with their initializers.
        :param b_size: batch size
        :param augment: if to perform data augmentation
        :param repeat: (bool) whether to repeat the input indefinitely
        :param num_threads: for parallel computing
        :param seed: (int or placeholder) seed for the random operations
        :return: train_init, valid_init, input_data, label
        """
        with tf.name_scope('{0}_data'.format(self.dataset_name)):

            _train_data = tf.constant(self.x_train_paths)
            _valid_data = tf.constant(self.x_validation_paths)
            train_data = tf.data.Dataset.from_tensor_slices(_train_data)
            valid_data = tf.data.Dataset.from_tensor_slices(_valid_data)

            train_data = train_data.shuffle(buffer_size=len(self.x_train_paths),
                                            seed=seed)

            train_data = train_data.map(
                lambda filename: tf.py_func(  # Parse the record into tensors
                    self.data_parser,
                    inp=[filename],
                    Tout=[tf.float32]),
                num_parallel_calls=num_threads)

            valid_data = valid_data.map(
                lambda filename: tf.py_func(  # Parse the record into tensors
                    self.data_parser,
                    inp=[filename],
                    Tout=[tf.float32]),
                num_parallel_calls=num_threads)

            # - - - - - - - - - - - - - - - - - - - -

            if augment:
                train_data = train_data.map(
                    lambda y: self._data_augmentation_ops(y),
                    num_parallel_calls=num_threads)
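                # cast the (non-augmented) validation data to float32 so its dtype stays
                # consistent with the augmented training data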
                valid_data = valid_data.map(lambda y: tf.cast(y, tf.float32),
                                            num_parallel_calls=num_threads)

            if repeat:
                if self.verbose:
                    print_yellow_text(
                        ' --> Repeat the input indefinitely  = True',
                        sep=False)
                train_data = train_data.repeat()  # Repeat the input indefinitely

            # un-batch first, then batch the data
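            # (each parsed file yields a stack of slices; unbatch() splits it along the first axis
            #  into single slices, so the re-batching below mixes slices coming from different files)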
            train_data = train_data.apply(tf.data.experimental.unbatch())
            valid_data = valid_data.apply(tf.data.experimental.unbatch())

            seed2 = seed + 1
            train_data = train_data.shuffle(buffer_size=len(self.x_train_paths),
                                            seed=seed2)

            train_data = train_data.batch(b_size, drop_remainder=True)
            valid_data = valid_data.batch(b_size, drop_remainder=True)

            # if len(get_available_gpus()) > 0:
            #     # prefetch data to the GPU
            #     # train_data = train_data.apply(tf.data.experimental.prefetch_to_device("/gpu:0"))
            #     train_data = train_data.apply(tf.data.experimental.copy_to_device("/gpu:0")).prefetch(1)

            iterator = tf.data.Iterator.from_structure(
                train_data.output_types, train_data.output_shapes)

            _input_data = iterator.get_next()
            train_init = iterator.make_initializer(
                train_data)  # initializer for train_data
            valid_init = iterator.make_initializer(
                valid_data)  # initializer for valid_data

            with tf.name_scope('disc_data'):
                input_data = tf.reshape(_input_data,
                                        shape=[
                                            -1, self.input_size[0],
                                            self.input_size[1], self.n_classes
                                        ])
                input_data = tf.cast(input_data, tf.float32)

            return train_init, valid_init, input_data
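
    # Typical usage of the handles returned above (a minimal sketch, assuming a wrapper instance
    # named disc_data and a TF1 session; the names below are illustrative, not the project's API):
    #
    #   train_init, valid_init, disc_input = disc_data.get_data(b_size=8, augment=True)
    #   with tf.Session() as sess:
    #       sess.run(tf.global_variables_initializer())
    #       sess.run(train_init)                   # point the shared iterator at the training set
    #       while True:
    #           try:
    #               batch = sess.run(disc_input)   # [b_size, H, W, n_classes] arrays
    #           except tf.errors.OutOfRangeError:  # raised once the (non-repeated) set is exhausted
    #               break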
    def get_data(self,
                 b_size,
                 augment=False,
                 standardize=False,
                 repeat=False,
                 num_threads=4,
                 seed=None,
                 shuffle_validation=False):
        """ Returns iterators on the dataset along with their initializers.
        :param b_size: batch size
        :param augment: if to perform data augmentation
        :param standardize: if to standardize the input data
        :param repeat: (bool) whether to repeat the input indefinitely
        :param shuffle_validation: (bool) whether to shuffle slices in the validation set
        :param num_threads: for parallel computing
        :param seed: (int or placeholder) seed for the random operations
        :return: train_init, valid_init, input_img, scribble, label
        """
        with tf.name_scope('{0}_data'.format(self.dataset_name)):

            _train_data = tf.constant(self.x_train_paths)
            _valid_data = tf.constant(self.x_validation_paths)
            _test_data = tf.constant(self.x_test_paths)
            train_data = tf.data.Dataset.from_tensor_slices(_train_data)
            valid_data = tf.data.Dataset.from_tensor_slices(_valid_data)
            test_data = tf.data.Dataset.from_tensor_slices(_test_data)

            train_data = train_data.shuffle(buffer_size=len(self.x_train_paths),
                                            seed=seed)

            train_data = train_data.map(
                lambda filename: tf.py_func(  # Parse the record into tensors
                    self.data_parser,
                    inp=[filename, standardize],
                    Tout=[tf.float32, tf.float32, tf.float32]),
                num_parallel_calls=num_threads)

            valid_data = valid_data.map(
                lambda filename: tf.py_func(  # Parse the record into tensors
                    self.data_parser,
                    inp=[filename, standardize],
                    Tout=[tf.float32, tf.float32, tf.float32]),
                num_parallel_calls=num_threads)

            test_data = test_data.map(
                lambda filename: tf.py_func(  # Parse the record into tensors
                    self.data_parser,
                    inp=[filename, standardize],
                    Tout=[tf.float32, tf.float32, tf.float32]),
                num_parallel_calls=num_threads)

            # - - - - - - - - - - - - - - - - - - - -

            if augment:
                train_data = train_data.map(
                    lambda x, y, z: self._data_augmentation_ops(x, y, z),
                    num_parallel_calls=num_threads)

            if repeat:
                if self.verbose:
                    print_yellow_text(
                        ' --> Repeat the input indefinitely  = True',
                        sep=False)
                train_data = train_data.repeat()  # Repeat the input indefinitely

            # un-batch first, then batch the data
            train_data = train_data.apply(tf.data.experimental.unbatch())
            valid_data = valid_data.apply(tf.data.experimental.unbatch())
            # test_data = test_data.apply(tf.data.experimental.unbatch())

            seed2 = seed + 1
            train_data = train_data.shuffle(buffer_size=len(self.x_train_paths),
                                            seed=seed2)

            # shuffle validation to have mixed slices (better visuals)
            if shuffle_validation:
                valid_data = valid_data.shuffle(
                    buffer_size=len(self.x_validation_paths))

            train_data = train_data.batch(b_size, drop_remainder=True)
            valid_data = valid_data.batch(b_size, drop_remainder=True)

            # test on each patient independently
            test_data = test_data.batch(1)
            test_data = test_data.map(lambda x, y, z: (x[0], y[0], z[0]),
                                      num_parallel_calls=num_threads)
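            # batch(1) wraps each parsed element (a full per-patient stack of slices) in a leading
            # singleton dimension; indexing with [0] removes it again, so each test batch is one volume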

            # if len(get_available_gpus()) > 0:
            #     # prefetch data to the GPU
            #     # train_data = train_data.apply(tf.data.experimental.prefetch_to_device("/gpu:0"))
            #     train_data = train_data.apply(tf.data.experimental.copy_to_device("/gpu:0")).prefetch(1)

            iterator = tf.data.Iterator.from_structure(
                train_data.output_types, train_data.output_shapes)

            _input_img, _output_scrib, _output_mask = iterator.get_next()
            train_init = iterator.make_initializer(
                train_data)  # initializer for train_data
            valid_init = iterator.make_initializer(
                valid_data)  # initializer for valid_data
            test_init = iterator.make_initializer(
                test_data)  # initializer for test_data

            with tf.name_scope('input_img_sup'):
                input_img = tf.reshape(_input_img,
                                       shape=[
                                           -1, self.input_size[0],
                                           self.input_size[1],
                                           self.n_channels_in
                                       ])
                input_img = tf.cast(input_img, tf.float32)

            with tf.name_scope('output_scrib_sup'):
                output_scrib = tf.reshape(_output_scrib,
                                          shape=[
                                              -1, self.input_size[0],
                                              self.input_size[1],
                                              self.n_classes
                                          ])
                output_scrib = tf.cast(output_scrib, tf.float32)

            with tf.name_scope('output_mask_sup'):
                output_mask = tf.reshape(_output_mask,
                                         shape=[
                                             -1, self.input_size[0],
                                             self.input_size[1], self.n_classes
                                         ])
                output_mask = tf.cast(output_mask, tf.float32)

            return train_init, valid_init, test_init, input_img, output_scrib, output_mask
    def train(self, n_epochs):
        """ The train function alternates between training one epoch and evaluating """
        if self.verbose:
            print(
                "\nStarting network training... Number of epochs to train: \033[94m{0}\033[0m"
                .format(n_epochs))
            print("Tensorboard verbose mode: \033[94m{0}\033[0m".format(
                self.tensorboard_verbose))
            print("Tensorboard dir: \033[94m{0}\033[0m".format(self.graph_dir))
            print(
                "Data augmentation: \033[94m{0}\033[0m, Data standardization: \033[94m{1}\033[0m."
                .format(self.augment, self.standardize))

        utils.safe_mkdir(self.checkpoint_dir)
        utils.safe_mkdir(self.history_log_dir)
        writer = tf.summary.FileWriter(self.graph_dir, tf.get_default_graph())

        # config for the session: allow growth for GPU to avoid OOM when other processes are running
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            saver = tf.train.Saver(
                max_to_keep=2)  # keep_checkpoint_every_n_hours=2
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname(self.last_checkpoint_dir + '/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

            trained_epochs = self.g_epoch.eval()
            if self.verbose:
                print("Model already trained for \033[94m{0}\033[0m epochs.".
                      format(trained_epochs))
            t_step = self.g_train_step.eval()  # global step for train
            v_step = self.g_valid_step.eval()  # global step for validation
            test_step = self.g_test_step.eval()  # global step for test

            # Define a caller to call the callbacks
            self.callbacks_kwargs.update({'sess': sess, 'cnn': self})
            caller = tf_callbacks.ChainCallback(callbacks=self.callbacks)
            caller.on_train_begin(training_state=True, **self.callbacks_kwargs)

            # guard against performance bugs: finalizing the graph raises an exception if any new node is
            # inadvertently added later, ensuring the same nodes are not re-created at every step (which would be expensive):
            tf.get_default_graph().finalize()

            # saving callback:
            self.callbacks_kwargs['es_loss'] = 100  # arbitrarily large initial value for the early-stopping loss

            if self.ep_offset is None:
                print('\nNo offset specified for the early stopping criterion')
                self.ep_offset = n_epochs // 2
                print(
                    ' >> Proceeding with default early stopping offset: \033[94m{0} epochs\033[0m'
                    .format(self.ep_offset))

            for epoch in range(n_epochs):
                ep_str = (str(epoch + 1) if trained_epochs == 0
                          else '({0}+) {1}'.format(trained_epochs, epoch + 1))
                print('_' * 40 +
                      '\n\033[1;33mEPOCH {0}\033[0m - {1} : '.format(
                          ep_str, self.run_id))
                caller.on_epoch_begin(training_state=True,
                                      **self.callbacks_kwargs)

                global_ep = sess.run(self.g_epoch)
                sess.run(self.update_lr)

                seed = global_ep

                # TRAIN MODE ------------------------------------------
                iterator_init_list = [
                    self.acdc_sup_train_init, self.acdc_unsup_train_init,
                    self.acdc_disc_train_init
                ]
                t_step = self.train_one_epoch(sess, iterator_init_list, writer,
                                              t_step, caller, seed)

                # VALIDATION MODE ------------------------------------------
                if global_ep >= self.ep_offset or not (
                    (global_ep + 1) % 15):  # when to evaluate the model
                    iterator_init_list = [
                        self.acdc_sup_valid_init, self.acdc_disc_valid_init,
                        self.acdc_unsup_valid_init
                    ]
                    v_step, val_loss = self.eval_once(sess, iterator_init_list,
                                                      writer, v_step, caller)

                    if global_ep >= self.ep_offset:
                        self.callbacks_kwargs['es_loss'] = val_loss
                    sess.run(self.update_last_val_loss,
                             feed_dict={'best_val_loss_value:0': val_loss})

                if self.tensorboard_verbose and (global_ep % 10 == 0):
                    # writing summary for the weights:
                    summary = sess.run(self.weights_summary)
                    writer.add_summary(summary, global_step=t_step)

                try:
                    caller.on_epoch_end(training_state=True,
                                        **self.callbacks_kwargs)
                except EarlyStoppingException:
                    utils.print_yellow_text('\nEarly stopping...\n')
                    break
                except NeedForTestException:
                    if global_ep >= self.ep_offset:  # minimum epochs to wait before applying early stopping
                        # save the updated variables and weights --> this is the best model found (early stopping)
                        saver.save(sess, self.checkpoint_dir + '/checkpoint',
                                   t_step)

            caller.on_train_end(training_state=True, **self.callbacks_kwargs)

            # end of the training: save the current weights in a new sub-directory
            utils.safe_mkdir(self.checkpoint_dir + '/last_model')
            saver.save(sess, self.checkpoint_dir + '/last_model/checkpoint',
                       t_step)

            # load best model and do a test:
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname(self.checkpoint_dir + '/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

            _ = self.test_once(sess, self.acdc_sup_test_init, writer,
                               test_step, caller)

        writer.close()
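
# Example driver for the trainer above (a sketch only; the class name Model and its
# constructor/build interface are assumptions, not the project's actual API):
#
#   model = Model(run_id='acdc_scribble_exp')  # hypothetical wrapper exposing train()
#   model.build()                              # build the computation graph first
#   model.train(n_epochs=300)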
import cv2
import tensorflow as tf

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = str(1)
N_CLASSES = 4

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

ONLY_SUPERVISED_SET = False  # the supervised set is the only one that actually changes between runs.
#                            # The first time, run this script with this flag set to False.

GENERATE_FAKE_SCRIBBLES = False  # if you want to generate synthetic scribbles from segmentation masks

if ONLY_SUPERVISED_SET:
    print_yellow_text(
        '\nONLY_SUPERVISED_SET = {0}\n'.format(ONLY_SUPERVISED_SET))
if GENERATE_FAKE_SCRIBBLES:
    print_yellow_text(
        '\nThe flag GENERATE_FAKE_SCRIBBLES will be ignored because real scribbles are '
        'available for this dataset')

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# data set dirs:

print_yellow_text('')
source_dir = './data/acdc_data/'

print('\033[1;33mSource_dir = \033[0m{0}'.format(source_dir))

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# arrays with shape [None, width, height, channels]