示例#1
0
def test_generator_enqueuer_fail_threads():
    """Check the thread-backed enqueuer's failure path.

    Pulling the first item from a `GeneratorEnqueuer` that wraps a
    `FaultSequence` generator is expected to raise `IndexError`.
    """
    faulty_source = create_generator_from_sequence_threads(FaultSequence())
    failing_enqueuer = GeneratorEnqueuer(faulty_source,
                                         use_multiprocessing=False)
    failing_enqueuer.start(3, 10)
    output_stream = failing_enqueuer.get()
    with pytest.raises(IndexError):
        next(output_stream)
示例#2
0
def test_generator_enqueuer_fail_processes():
    """Check the process-backed enqueuer's failure path.

    Pulling the first item from a multiprocessing `GeneratorEnqueuer` that
    wraps a `FaultSequence` generator is expected to raise `StopIteration`.
    """
    faulty_source = create_generator_from_sequence_pcs(FaultSequence())
    failing_enqueuer = GeneratorEnqueuer(faulty_source,
                                         use_multiprocessing=True)
    failing_enqueuer.start(3, 10)
    output_stream = failing_enqueuer.get()
    with pytest.raises(StopIteration):
        next(output_stream)
示例#3
0
def DatasetIterator(dataset_path, size, iter_sampler,
                     use_multiprocessing=False,
                     workers=1, max_queue_size=10):
    '''Iterate over the dataset, prefetching batches through an enqueuer.

    The enqueuer is stopped when iteration ends, when the consumer closes
    this generator, or when an error propagates, so that worker
    threads/processes are not left running.

    # Arguments
        dataset_path: path to `.h5` dataset
        size: batch size
        iter_sampler: callable. Should return iteration to be sliced
        use_multiprocessing: use multiprocessing for workers, Bool
        workers: number of workers, int
        max_queue_size: maximum queue size, int
    # Output
        x_batch, y_batch

        x_batch: input tensor of size `(size, height, width, 2)`
        y_batch: output mask of size `(size, height, width, 1)`
    '''

    core_gen = minibatch_generator(dataset_path=dataset_path, size=size,
                                   iter_sampler=iter_sampler)
    enqueuer = GeneratorEnqueuer(generator=core_gen,
                                 use_multiprocessing=use_multiprocessing)
    enqueuer.start(workers, max_queue_size=max_queue_size)

    try:
        # A `for` loop ends cleanly on StopIteration, exactly like the
        # explicit next()/except StopIteration pattern it replaces.
        for batch in enqueuer.get():
            yield batch
    finally:
        # Also runs on GeneratorExit (consumer stopped early) and on errors.
        enqueuer.stop()
示例#4
0
def test_generator_enqueuer_fail_threads():
    """Expect `IndexError` from the first item of a thread-backed enqueuer.

    The enqueuer wraps a generator built from `FaultSequence`.
    """
    broken_generator = create_generator_from_sequence_threads(FaultSequence())
    threaded = GeneratorEnqueuer(broken_generator, use_multiprocessing=False)
    threaded.start(3, 10)
    results = threaded.get()
    with pytest.raises(IndexError):
        next(results)
def test_generator_enqueuer_fail_processes():
    """Expect `StopIteration` from the first item of a process-backed enqueuer.

    The enqueuer wraps a generator built from `FaultSequence`.
    """
    broken_generator = create_generator_from_sequence_pcs(FaultSequence())
    multiproc = GeneratorEnqueuer(broken_generator, use_multiprocessing=True)
    multiproc.start(3, 10)
    results = multiproc.get()
    with pytest.raises(StopIteration):
        next(results)
示例#6
0
    def __init__(self, data_loader):
        """Start a multiprocessing enqueuer over the loader's dataset.

        Reads `self.dataset`, `self.num_workers` and `self.queue_size`
        (presumably set by the parent constructor -- confirm against the
        base class) and exposes the enqueuer's output as
        `self.output_generator`.
        """
        super().__init__(data_loader)

        # Wrap the dataset and hand it straight to a process-based enqueuer.
        self.enqueuer = GeneratorEnqueuer(
            _MPAssistantIterator(self.dataset),
            use_multiprocessing=True,
        )
        self.enqueuer.start(
            workers=self.num_workers,
            max_queue_size=self.queue_size,
        )
        self.output_generator = self.enqueuer.get()

        self.buffer = []
示例#7
0
def test_missing_inputs():
    """Both enqueuer kinds must warn when one input cannot be retrieved.

    `TimeOutSequence` sleeps for 120 seconds when item `missing_idx` is
    requested. The test expects a `UserWarning` from the `GeneratorEnqueuer`
    and an index-specific `UserWarning` from the `OrderedEnqueuer`.
    """
    missing_idx = 10

    class TimeOutSequence(DummySequence):
        """DummySequence whose item `missing_idx` blocks before returning."""

        def __getitem__(self, index):
            if index == missing_idx:
                time.sleep(120)
            return super(TimeOutSequence, self).__getitem__(index)

    # Unordered, generator-based enqueuer.
    stalled_generator = create_finite_generator_from_sequence_pcs(
        TimeOutSequence([3, 2, 2, 3]))
    unordered = GeneratorEnqueuer(stalled_generator, use_multiprocessing=True)
    unordered.start(3, 10)
    unordered_output = unordered.get()
    with pytest.warns(UserWarning, match='An input could not be retrieved.'):
        for _ in range(4 * missing_idx):
            next(unordered_output)

    # Ordered, sequence-based enqueuer: the warning names the missing index.
    ordered = OrderedEnqueuer(TimeOutSequence([3, 2, 2, 3]),
                              use_multiprocessing=True)
    ordered.start(3, 10)
    ordered_output = ordered.get()
    expected_warning = "The input {} could not be retrieved.".format(missing_idx)
    with pytest.warns(UserWarning, match=expected_warning):
        for _ in range(11):
            next(ordered_output)
示例#8
0
def test_missing_inputs():
    """Verify that a stalled input triggers a `UserWarning` in both enqueuers.

    Item `missing_idx` of `TimeOutSequence` sleeps for 120 seconds; the
    `GeneratorEnqueuer` is expected to emit a generic warning and the
    `OrderedEnqueuer` one that mentions the missing index.
    """
    missing_idx = 10

    class TimeOutSequence(DummySequence):
        """DummySequence that blocks on item `missing_idx`."""

        def __getitem__(self, position):
            if position == missing_idx:
                time.sleep(120)
            return super(TimeOutSequence, self).__getitem__(position)

    gen_enqueuer = GeneratorEnqueuer(
        create_finite_generator_from_sequence_pcs(
            TimeOutSequence([3, 2, 2, 3])),
        use_multiprocessing=True)
    gen_enqueuer.start(3, 10)
    gen_stream = gen_enqueuer.get()
    with pytest.warns(UserWarning, match='An input could not be retrieved.'):
        for _ in range(4 * missing_idx):
            next(gen_stream)

    seq_enqueuer = OrderedEnqueuer(TimeOutSequence([3, 2, 2, 3]),
                                   use_multiprocessing=True)
    seq_enqueuer.start(3, 10)
    seq_stream = seq_enqueuer.get()
    indexed_warning = "The input {} could not be retrieved.".format(missing_idx)
    with pytest.warns(UserWarning, match=indexed_warning):
        for _ in range(11):
            next(seq_stream)
示例#9
0
    def _predict(self,
                 data_generator_function,
                 steps_per_epoch,
                 include_datum=True):
        """Run batch prediction over `steps_per_epoch` batches.

        # Arguments
            data_generator_function: callable accepting `include_datum` and
                returning a generator of `(X, y, datum_batch)` triples.
            steps_per_epoch: number of batches to consume.
            include_datum: when true, return captions zipped with their
                datums; otherwise return only the list of captions.

        # Returns
            `zip(caption_results, datum_results)` or `caption_results`.
        """
        # Datums are always requested because each batch is unpacked into
        # three values below; `include_datum` only shapes the return value.
        data_generator = data_generator_function(include_datum=True)
        enqueuer = GeneratorEnqueuer(data_generator)
        enqueuer.start(workers=self._WORKERS, max_queue_size=self._MAX_Q_SIZE)

        caption_results = []
        datum_results = []
        try:
            for _ in tqdm(range(steps_per_epoch)):
                generator_output = None
                # Poll until the queue holds at least one batch.
                while enqueuer.is_running():
                    if not enqueuer.queue.empty():
                        generator_output = enqueuer.get()
                        break
                    sleep(self._WAIT_TIME)

                # NOTE(review): if the enqueuer stopped before producing a
                # batch, `generator_output` is still None and `next` raises
                # TypeError here.
                X, y, datum_batch = next(generator_output)
                captions_pred_str = self._predict_batch(X, y)
                caption_results += captions_pred_str
                datum_results += datum_batch
        finally:
            # Stop the enqueuer even when prediction fails midway.
            enqueuer.stop()

        if include_datum:
            return zip(caption_results, datum_results)
        return caption_results
示例#10
0
class ImageGenerator(object):
    """Iterator yielding processed image batches through a GeneratorEnqueuer.

    Each item pulled from `rgb_generator` is either a batch of images or a
    tuple `(images, labels)`. Images are mapped through
    `input_processing_function`. When no labels are supplied, they are
    derived with `label_processing_function`, dropping `None` results.

    # Arguments
        rgb_generator: iterator producing image batches or
            `(images, labels)` tuples.
        input_processing_function: callable applied to every image.
        label_processing_function: callable deriving a label from an image;
            `None` results are discarded.
        use_multiprocessing: forwarded to `GeneratorEnqueuer`.
        wait_time: forwarded to `GeneratorEnqueuer`.
        workers: number of enqueuer workers.
        max_queue_size: maximum size of the enqueuer queue.

    # Yields
        `inputs` alone when no labels are available, else `(inputs, labels)`,
        as numpy arrays.
    """

    def __init__(self,
                 rgb_generator,
                 input_processing_function=lambda x: x,
                 label_processing_function=lambda x: None,
                 use_multiprocessing=False,
                 wait_time=0.01,
                 workers=4,
                 max_queue_size=10):
        class BatchGenerator(object):
            """Adapts `rgb_generator` items into numpy batches."""

            def __iter__(self):
                return self

            def __next__(self):
                rgb_images = next(rgb_generator)
                labels = []
                # isinstance also accepts tuple subclasses (e.g. namedtuples)
                if isinstance(rgb_images, tuple):
                    rgb_images, labels = rgb_images

                inputs = np.array([
                    input_processing_function(rgb_image)
                    for rgb_image in rgb_images
                ])
                if len(labels) == 0:
                    # No labels supplied: derive them from the raw images.
                    derived = [
                        label_processing_function(rgb_image)
                        for rgb_image in rgb_images
                    ]
                    labels = [item for item in derived if item is not None]

                if len(labels) == 0:
                    return inputs
                return inputs, np.array(labels)

            # Python2 compatibility
            next = __next__

        self.generator = GeneratorEnqueuer(
            generator=BatchGenerator(),
            use_multiprocessing=use_multiprocessing,
            wait_time=wait_time)
        self.generator.start(workers=workers, max_queue_size=max_queue_size)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next batch produced by the enqueuer."""
        return next(self.generator.get())

    # Python2 compatibility
    next = __next__
示例#11
0
def test_generator_enqueuer_threadsafe():
    """Expect a 'thread-safe' `RuntimeError` from a threaded enqueuer.

    A generator created with `create_generator_from_sequence_pcs` is
    consumed by a thread-backed enqueuer; pulling ten items must raise.
    """
    source = create_generator_from_sequence_pcs(DummySequence([3, 10, 10, 3]))
    enqueuer = GeneratorEnqueuer(source, use_multiprocessing=False)
    enqueuer.start(3, 10)
    stream = enqueuer.get()
    with pytest.raises(RuntimeError) as excinfo:
        for _ in range(10):
            next(stream)
    assert 'thread-safe' in str(excinfo.value)
    enqueuer.stop()
示例#12
0
    def __init__(self, data_generator, m_batch_size, num_samples, output_dir, normalization_mean):
        """Store the diagnostic settings and start a single-worker enqueuer.

        An `OrderedEnqueuer` (multiprocessing, unshuffled) is used when
        `data_generator` is a `Sequence`; any other generator gets a
        thread-based `GeneratorEnqueuer`.
        """
        super().__init__()
        self.data_generator = data_generator
        self.batch_size = m_batch_size
        self.num_samples = num_samples
        self.normalization_mean = normalization_mean
        self.epoch_index = 0
        self.tensorboard_writer = TensorBoardWriter(output_dir)

        if not isinstance(self.data_generator, Sequence):
            # todo: how to use use_multiprocessing=True here?
            self.enqueuer = GeneratorEnqueuer(
                self.data_generator,
                use_multiprocessing=False,
                wait_time=0.01,
            )
        else:
            self.enqueuer = OrderedEnqueuer(
                self.data_generator,
                use_multiprocessing=True,
                shuffle=False,
            )

        # todo: integrate the Sequence generator properly; starting with
        # workers=multiprocessing.cpu_count() was tried and is disabled.
        self.enqueuer.start(workers=1, max_queue_size=4)
示例#13
0
def test_finite_generator_enqueuer_processes():
    """Drain a finite generator through a process-backed enqueuer.

    The first element of every output is collected; the test asserts that
    the collected values are not exactly `0..99` in order.
    """
    finite_source = create_finite_generator_from_sequence_pcs(
        TestSequence([3, 200, 200, 3]))
    enqueuer = GeneratorEnqueuer(finite_source, use_multiprocessing=True)
    enqueuer.start(3, 10)
    acc = [int(output[0, 0, 0, 0]) for output in enqueuer.get()]
    assert acc != list(range(100)), "Order was keep in GeneratorEnqueuer with processes"
    enqueuer.stop()
示例#14
0
def test_finite_generator_enqueuer_threads():
    """Drain a finite generator through a thread-backed enqueuer.

    The set of first elements collected from the outputs must equal
    `{0, ..., 99}`; ordering is not checked.
    """
    finite_source = create_finite_generator_from_sequence_threads(
        DummySequence([3, 200, 200, 3]))
    enqueuer = GeneratorEnqueuer(finite_source, use_multiprocessing=False)
    enqueuer.start(3, 10)
    acc = [int(output[0, 0, 0, 0]) for output in enqueuer.get()]
    assert set(acc) == set(range(100)), "Output is not the same"
    enqueuer.stop()
示例#15
0
def test_generator_enqueuer_processes():
    """Pull 100 items from a process-backed enqueuer.

    The test asserts that the first elements of the outputs are not exactly
    `0..99` in order.
    """
    source = create_generator_from_sequence_pcs(
        DummySequence([3, 200, 200, 3]))
    enqueuer = GeneratorEnqueuer(source, use_multiprocessing=True)
    enqueuer.start(3, 10)
    stream = enqueuer.get()
    acc = [int(next(stream)[0, 0, 0, 0]) for _ in range(100)]
    assert acc != list(range(100)), ('Order was keep in GeneratorEnqueuer '
                                     'with processes')
    enqueuer.stop()
示例#16
0
def train_gan(gan, data_generator, cf):
    """Train the GAN for `cf.epochs` epochs on batches from `data_generator`.

    Each step splits a real batch into channel 0 (`batch_gray`) and the
    remaining channels (`batch_chroma`). It predicts fake chroma with
    `gan.combined`, trains every discriminator on the real and fake batches,
    then trains the combined model. Plots and weights are saved
    periodically, and losses are reported through the logs manager.

    # Arguments
        gan: object exposing `combined`, `discriminator`, `dis_real_Y`,
            `dis_fake_Y` and `gen_real_Y`.
        data_generator: batch generator exposing `nb_steps` and `decoder`.
        cf: configuration object (`output_path`, `epochs`, `workers`,
            `max_queue_size`, `use_multiprocessing`, `plots_per_epoch`,
            `weights_per_epoch`, `d_scales`, `display_step`).
    """
    check = _checkpoint(cf.output_path)
    logs = _logs_manager(cf.output_path, gan)

    train_datagen = GeneratorEnqueuer(
        data_generator, use_multiprocessing=cf.use_multiprocessing)
    train_datagen.start(cf.workers, cf.max_queue_size)

    # Loop-invariant metric names, built once instead of on every step.
    d_names = ['d_loss_%d' % i for i in range(cf.d_scales)]
    g_names = ['g_cgan_%d' % i for i in range(cf.d_scales)]
    metric_names = ['d_loss'] + d_names + ['g_loss'] + g_names + ['g_l1']

    try:
        train_generator = train_datagen.get()

        for epoch in range(cf.epochs):
            print('Epoch %d/%d' % (epoch + 1, cf.epochs))
            nb_steps = data_generator.nb_steps
            progbar = generic_utils.Progbar(nb_steps)

            # Clamp intervals to 1 so that fewer steps than requested
            # plots/checkpoints does not cause a modulo-by-zero.
            plot_interval = max(1, nb_steps // cf.plots_per_epoch)
            weights_interval = max(1, nb_steps // cf.weights_per_epoch)

            # Training loop
            for step in range(nb_steps):
                batch_real = next(train_generator)
                batch_gray = np.expand_dims(batch_real[:, :, :, 0], -1)
                batch_chroma = batch_real[:, :, :, 1:]
                batch_fake = gan.combined.predict(batch_gray)[-1]

                dis_real_X = np.concatenate((batch_gray, batch_chroma),
                                            axis=-1)
                dis_fake_X = np.concatenate((batch_gray, batch_fake), axis=-1)

                d_loss = []
                for d, real_Y, fake_Y in zip(gan.discriminator,
                                             gan.dis_real_Y,
                                             gan.dis_fake_Y):
                    d_real = d.train_on_batch(dis_real_X, real_Y)
                    d_fake = d.train_on_batch(dis_fake_X, fake_Y)
                    d_loss.append(0.5 * np.add(d_real, d_fake))

                g_loss = gan.combined.train_on_batch(
                    batch_gray, gan.gen_real_Y + [batch_chroma])

                if step % plot_interval == 0 and step != 0:
                    logs.save_plots(epoch, step,
                                    data_generator.decoder(dis_fake_X),
                                    data_generator.decoder(batch_real))

                if (step % weights_interval == 0 and step != 0
                        and epoch != 0):
                    check.save_weights(gan, epoch, step)

                logs.update(
                    names=metric_names,
                    values=[sum(d_loss)] + d_loss + [g_loss[0]] +
                    g_loss[1:-1] + [g_loss[-1]],
                    progbar=progbar,
                    display_step=cf.display_step)
    finally:
        # Release worker threads/processes even if training fails.
        train_datagen.stop()
示例#17
0
def test_generator_enqueuer_threadsafe():
    """Check that a threaded enqueuer reports a 'thread-safe' `RuntimeError`.

    Ten items are requested from a thread-backed enqueuer wrapping a
    generator made by `create_generator_from_sequence_pcs`.
    """
    unsafe_generator = create_generator_from_sequence_pcs(
        DummySequence([3, 10, 10, 3]))
    threaded = GeneratorEnqueuer(unsafe_generator, use_multiprocessing=False)
    threaded.start(3, 10)
    outputs = threaded.get()
    with pytest.raises(RuntimeError) as raised:
        for _ in range(10):
            next(outputs)
    assert 'thread-safe' in str(raised.value)
    threaded.stop()
示例#18
0
    def __init__(self,
                 dataset,
                 image_size=None,
                 batch_size=32,
                 shuffle=False,
                 seed=None,
                 use_multiprocessing=True,
                 workers=4,
                 max_queue_size=10,
                 deprocess_X=None,
                 deprocess_Y=None):
        """Set up the iterator and an enqueuer that is not started yet.

        `image_size` is indexed unconditionally, so a two-element value must
        be supplied despite the `None` default. It is stored with its two
        elements swapped.
        """
        super(SegDataIterator, self).__init__(
            len(dataset), batch_size, shuffle, seed)

        # Enqueuer settings; the enqueuer itself is only created here.
        self.workers = workers
        self.max_queue_size = max_queue_size
        self.is_start = False

        # Optional post-processing hooks for inputs and targets.
        self.deprocess_X = deprocess_X
        self.deprocess_Y = deprocess_Y

        self.dataset = dataset
        self.image_size = (image_size[1], image_size[0])
        self.enqueuer = GeneratorEnqueuer(
            self, use_multiprocessing=use_multiprocessing)
示例#19
0
def test_finite_generator_enqueuer_threads():
    """Drain a finite generator through a thread-backed enqueuer.

    Every collected first element must lie in `0..99`; ordering and
    completeness are not checked.
    """
    finite_source = create_finite_generator_from_sequence_threads(
        TestSequence([3, 200, 200, 3]))
    enqueuer = GeneratorEnqueuer(finite_source, use_multiprocessing=False)
    enqueuer.start(3, 10)
    acc = [int(output[0, 0, 0, 0]) for output in enqueuer.get()]
    assert set(acc) <= set(range(100)), "Output is not the same"
    enqueuer.stop()
示例#20
0
def test_generator_enqueuer_processes():
    """Pull 100 items from a process-backed enqueuer.

    Asserts that the first elements of the outputs do not come back as
    exactly `0..99` in order.
    """
    pcs_generator = create_generator_from_sequence_pcs(
        DummySequence([3, 10, 10, 3]))
    multiproc = GeneratorEnqueuer(pcs_generator, use_multiprocessing=True)
    multiproc.start(3, 10)
    outputs = multiproc.get()
    acc = [int(next(outputs)[0, 0, 0, 0]) for _ in range(100)]
    assert acc != list(range(100)), ('Order was keep in GeneratorEnqueuer '
                                     'with processes')
    multiproc.stop()
示例#21
0
def test_generator_enqueuer_threads():
    """Pull 100 items from a thread-backed enqueuer and check their values.

    Only membership in `0..99` is asserted.
    """
    source = create_generator_from_sequence_threads(
        DummySequence([3, 200, 200, 3]))
    enqueuer = GeneratorEnqueuer(source, use_multiprocessing=False)
    enqueuer.start(3, 10)
    stream = enqueuer.get()
    acc = [int(next(stream)[0, 0, 0, 0]) for _ in range(100)]
    # The order is not compared because it is not guaranteed: a thread can
    # take the GIL before it was supposed to.
    assert set(acc) <= set(range(100)), "Output is not the same"
    enqueuer.stop()
示例#22
0
def test_generator_enqueuer_threads():
    """Check the values of 100 items from a thread-backed enqueuer.

    Every collected first element must lie in `0..99`.
    """
    threads_generator = create_generator_from_sequence_threads(
        TestSequence([3, 200, 200, 3]))
    threaded = GeneratorEnqueuer(threads_generator, use_multiprocessing=False)
    threaded.start(3, 10)
    outputs = threaded.get()
    acc = [int(next(outputs)[0, 0, 0, 0]) for _ in range(100)]

    # Ordering is deliberately not asserted: it is not guaranteed, since a
    # thread can take the GIL before it was supposed to.
    assert set(acc) <= set(range(100)), "Output is not the same"
    threaded.stop()
示例#23
0
    def auc_eval(self):
        """Compute ROC AUC on the validation data and append it to `self.auc`.

        When `self.validation_data` is a `Generator`, `self.validation_steps`
        batches are pulled through a `GeneratorEnqueuer` and predicted one
        at a time. Otherwise `self.validation_data` is treated as an
        `(x, y)` pair and predicted in a single call.

        # Raises
            AssertionError: validation data is a generator and
                `self.validation_steps` is None.
            ValueError: a generator output is not a 2- or 3-element tuple.
        """
        if isinstance(self.validation_data, Generator):
            assert self.validation_steps is not None, \
                'If validation data is a generator, validation steps must be provided'
            y_pred = []
            y_true = []

            enqueuer = GeneratorEnqueuer(self.validation_data,
                                         use_multiprocessing=False,
                                         wait_time=.01)
            enqueuer.start(workers=1, max_queue_size=10)
            try:
                output_generator = enqueuer.get()

                for _ in range(self.validation_steps):
                    generator_output = next(output_generator)
                    if (not hasattr(generator_output, '__len__')
                            or len(generator_output) not in (2, 3)):
                        raise ValueError('Output of generator should be a tuple '
                                         '(x, y, sample_weight) '
                                         'or (x, y). Found: ' +
                                         str(generator_output))
                    if len(generator_output) == 2:
                        x, y = generator_output
                    else:
                        # Sample weights are not needed for the AUC.
                        x, y, _ = generator_output
                    outs = self.model.predict_on_batch(x)

                    y_pred += outs.tolist()
                    y_true += y.tolist()
            finally:
                # Stop the worker even if a batch is malformed or fails.
                enqueuer.stop()
        else:
            y_pred = self.model.predict(self.validation_data[0])
            # `np.bool` was removed in NumPy 1.24; builtin `bool` is the
            # equivalent dtype.
            y_true = self.validation_data[1].astype(bool)

        roc_auc = roc_auc_score(y_true=y_true, y_score=y_pred)
        self.auc.append(roc_auc)
        print('AUC Score is %s' % self.auc[-1])
示例#24
0
class SegDataIterator(Iterator):
    """
    Batch iterator for image segmentation; supports VOC and COCO.
    """
    def __init__(self,
                 dataset,
                 image_size=None,
                 batch_size=32,
                 shuffle=False,
                 seed=None,
                 use_multiprocessing=True,
                 workers=4,
                 max_queue_size=10,
                 deprocess_X=None,
                 deprocess_Y=None):
        """Set up the iterator and an enqueuer that is not started yet.

        `image_size` is indexed unconditionally, so a two-element value must
        be supplied despite the `None` default. It is stored with its two
        elements swapped.
        """
        super(SegDataIterator, self).__init__(
            len(dataset), batch_size, shuffle, seed)

        # Enqueuer settings; the enqueuer is started lazily by `data_gen`.
        self.workers = workers
        self.max_queue_size = max_queue_size
        self.is_start = False

        # Optional post-processing hooks for inputs and targets.
        self.deprocess_X = deprocess_X
        self.deprocess_Y = deprocess_Y

        self.dataset = dataset
        self.image_size = (image_size[1], image_size[0])
        self.enqueuer = GeneratorEnqueuer(
            self, use_multiprocessing=use_multiprocessing)

    def _get_batches_of_transformed_samples(self, index_array):
        """Load the images and segmentations for the given dataset indices.

        # Returns
            `(batch_x, batch_y)` float32 arrays with 3 and
            `dataset.num_classes` channels respectively, after the optional
            `deprocess_X` / `deprocess_Y` hooks.
        """
        count = len(index_array)
        rows = self.image_size[1]
        cols = self.image_size[0]
        batch_x = np.zeros((count, rows, cols, 3), np.float32)
        batch_y = np.zeros((count, rows, cols, self.dataset.num_classes),
                           np.float32)

        for slot, sample_index in enumerate(index_array):
            image = self.dataset.load_image(sample_index, self.image_size)
            mask = self.dataset.load_segmentation(sample_index,
                                                  self.image_size)
            batch_x[slot] = image
            batch_y[slot] = mask

        if self.deprocess_X:
            batch_x = self.deprocess_X(batch_x)
        if self.deprocess_Y:
            batch_y = self.deprocess_Y(batch_y)

        return batch_x, batch_y

    def next(self):
        """For python 2.x.

        # Returns
            The next batch.
        """
        # Only advancing the batch index is done under the lock.
        with self.lock:
            index_array = next(self.index_generator)
        # Loading and transforming images happens outside the lock
        # so it can be done in parallel.
        return self._get_batches_of_transformed_samples(index_array)

    def data_gen(self):
        """Return the next `(batch_x, batch_y)` pair from the enqueuer.

        The enqueuer is started on the first call.
        """
        if not self.is_start:
            self.enqueuer.start(max_queue_size=self.max_queue_size,
                                workers=self.workers)
            self.output_generator = self.enqueuer.get()
            self.is_start = True
        batch_x, batch_y = next(self.output_generator)
        return batch_x, batch_y
示例#25
0
    def fit_with_pseudo_label(self,
                              steps_per_epoch,
                              validation_steps=None,
                              use_checkpoints=True,
                              class_labels=None,
                              verbose=1,
                              use_multiprocessing=False,
                              shuffle=False,
                              workers=1,
                              max_queue_size=10):
        """Train `self.model` batch by batch, adding pseudo-labelled samples.

        For every training batch, when `self.no_label_generator` has samples,
        the model predicts over that generator and the argmax of the
        predictions is stored as its classes. One batch from it is then
        concatenated to the labelled batch before `train_on_batch` is called.
        Validation runs at the end of each epoch.

        # Arguments
            steps_per_epoch: batches per epoch; replaced by
                `len(self.train_generator)` when that is a `Sequence`.
            validation_steps: validation batches; defaults to
                `self.validation_generator.samples // self.batch_size`.
            use_checkpoints: add a `ModelCheckpoint` callback writing to
                `../models_checkpoints/<h5_filename>.h5`.
            class_labels: not used by this method.
            verbose: truthy adds a `ProgbarLogger`; also passed to the
                callback params.
            use_multiprocessing: forwarded to the enqueuer and to
                `evaluate_generator`.
            shuffle: forwarded to `OrderedEnqueuer` for `Sequence` input.
            workers: number of enqueuer/evaluation workers.
            max_queue_size: maximum queue size.

        # Returns
            `self.model.history`.

        # Raises
            ValueError: on invalid validation settings or malformed
                generator output.
        """

        # Default value if validation steps is none
        if validation_steps is None:
            validation_steps = self.validation_generator.samples // self.batch_size

        wait_time = 0.01  # in seconds

        self.model._make_train_function()

        # Create a checkpoint callback
        checkpoint = ModelCheckpoint("../models_checkpoints/" +
                                     str(self.h5_filename) + ".h5",
                                     monitor='val_acc',
                                     verbose=1,
                                     save_best_only=True,
                                     save_weights_only=True,
                                     mode='auto',
                                     period=1)

        # Generate callbacks
        callback_list = []
        if use_checkpoints:
            callback_list.append(checkpoint)

        # Init train counters
        epoch = 0

        validation_data = self.validation_generator
        do_validation = bool(validation_data)
        self.model._make_train_function()
        if do_validation:
            self.model._make_test_function()

        val_gen = (hasattr(validation_data, 'next')
                   or hasattr(validation_data, '__next__')
                   or isinstance(validation_data, Sequence))
        if (val_gen and not isinstance(validation_data, Sequence)
                and not validation_steps):
            raise ValueError('`validation_steps=None` is only valid for a'
                             ' generator based on the `keras.utils.Sequence`'
                             ' class. Please specify `validation_steps` or use'
                             ' the `keras.utils.Sequence` class.')

        # Prepare display labels.
        out_labels = self.model.metrics_names
        callback_metrics = out_labels + ['val_' + n for n in out_labels]

        # Prepare train callbacks
        self.model.history = cbks.History()
        callbacks = [cbks.BaseLogger()] + (callback_list or []) + \
            [self.model.history]
        if verbose:
            callbacks += [cbks.ProgbarLogger(count_mode='steps')]
        callbacks = cbks.CallbackList(callbacks)

        # it's possible to callback a different model than self:
        if hasattr(self.model, 'callback_model') and self.model.callback_model:
            callback_model = self.model.callback_model

        else:
            callback_model = self.model

        callbacks.set_model(callback_model)

        is_sequence = isinstance(self.train_generator, Sequence)
        if not is_sequence and use_multiprocessing and workers > 1:
            warnings.warn(
                UserWarning('Using a generator with `use_multiprocessing=True`'
                            ' and multiple workers may duplicate your data.'
                            ' Please consider using the`keras.utils.Sequence'
                            ' class.'))

        if is_sequence:
            steps_per_epoch = len(self.train_generator)

        enqueuer = None

        callbacks.set_params({
            'epochs': self.epochs,
            'steps': steps_per_epoch,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': callback_metrics,
        })
        callbacks.on_train_begin()

        try:
            if do_validation and not val_gen:
                # Prepare data for validation
                if len(validation_data) == 2:
                    val_x, val_y = validation_data
                    val_sample_weight = None
                elif len(validation_data) == 3:
                    val_x, val_y, val_sample_weight = validation_data
                else:
                    raise ValueError('`validation_data` should be a tuple '
                                     '`(val_x, val_y, val_sample_weight)` '
                                     'or `(val_x, val_y)`. Found: ' +
                                     str(validation_data))
                val_x, val_y, val_sample_weights = self.model._standardize_user_data(
                    val_x, val_y, val_sample_weight)
                val_data = val_x + val_y + val_sample_weights
                if self.model.uses_learning_phase and not isinstance(
                        K.learning_phase(), int):
                    val_data += [0.]
                for cbk in callbacks:
                    cbk.validation_data = val_data

            if is_sequence:
                enqueuer = OrderedEnqueuer(
                    self.train_generator,
                    use_multiprocessing=use_multiprocessing,
                    shuffle=shuffle)
            else:
                enqueuer = GeneratorEnqueuer(
                    self.train_generator,
                    use_multiprocessing=use_multiprocessing,
                    wait_time=wait_time)
            enqueuer.start(workers=workers, max_queue_size=max_queue_size)
            output_generator = enqueuer.get()

            # Train the model

            # Construct epoch logs.
            epoch_logs = {}
            # Epochs
            while epoch < self.epochs:
                callbacks.on_epoch_begin(epoch)
                steps_done = 0
                batch_index = 0

                # Steps per epoch
                while steps_done < steps_per_epoch:

                    generator_output = next(output_generator)

                    if len(generator_output) == 2:
                        x, y = generator_output
                        sample_weight = None
                    elif len(generator_output) == 3:
                        x, y, sample_weight = generator_output
                    else:
                        raise ValueError('Output of generator should be '
                                         'a tuple `(x, y, sample_weight)` '
                                         'or `(x, y)`. Found: ' +
                                         str(generator_output))

                    #==========================
                    # Mini-batch
                    #==========================
                    # print() calls behave the same on Python 2 and 3; a
                    # dedicated local keeps the `verbose` argument intact.
                    if self.print_pseudo_generate:
                        print('')
                        print('Generating pseudo-labels...')
                        predict_verbose = 1
                    else:
                        predict_verbose = 0

                    if self.no_label_generator.samples > 0:
                        no_label_output = self.model.predict_generator(
                            self.no_label_generator,
                            self.no_label_generator.samples,
                            verbose=predict_verbose)

                        # Predicted class index per unlabelled sample
                        self.no_label_generator.classes = np.argmax(
                            no_label_output, axis=1)

                        # Concat Pseudo labels with true labels
                        x_pseudo, y_pseudo = next(self.no_label_generator)
                        x, y = np.concatenate((x, x_pseudo),
                                              axis=0), np.concatenate(
                                                  (y, y_pseudo), axis=0)

                    # build batch logs
                    batch_logs = {}
                    if isinstance(x, list):
                        batch_size = x[0].shape[0]
                    elif isinstance(x, dict):
                        batch_size = list(x.values())[0].shape[0]
                    else:
                        batch_size = x.shape[0]
                    batch_logs['batch'] = batch_index
                    batch_logs['size'] = batch_size
                    callbacks.on_batch_begin(batch_index, batch_logs)

                    # Runs a single gradient update on a single batch of data
                    scalar_training_loss = self.model.train_on_batch(x=x, y=y)

                    if not isinstance(scalar_training_loss, list):
                        scalar_training_loss = [scalar_training_loss]
                    for l, o in zip(out_labels, scalar_training_loss):
                        batch_logs[l] = o

                    callbacks.on_batch_end(batch_index, batch_logs)

                    #==========================
                    # end Mini-batch
                    #==========================

                    batch_index += 1
                    steps_done += 1

                if steps_done >= steps_per_epoch and do_validation:
                    if val_gen:
                        val_outs = self.model.evaluate_generator(
                            validation_data,
                            validation_steps,
                            workers=workers,
                            use_multiprocessing=use_multiprocessing,
                            max_queue_size=max_queue_size)
                    else:
                        # No need for try/except because
                        # data has already been validated.
                        val_outs = self.model.evaluate(
                            val_x,
                            val_y,
                            batch_size=batch_size,
                            sample_weight=val_sample_weights,
                            verbose=0)
                    if not isinstance(val_outs, list):
                        val_outs = [val_outs]
                    # Same labels assumed.
                    for l, o in zip(out_labels, val_outs):
                        epoch_logs['val_' + l] = o

                # Epoch finished.
                callbacks.on_epoch_end(epoch, epoch_logs)
                epoch += 1

        finally:
            # Always release the training workers, even on failure.
            if enqueuer is not None:
                enqueuer.stop()

        callbacks.on_train_end()
        return self.model.history
示例#26
0
class ModelDiagnoser(Callback):
    """Callback that writes input, target and predicted images after every epoch.

    At the end of each epoch the callback pulls batches from ``data_generator``
    through a Keras enqueuer, runs ``self.model.predict`` on them and saves the
    input (``x``), target (``y``) and prediction (``y_pred``) images through a
    ``TensorBoardWriter`` under the tags ``Epoch-<n>/<sample>/{x,y,y_pred}``.

    # Arguments
        data_generator: a ``Sequence`` or a plain generator. Each item must be
            indexable so that ``item[:2]`` gives ``(x, y)``, and both ``x`` and
            ``y`` must expose ``.values()`` (only the first value is used).
        m_batch_size: batch size used to work out how many batches to draw.
        num_samples: number of samples to log per epoch.
        output_dir: passed to ``TensorBoardWriter``.
        normalization_mean: value added back to the images before they are
            scaled by 255 for visualisation.
    """

    def __init__(self, data_generator, m_batch_size, num_samples, output_dir, normalization_mean):
        super().__init__()
        # Number of epochs logged so far; used as the 1-based "Epoch-<n>" tag.
        self.epoch_index = 0
        self.data_generator = data_generator
        self.batch_size = m_batch_size
        self.num_samples = num_samples
        self.tensorboard_writer = TensorBoardWriter(output_dir)
        self.normalization_mean = normalization_mean
        if isinstance(self.data_generator, Sequence):
            self.enqueuer = OrderedEnqueuer(self.data_generator,
                                            use_multiprocessing=True,
                                            shuffle=False)
        else:
            self.enqueuer = GeneratorEnqueuer(self.data_generator,
                                              use_multiprocessing=False,  # todo: how to 'True' ?
                                              wait_time=0.01)
        # todo: integrate the Sequence generator properly, e.g.
        #   import multiprocessing
        #   self.enqueuer.start(workers=multiprocessing.cpu_count(), max_queue_size=4)
        self.enqueuer.start(workers=1, max_queue_size=4)

    def on_epoch_end(self, epoch, logs=None):
        """Log up to ``num_samples`` (x, y, y_pred) image triplets for this epoch.

        # Arguments
            epoch: epoch index supplied by Keras (unused; the callback keeps its
                own 1-based counter in ``self.epoch_index``).
            logs: metric logs supplied by Keras (unused).
        """
        # Advance the counter exactly once per epoch. It used to be bumped once
        # per batch, which scattered one epoch's images over several
        # "Epoch-<n>" tags and made <n> drift away from the real epoch number.
        self.epoch_index += 1

        output_generator = self.enqueuer.get()
        total_steps = int(np.ceil(np.divide(self.num_samples, self.batch_size)))
        sample_index = 0
        for steps_done in range(total_steps):
            generator_output = next(output_generator)
            x, y = generator_output[:2]
            # x and y are mappings; only their first value is visualised.
            x = next(iter(x.values()))
            y = next(iter(y.values()))
            y_pred = self.model.predict(x)

            for i in range(len(y_pred)):
                # Stop as soon as the requested number of samples is reached
                # (the last batch may contain more than is needed).
                if steps_done * self.batch_size + i >= self.num_samples:
                    return

                # rearranging images for visualization
                img_x = self.__reformat_img(x, i)
                img_y = self.__reformat_img(y, i)
                img_p = self.__reformat_img(y_pred, i)

                tag_prefix = "Epoch-{}/{}".format(self.epoch_index, sample_index)
                self.tensorboard_writer.save_image(tag_prefix + "/x", img_x)
                self.tensorboard_writer.save_image(tag_prefix + "/y", img_y)
                self.tensorboard_writer.save_image(tag_prefix + "/y_pred", img_p)
                sample_index += 1

    def __reformat_img(self, img_np_array, img_index):
        """Return image ``img_index`` of a 4-D batch, de-normalised for display."""
        img = np.squeeze(img_np_array[img_index, :, :, :])
        img = 255. * (img + self.normalization_mean)  # mean is the training images normalization mean
        # Reverse the channel order (presumably BGR <-> RGB; verify against the writer).
        img = img[:, :, [2, 1, 0]]
        return img

    def on_train_end(self, logs=None):
        """Stop the enqueuer workers and close the TensorBoard writer."""
        self.enqueuer.stop()
        self.tensorboard_writer.close()
示例#27
0
def test_gan(gan, data_generator, cf):
    """Evaluate a colourisation GAN and store the metrics under ``<output_path>/results``.

    For ``data_generator.samples`` batches the first channel of the real batch
    is fed to ``gan``, the prediction is concatenated back onto that channel and
    both real and fake batches are decoded to RGB and converted with
    ``rgb2lab``. The function accumulates the mean absolute error and PSNR over
    channels 1 and 2, a perceptual score from ``perceptual_model()`` and
    histograms of the predicted a/b channels.

    Outputs:
        * a line appended to ``results/logs.txt`` with l1, psnr and perc loss;
        * ``results/a_dist.npy`` and ``results/b_dist.npy`` holding the log of
          the averaged a/b histograms (empty bins are set to the smallest
          finite value).

    NOTE(review): one batch is drawn per "sample" and the sums are divided by
    ``data_generator.samples``, so the averages are per-image only if the
    generator yields batches of size 1 - confirm against the caller.

    # Arguments
        gan: model exposing ``load_weights`` and ``predict``.
        data_generator: generator exposing ``samples`` and ``decoder``.
        cf: configuration object; reads ``output_path``, ``weights_path``,
            ``use_multiprocessing``, ``workers``, ``max_queue_size`` and
            ``experiment_name``.
    """
    results_path = os.path.join(cf.output_path, 'results')
    if not os.path.exists(results_path):
        os.makedirs(results_path)

    l1_cum = 0
    psnr_cum = 0
    perc_cum = 0
    # 220 unit-wide edges from -110 to 109 give 219 histogram bins.
    hist_bins = np.arange(-110., 110.)
    a_cum_dist = np.zeros(len(hist_bins) - 1)
    b_cum_dist = np.zeros(len(hist_bins) - 1)

    perception = perceptual_model()
    gan.load_weights(cf.weights_path)

    test_datagen = GeneratorEnqueuer(
        data_generator, use_multiprocessing=cf.use_multiprocessing)
    test_datagen.start(cf.workers, cf.max_queue_size)
    try:
        test_generator = test_datagen.get()

        for _ in tqdm.tqdm(range(data_generator.samples)):
            batch_real, _ = next(test_generator)
            batch_gray = np.expand_dims(batch_real[..., 0], -1)
            batch_pred = gan.predict(batch_gray)
            batch_fake = np.concatenate((batch_gray, batch_pred), axis=-1)

            # compute color distribution
            real_rgb = data_generator.decoder(batch_real)
            fake_rgb = data_generator.decoder(batch_fake)
            real_lab, fake_lab = [], []
            for fake, real in zip(fake_rgb, real_rgb):
                fake = rgb2lab(fake)
                fake_lab.append(fake)
                real_lab.append(rgb2lab(real))
                a_cum_dist += np.histogram(fake[..., 1], bins=hist_bins)[0]
                b_cum_dist += np.histogram(fake[..., 2], bins=hist_bins)[0]

            fake_lab = np.array(fake_lab)
            real_lab = np.array(real_lab)
            # Difference on channels 1 and 2 only (channel 0 is the network input).
            ab_diff = np.subtract(fake_lab[..., 1:], real_lab[..., 1:])

            # compute l1
            l1_cum += np.abs(ab_diff).mean()

            # compute psnr
            mse = np.square(ab_diff).mean()
            psnr_cum += np.sum(20 * np.log10(255. / np.sqrt(mse)))

            # compute perceptual loss
            real_rgb = resize(real_rgb)
            fake_rgb = resize(fake_rgb)
            perc_cum += sum(
                perception.predict(
                    [preprocess_input(i) for i in [real_rgb, fake_rgb]]))
    finally:
        # Always release the enqueuer's worker threads/processes, even when the
        # evaluation loop fails; previously they were never stopped.
        test_datagen.stop()

    l1 = l1_cum / data_generator.samples
    psnr = psnr_cum / data_generator.samples
    perc_loss = perc_cum / data_generator.samples

    with open(results_path + '/logs.txt', 'a') as f:
        f.write('experiment, l1, psnr, perc loss\n')
        f.write(cf.experiment_name + ', %f, %f, %f\n' % (l1, psnr, perc_loss))

    a_dist = np.log(a_cum_dist / data_generator.samples)
    b_dist = np.log(b_cum_dist / data_generator.samples)

    # log(0) gives -inf for empty bins. Flip those to +inf first so that min()
    # returns the smallest finite value, then use that value for the empty bins.
    a_dist[a_dist == -np.inf] = np.inf
    a_dist[a_dist == np.inf] = a_dist.min()
    b_dist[b_dist == -np.inf] = np.inf
    b_dist[b_dist == np.inf] = b_dist.min()

    np.save(os.path.join(results_path, 'a_dist.npy'), a_dist)
    np.save(os.path.join(results_path, 'b_dist.npy'), b_dist)
示例#28
0
def validate(config, model, val_client, validation_steps, metrics_id, epoch):
    """Run ``validation_steps`` validation batches and persist the metrics.

    Each batch ``(X, GT)`` is pulled from ``val_client.gen()`` through a
    ``GeneratorEnqueuer``. For every model output the loss is the sum of
    squared differences divided by the first dimension of the ground truth and
    by 2. The outputs of the last stage are also passed to
    ``calc_batch_metrics``.

    Outputs:
        * ``logs/val_scores.<metrics_id>.<epoch>.txt``: tab-separated frame
          built from the ``calc_batch_metrics`` results;
        * ``<metrics_id>.val.tsv``: one row per call with the per-epoch mean of
          loss / stage6l1 / stage6l2 / mae / rmse / dist (header written only
          when ``epoch == 1``).

    # Arguments
        config: reads ``paf_layers``, ``heat_layers``, ``heat_start`` and
            ``bkg_start``.
        model: model exposing ``predict``.
        val_client: object whose ``gen()`` returns the validation generator.
        validation_steps: number of batches to evaluate.
        metrics_id: identifier used in the output file names.
        epoch: epoch number stored with the metrics.

    # Raises
        AssertionError: if the PAF/heat layer configuration is not supported.
    """
    val_di = val_client.gen()
    from keras.utils import GeneratorEnqueuer

    val_thre = GeneratorEnqueuer(val_di)
    val_thre.start()

    model_metrics = []
    inhouse_metrics = []

    try:
        # Create the consuming generator once instead of once per batch.
        batches = val_thre.get()

        for i in range(validation_steps):

            X, GT = next(batches)

            Y = model.predict(X)

            model_losses = [(np.sum((gt - y)**2) / gt.shape[0] / 2)
                            for gt, y in zip(GT, Y)]
            mm = sum(model_losses)

            if config.paf_layers > 0 and config.heat_layers > 0:
                GTL6 = np.concatenate([GT[-2], GT[-1]], axis=3)
                YL6 = np.concatenate([Y[-2], Y[-1]], axis=3)
                mm6l1 = model_losses[-2]
                mm6l2 = model_losses[-1]
            elif config.paf_layers == 0 and config.heat_layers > 0:
                GTL6 = GT[-1]
                YL6 = Y[-1]
                # No PAF branch: use NaN instead of None so the "%0.4f" format
                # below does not raise TypeError and the column stays numeric.
                mm6l1 = float('nan')
                mm6l2 = model_losses[-1]
            else:
                # Explicit raise: a bare `assert False` disappears under -O.
                raise AssertionError("Wtf or not implemented")

            m = calc_batch_metrics(i, GTL6, YL6,
                                   range(config.heat_start, config.bkg_start))
            inhouse_metrics += [m]

            model_metrics += [(i, mm, mm6l1, mm6l2,
                               m["MAE"].sum() / GTL6.shape[0],
                               m["RMSE"].sum() / GTL6.shape[0],
                               m["DIST"].mean())]
            print(
                "Validating[BATCH: %d] LOSS: %0.4f, S6L1: %0.4f, S6L2: %0.4f, MAE: %0.4f, RMSE: %0.4f, DIST: %0.2f"
                % model_metrics[-1])
    finally:
        # Release the enqueuer workers even if prediction or metrics fail.
        val_thre.stop()

    inhouse_metrics = pd.concat(inhouse_metrics)
    inhouse_metrics['epoch'] = epoch
    inhouse_metrics.to_csv("logs/val_scores.%s.%04d.txt" % (metrics_id, epoch),
                           sep="\t")

    model_metrics = pd.DataFrame(model_metrics,
                                 columns=("batch", "loss", "stage6l1",
                                          "stage6l2", "mae", "rmse", "dist"))
    model_metrics['epoch'] = epoch
    del model_metrics['batch']
    # Collapse the per-batch rows into a single per-epoch mean row.
    model_metrics = model_metrics.groupby('epoch').mean()
    with open('%s.val.tsv' % metrics_id, 'a') as f:
        model_metrics.to_csv(f,
                             header=(epoch == 1),
                             sep="\t",
                             float_format='%.4f')
def validate(config, model, multi_model, val_client, validation_steps,
             metrics_id, epoch):
    """Run ``validation_steps`` validation batches through ``multi_model`` and print metrics.

    Each batch ``(X, GT)`` is pulled from ``val_client.gen()`` through a
    ``GeneratorEnqueuer``. For every model output the loss is the sum of
    squared differences divided by the first dimension of the ground truth and
    by 2. The outputs of the last stage are also passed to
    ``calc_batch_metrics``. Per-batch metrics and the total elapsed time are
    printed; writing the metrics to disk is currently disabled (see the
    commented-out code at the end).

    # Arguments
        config: reads ``paf_layers``, ``heat_layers``, ``heat_start`` and
            ``bkg_start``.
        model: unused here; prediction is done with ``multi_model``.
        multi_model: model exposing ``predict``.
        val_client: object whose ``gen()`` returns the validation generator.
        validation_steps: number of batches to evaluate.
        metrics_id: only referenced by the disabled persistence code.
        epoch: only referenced by the disabled persistence code.

    # Raises
        AssertionError: if the PAF/heat layer configuration is not supported.
    """
    # X is a list holding the image, the confidence mask and the PAF mask; Y
    # holds the ground-truth heatmaps of the 6 stages, 12 in total.
    # The network has three inputs: the original image plus mask1 and mask2,
    # the confidence and PAF masks that remove unlabeled regions when
    # evaluating during training. (At test time the model does not need to
    # mask its feature-map outputs and can predict over all regions.)

    val_di = val_client.gen()

    # The provided generator can be finite, in which case the class will throw
    # a `StopIteration` exception. The `gen` implemented here does not seem to
    # have that problem. The class also provides a multiprocessing wrapper.
    val_thre = GeneratorEnqueuer(val_di)
    val_thre.start()

    model_metrics = []
    inhouse_metrics = []  # only consumed by the disabled persistence code below
    t0 = time()
    try:
        # Create the consuming generator once instead of once per batch.
        batches = val_thre.get()

        # Prediction is split into many batches
        # (validation_steps = val_samples // batch_size) to avoid running out
        # of memory; to reduce validation time only part of the validation
        # data may be evaluated.
        for i in range(validation_steps):
            # if random.randint(0, 9) < 5:  # only evaluate 20% of the data
            #     continue
            X, GT = next(batches)
            Y = multi_model.predict(X)

            # Consistent with the loss used in the model definition: dividing
            # by 2 cancels the factor 2 produced by differentiating the square,
            # which saves a multiplication.
            model_losses = [(np.sum((gt - y)**2) / gt.shape[0] / 2)
                            for gt, y in zip(GT, Y)]
            mm = sum(model_losses)

            if config.paf_layers > 0 and config.heat_layers > 0:
                GTL6 = np.concatenate([GT[-2], GT[-1]], axis=3)
                YL6 = np.concatenate([Y[-2], Y[-1]], axis=3)
                # NOTICE: distance between the last stage's prediction and the
                # ground truth.
                mm6l1 = model_losses[-2]
                mm6l2 = model_losses[-1]
            elif config.paf_layers == 0 and config.heat_layers > 0:
                GTL6 = GT[-1]
                YL6 = Y[-1]
                # No PAF branch: use NaN instead of None so the "%0.4f" format
                # below does not raise TypeError.
                mm6l1 = float('nan')
                mm6l2 = model_losses[-1]
            else:
                # Explicit raise: a bare `assert False` disappears under -O.
                raise AssertionError("Wtf or not implemented")

            m = calc_batch_metrics(i, GTL6, YL6,
                                   range(config.heat_start, config.bkg_start))
            inhouse_metrics += [m]

            # These rows are meant to be grouped by epoch and averaged.
            model_metrics += [(i, mm, mm6l1, mm6l2,
                               m["MAE"].sum() / GTL6.shape[0],
                               m["RMSE"].sum() / GTL6.shape[0],
                               m["DIST"].mean())]
            print(
                "Validating[BATCH: %d] LOSS: %0.4f, S6L1: %0.4f, S6L2: %0.4f, MAE: %0.4f, RMSE: %0.4f, DIST: %0.2f"
                % model_metrics[-1])

        t1 = time()
    finally:
        # Release the enqueuer workers even if prediction or metrics fail.
        val_thre.stop()

    print('The CNN prediction time during validation is : ', t1 - t0)
    # Disabled persistence of the metrics (kept for reference):
    # inhouse_metrics = pd.concat(inhouse_metrics)
    # inhouse_metrics['epoch'] = epoch
    # inhouse_metrics.to_csv("logs/val_scores.%s.%04d.csv" % (metrics_id, epoch))  # default separator is ',' rather than "\t"
    # # stores the per-layer details
    #
    # model_metrics = pd.DataFrame(model_metrics, columns=("batch","loss","stage6l1","stage6l2","mae","rmse","dist") )
    # model_metrics['epoch'] = epoch
    # del model_metrics['batch']
    # model_metrics = model_metrics.groupby('epoch').mean()
    # with open('%s.val.tsv' % metrics_id, 'a') as f:
    #     model_metrics.to_csv(f, header=(epoch==1), float_format='%.4f')  # sep="\t",
    #
    # print(inhouse_metrics[["layer", "epoch", "MAE", "RMSE", "DIST"]].groupby(["layer", "epoch"]).mean())
示例#30
0
# Set-up and first part of a GAN training loop (script fragment; the snippet is
# truncated in the middle of the loop body).
batch_counter = 0
# Number of batches between checkpoints: ten passes over the data
# (N_data / batch_size * 10), capped at 3000.
n_batch_per_epoch = min(N_data / batch_size * 10,
                        3000)  #check point: every 10 epoch
step_lr_drop = 5

# Loss histories and the best loss seen so far (initialised to a large value).
disc_losses = []
recont_losses = []
gen_losses = []
pre_loss = 9999
# Learning rate for the current epoch, taken from the schedule.
lr_current = lr_schedule[epoch]

real_ratio = 1.0
feed_iter = datagenerator.generator()
# Apply the scheduled learning rate to both optimizers.
K.set_value(discriminator.optimizer.lr, lr_current)
K.set_value(dcgan.optimizer.lr, lr_current)
# Prefetch batches in the background: 6 worker processes, queue of up to 200.
fed = GeneratorEnqueuer(feed_iter, use_multiprocessing=True, wait_time=5)
fed.start(workers=6, max_queue_size=200)
iter_ = fed.get()

zero_target = np.zeros((batch_size))
# Each item from the enqueuer unpacks into four arrays.
for X_src, X_tgt, disc_tgt, prob_gt in iter_:
    # Discriminator update on a batch built by get_disc_batch.
    discriminator.trainable = True
    X_disc, y_disc = get_disc_batch(X_src,
                                    X_tgt,
                                    generator_train,
                                    0,
                                    label_smoothing=True,
                                    label_flipping=0.2)
    disc_loss = discriminator.train_on_batch(X_disc, y_disc)

    # A second discriminator batch follows (the snippet ends mid-call here).
    X_disc, y_disc = get_disc_batch(X_src,
示例#31
0
    def fit_with_pseudo_label(self,
                              steps_per_epoch,
                              use_checkpoints=False,
                              class_labels=None,
                              verbose=1,
                              use_multiprocessing=False,
                              shuffle=False,
                              workers=1,
                              max_queue_size=10):
        """Train ``self.model`` on labelled batches extended with pseudo-labelled data.

        For every training step a labelled batch is taken from
        ``self.train_generator``; the model then predicts the whole
        ``self.no_label_generator``, the arg-max of those predictions is stored
        as that generator's ``classes``, one pseudo-labelled batch is drawn
        from it and concatenated to the labelled batch, and a single
        ``train_on_batch`` update is run on the combined batch.

        # Arguments
            steps_per_epoch: number of batches per epoch; replaced by
                ``len(self.train_generator)`` when the generator is a
                ``Sequence``.
            use_checkpoints: if True, add a ``ModelCheckpoint`` callback that
                writes to ``../models_checkpoints/<h5_filename>.h5``.
            class_labels: currently unused.
            verbose: if truthy, add a ``ProgbarLogger``.
            use_multiprocessing: forwarded to the enqueuer.
            shuffle: forwarded to ``OrderedEnqueuer`` (Sequence input only).
            workers: number of enqueuer workers.
            max_queue_size: maximum size of the enqueuer queue.

        # Returns
            The ``History`` callback stored in ``self.model.history``.

        # Raises
            ValueError: if the generator yields something other than
                ``(x, y)`` or ``(x, y, sample_weight)``.
        """
        wait_time = 0.01  # in seconds

        self.model._make_train_function()

        # Generate callbacks
        callback_list = []
        if use_checkpoints:
            # `append`, not `extend`: a single callback object is not iterable.
            callback_list.append(
                ModelCheckpoint("../models_checkpoints/" +
                                str(self.h5_filename) + ".h5",
                                monitor='val_acc',
                                verbose=1,
                                save_best_only=True,
                                save_weights_only=True,
                                mode='auto',
                                period=1))

        # Init train counters
        epoch = 0

        # Prepare display labels.
        out_labels = self.model._get_deduped_metrics_names()
        callback_metrics = out_labels + ['val_' + n for n in out_labels]

        # Prepare train callbacks
        self.model.history = cbks.History()
        callbacks = [cbks.BaseLogger()] + callback_list + [self.model.history]
        if verbose:
            callbacks += [cbks.ProgbarLogger(count_mode='steps')]
        callbacks = cbks.CallbackList(callbacks)

        # it's possible to callback a different model than self:
        if hasattr(self.model, 'callback_model') and self.model.callback_model:
            callback_model = self.model.callback_model
        else:
            callback_model = self.model

        callbacks.set_model(callback_model)

        is_sequence = isinstance(self.train_generator, Sequence)
        if not is_sequence and use_multiprocessing and workers > 1:
            warnings.warn(
                UserWarning('Using a generator with `use_multiprocessing=True`'
                            ' and multiple workers may duplicate your data.'
                            ' Please consider using the`keras.utils.Sequence'
                            ' class.'))

        if is_sequence:
            steps_per_epoch = len(self.train_generator)
        enqueuer = None

        callbacks.set_params({
            'epochs': self.epochs,
            'steps': steps_per_epoch,
            'verbose': verbose,
            'do_validation': True,
            'metrics': callback_metrics,
        })
        callbacks.on_train_begin()

        try:
            if is_sequence:
                enqueuer = OrderedEnqueuer(
                    self.train_generator,
                    use_multiprocessing=use_multiprocessing,
                    shuffle=shuffle)
            else:
                enqueuer = GeneratorEnqueuer(
                    self.train_generator,
                    use_multiprocessing=use_multiprocessing,
                    wait_time=wait_time)
            enqueuer.start(workers=workers, max_queue_size=max_queue_size)
            output_generator = enqueuer.get()

            # Train the model
            # Epochs
            while epoch < self.epochs:
                callbacks.on_epoch_begin(epoch)
                epoch_logs = {}

                # Steps per epoch
                for batch_index in range(steps_per_epoch):

                    generator_output = next(output_generator)

                    if len(generator_output) not in (2, 3):
                        raise ValueError('Output of generator should be '
                                         'a tuple `(x, y, sample_weight)` '
                                         'or `(x, y)`. Found: ' +
                                         str(generator_output))
                    # A third element (sample weights) is accepted but not
                    # used: train_on_batch below only receives x and y.
                    x, y = generator_output[:2]

                    #==========================
                    # Mini-batch
                    #==========================
                    print('')
                    print('Generating pseudo-labels...')
                    no_label_output = self.model.predict_generator(
                        self.no_label_generator,
                        None,  # because the model is instance of sequence
                        verbose=1)

                    # Store the predicted class index of every unlabelled
                    # sample as its pseudo-label.
                    self.no_label_generator.classes = np.argmax(
                        no_label_output, axis=1)

                    # Concat Pseudo labels with true labels. The combined batch
                    # must not be overwritten afterwards: a second unpacking of
                    # `generator_output` used to discard it.
                    x_pseudo, y_pseudo = next(self.no_label_generator)
                    x = np.concatenate((x, x_pseudo), axis=0)
                    y = np.concatenate((y, y_pseudo), axis=0)

                    # build batch logs (x is an ndarray after concatenation)
                    batch_logs = {'batch': batch_index, 'size': x.shape[0]}
                    callbacks.on_batch_begin(batch_index, batch_logs)

                    # Runs a single gradient update on a single batch of data
                    scalar_training_loss = self.model.train_on_batch(x=x, y=y)

                    if not isinstance(scalar_training_loss, list):
                        scalar_training_loss = [scalar_training_loss]
                    for l, o in zip(out_labels, scalar_training_loss):
                        batch_logs[l] = o

                    callbacks.on_batch_end(batch_index, batch_logs)

                    #==========================
                    # end Mini-batch
                    #==========================

                # Epoch finished: notify the callbacks so that History,
                # BaseLogger and ModelCheckpoint actually run.
                callbacks.on_epoch_end(epoch, epoch_logs)
                epoch += 1

        finally:
            if enqueuer is not None:
                enqueuer.stop()

        callbacks.on_train_end()
        return self.model.history