Пример #1
0
    def _predict(self,
                 data_generator_function,
                 steps_per_epoch,
                 include_datum=False):
        """Run batched prediction over ``steps_per_epoch`` batches from a generator.

        Args:
            data_generator_function: Callable that, when called with
                ``include_datum=True``, returns a generator yielding batches.
            steps_per_epoch: Number of batches to pull from the generator.
            include_datum: When True, pair each predicted caption with its
                datum in the return value.

        Returns:
            ``zip(captions, datums)`` when ``include_datum`` is True,
            otherwise a list of predicted caption strings.
        """
        # The generator is always asked for datum objects; they are only
        # surfaced to the caller when include_datum is True.
        data_generator = data_generator_function(include_datum=True)
        enqueuer = GeneratorEnqueuer(data_generator)
        enqueuer.start(workers=self._WORKERS, max_queue_size=self._MAX_Q_SIZE)

        caption_results = []
        datum_results = []
        for _ in range(steps_per_epoch):
            generator_output = None
            # Busy-wait (sleeping _WAIT_TIME between polls) until a batch is
            # queued or the enqueuer shuts down.
            while enqueuer.is_running():
                if not enqueuer.queue.empty():
                    generator_output = enqueuer.queue.get()
                    break
                else:
                    sleep(self._WAIT_TIME)

            # NOTE(review): assumes each queue item is a tuple whose second
            # element is the (x, y, datum_batch) payload — confirm against the
            # enqueuer implementation in use. If the enqueuer stopped before
            # producing a batch, generator_output is still None and this line
            # raises TypeError.
            x, y, datum_batch = generator_output[1]

            captions_pred_str = self._predict_batch(x, y)
            caption_results += captions_pred_str
            datum_results += datum_batch

        enqueuer.stop()

        if include_datum:
            # zip(...) is a lazy, single-pass iterator on Python 3.
            return zip(caption_results, datum_results)
        else:
            return caption_results
Пример #2
0
def test_generator_enqueuer_threadsafe():
    """A non-thread-safe generator consumed through the enqueuer must raise."""
    enqueuer = GeneratorEnqueuer(
        create_generator_from_sequence_pcs(DummySequence([3, 10, 10, 3])),
        use_multiprocessing=False)
    enqueuer.start(3, 10)
    stream = enqueuer.get()
    with pytest.raises(RuntimeError) as excinfo:
        for _ in range(10):
            next(stream)
    assert 'thread-safe' in str(excinfo.value)
    enqueuer.stop()
Пример #3
0
def test_generator_enqueuer_threadsafe():
    """Consuming an unsafe generator from threads surfaces a RuntimeError."""
    seq = DummySequence([3, 10, 10, 3])
    enqueuer = GeneratorEnqueuer(create_generator_from_sequence_pcs(seq),
                                 use_multiprocessing=False)
    enqueuer.start(3, 10)
    output = enqueuer.get()
    with pytest.raises(RuntimeError) as err:
        _ = [next(output) for _ in range(10)]
    assert 'thread-safe' in str(err.value)
    enqueuer.stop()
Пример #4
0
def test_finite_generator_enqueuer_threads():
    """A finite generator drained via threads yields exactly the values 0..99."""
    seq = DummySequence([3, 200, 200, 3])
    enqueuer = GeneratorEnqueuer(
        create_finite_generator_from_sequence_threads(seq),
        use_multiprocessing=False)
    enqueuer.start(3, 10)
    seen = [int(batch[0, 0, 0, 0]) for batch in enqueuer.get()]
    assert set(seen) == set(range(100)), "Output is not the same"
    enqueuer.stop()
Пример #5
0
def test_finite_generator_enqueuer_processes():
    """With processes, batch order is expected to differ from the source order."""
    seq = TestSequence([3, 200, 200, 3])
    enqueuer = GeneratorEnqueuer(
        create_finite_generator_from_sequence_pcs(seq),
        use_multiprocessing=True)
    enqueuer.start(3, 10)
    collected = []
    for batch in enqueuer.get():
        collected.append(int(batch[0, 0, 0, 0]))
    assert collected != list(range(100)), "Order was keep in GeneratorEnqueuer with processes"
    enqueuer.stop()
Пример #6
0
def test_finite_generator_enqueuer_threads():
    """Threaded draining of a finite generator produces no values outside 0..99."""
    enqueuer = GeneratorEnqueuer(
        create_finite_generator_from_sequence_threads(
            TestSequence([3, 200, 200, 3])),
        use_multiprocessing=False)
    enqueuer.start(3, 10)
    values = [int(item[0, 0, 0, 0]) for item in enqueuer.get()]
    assert len(set(values) - set(range(100))) == 0, "Output is not the same"
    enqueuer.stop()
Пример #7
0
def train_gan(gan, data_generator, cf):
    """Train a colorization GAN: grayscale channel in, chroma channels out.

    Args:
        gan: Object exposing `combined` (generator-side model),
            `discriminator` (list of per-scale discriminators), and the
            target tensors `dis_real_Y`, `dis_fake_Y`, `gen_real_Y`.
        data_generator: Batch source exposing `nb_steps` and `decoder`.
        cf: Config with `use_multiprocessing`, `workers`, `max_queue_size`,
            `epochs`, `plots_per_epoch`, `weights_per_epoch`, `d_scales`,
            `display_step`, `output_path`.
    """
    check = _checkpoint(cf.output_path)
    logs = _logs_manager(cf.output_path, gan)

    # Feed batches through a background enqueuer so data loading overlaps
    # with training.
    train_datagen = GeneratorEnqueuer(
        data_generator, use_multiprocessing=cf.use_multiprocessing)
    train_datagen.start(cf.workers, cf.max_queue_size)
    train_generator = train_datagen.get()

    for epoch in range(cf.epochs):
        print('Epoch %d/%d' % (epoch + 1, cf.epochs))
        progbar = generic_utils.Progbar(data_generator.nb_steps)

        # Training loop
        for step in range(data_generator.nb_steps):
            # Channel layout assumed: index 0 = grayscale, 1: = chroma.
            batch_real = next(train_generator)
            batch_gray = np.expand_dims(batch_real[:, :, :, 0], -1)
            batch_chroma = batch_real[:, :, :, 1:]
            # combined.predict returns multiple outputs; the last one is the
            # generated chroma.
            batch_fake = gan.combined.predict(batch_gray)[-1]

            dis_real_X = np.concatenate((batch_gray, batch_chroma), axis=-1)
            dis_fake_X = np.concatenate((batch_gray, batch_fake), axis=-1)

            # Train each discriminator scale on real and fake pairs; record
            # the average of the two losses.
            d_loss = []
            for d, real_Y, fake_Y in zip(gan.discriminator, gan.dis_real_Y,
                                         gan.dis_fake_Y):
                d_real = d.train_on_batch(dis_real_X, real_Y)
                d_fake = d.train_on_batch(dis_fake_X, fake_Y)
                d_loss.append(0.5 * np.add(d_real, d_fake))

            # Generator step: adversarial targets plus an L1-style chroma
            # reconstruction target.
            g_loss = gan.combined.train_on_batch(
                batch_gray, gan.gen_real_Y + [batch_chroma])

            # NOTE(review): both `//` guards below raise ZeroDivisionError if
            # plots_per_epoch / weights_per_epoch exceed nb_steps — confirm
            # the config always keeps them smaller.
            if step % (data_generator.nb_steps //
                       cf.plots_per_epoch) == 0 and step != 0:
                logs.save_plots(epoch, step,
                                data_generator.decoder(dis_fake_X),
                                data_generator.decoder(batch_real))

            if step % (data_generator.nb_steps //
                       cf.weights_per_epoch) == 0 and step != 0 and epoch != 0:
                check.save_weights(gan, epoch, step)

            d_names = ['d_loss_%d' % i for i in range(cf.d_scales)]
            g_names = ['g_cgan_%d' % i for i in range(cf.d_scales)]

            # g_loss layout assumed: [total, per-scale cGAN losses..., L1].
            logs.update(
                names=['d_loss'] + d_names + ['g_loss'] + g_names + ['g_l1'],
                values=[sum(d_loss)] + d_loss + [g_loss[0]] + g_loss[1:-1] +
                [g_loss[-1]],
                progbar=progbar,
                display_step=cf.display_step)

    train_datagen.stop()
Пример #8
0
def test_generator_enqueuer_processes():
    """Process-backed enqueuing should not preserve the source ordering."""
    enqueuer = GeneratorEnqueuer(
        create_generator_from_sequence_pcs(DummySequence([3, 200, 200, 3])),
        use_multiprocessing=True)
    enqueuer.start(3, 10)
    stream = enqueuer.get()
    seen = [int(next(stream)[0, 0, 0, 0]) for _ in range(100)]
    assert seen != list(range(100)), ('Order was keep in GeneratorEnqueuer '
                                      'with processes')
    enqueuer.stop()
Пример #9
0
def test_generator_enqueuer_processes():
    """Small-batch variant: process workers should scramble the ordering."""
    seq = DummySequence([3, 10, 10, 3])
    enqueuer = GeneratorEnqueuer(create_generator_from_sequence_pcs(seq),
                                 use_multiprocessing=True)
    enqueuer.start(3, 10)
    out = enqueuer.get()
    observed = []
    for _ in range(100):
        observed.append(int(next(out)[0, 0, 0, 0]))
    assert observed != list(range(100)), ('Order was keep in GeneratorEnqueuer '
                                          'with processes')
    enqueuer.stop()
Пример #10
0
def test_generator_enqueuer_threads():
    """Threaded enqueuing yields only values from 0..99."""
    enqueuer = GeneratorEnqueuer(
        create_generator_from_sequence_threads(DummySequence([3, 200, 200, 3])),
        use_multiprocessing=False)
    enqueuer.start(3, 10)
    stream = enqueuer.get()
    results = []
    for _ in range(100):
        results.append(int(next(stream)[0, 0, 0, 0]))
    # Order is deliberately not compared: threads give no ordering guarantee —
    # a thread can take the GIL out of turn, so values may interleave.
    assert len(set(results) - set(range(100))) == 0, "Output is not the same"
    enqueuer.stop()
Пример #11
0
def test_generator_enqueuer_threads():
    """TestSequence variant: threaded output stays within the 0..99 value set."""
    seq = TestSequence([3, 200, 200, 3])
    enqueuer = GeneratorEnqueuer(create_generator_from_sequence_threads(seq),
                                 use_multiprocessing=False)
    enqueuer.start(3, 10)
    stream = enqueuer.get()
    collected = [int(next(stream)[0, 0, 0, 0]) for _ in range(100)]

    # Ordering is not asserted because it is not guaranteed with threads:
    # one thread can grab the GIL ahead of schedule.
    assert len(set(collected) - set(range(100))) == 0, "Output is not the same"
    enqueuer.stop()
Пример #12
0
    def auc_eval(self):
        """Compute the ROC-AUC score on the validation data.

        Supports two forms of ``self.validation_data``: a generator (consumed
        for ``self.validation_steps`` batches through a GeneratorEnqueuer) or
        an in-memory ``(x, y)`` pair. The score is appended to ``self.auc``
        and printed.

        Raises:
            ValueError: If a generator batch is not an (x, y) or
                (x, y, sample_weight) tuple.
        """
        if isinstance(self.validation_data, Generator):
            assert self.validation_steps is not None, \
                'If validation data is a generator, validation steps must be provided'
            y_pred = []
            y_true = []

            # Single threaded worker; the generator may not be safe to share
            # across processes.
            enqueuer = GeneratorEnqueuer(self.validation_data,
                                         use_multiprocessing=False,
                                         wait_time=.01)
            enqueuer.start(workers=1, max_queue_size=10)
            output_generator = enqueuer.get()

            try:
                for _ in range(self.validation_steps):
                    generator_output = next(output_generator)
                    # Accept (x, y) or (x, y, sample_weight); anything else is
                    # malformed generator output.
                    if (not hasattr(generator_output, '__len__')
                            or len(generator_output) not in (2, 3)):
                        raise ValueError('Output of generator should be a tuple '
                                         '(x, y, sample_weight) '
                                         'or (x, y). Found: ' +
                                         str(generator_output))
                    # Sample weights (if present) are irrelevant to AUC.
                    x, y = generator_output[:2]
                    outs = self.model.predict_on_batch(x)

                    y_pred += outs.tolist()
                    y_true += y.tolist()
            finally:
                # Release the enqueuer thread even if prediction or the
                # generator raised (the original leaked it on error).
                enqueuer.stop()
        else:
            y_pred = self.model.predict(self.validation_data[0])
            # np.bool was deprecated in NumPy 1.20 and removed in 1.24; the
            # builtin bool is the documented drop-in replacement for astype.
            y_true = self.validation_data[1].astype(bool)

        roc_auc = roc_auc_score(y_true=y_true, y_score=y_pred)
        self.auc.append(roc_auc)
        print('AUC Score is %s' % self.auc[-1])
Пример #13
0
class ModelDiagnoser(Callback):
    """Keras callback that renders sample inputs, targets and predictions to
    TensorBoard images at the end of every epoch.

    Batches are pulled from `data_generator` through an enqueuer that is
    started once in `__init__` and stopped in `on_train_end`.
    """

    def __init__(self, data_generator, m_batch_size, num_samples, output_dir, normalization_mean):
        """
        Args:
            data_generator: Sequence or plain generator of (x, y) batches,
                where x and y are dicts of arrays (only the first value of
                each dict is visualized).
            m_batch_size: Batch size produced by the generator.
            num_samples: Total number of samples to visualize per epoch.
            output_dir: Directory for the TensorBoard event files.
            normalization_mean: Mean that was subtracted during training;
                added back before writing images.
        """
        super().__init__()
        # NOTE(review): despite its name, epoch_index is incremented once per
        # *batch* in on_epoch_end, so the "Epoch-N" image tags advance faster
        # than actual epochs — confirm whether that is intended.
        self.epoch_index = 0
        self.data_generator = data_generator
        self.batch_size = m_batch_size
        self.num_samples = num_samples
        self.tensorboard_writer = TensorBoardWriter(output_dir)
        self.normalization_mean = normalization_mean
        # Sequences can be indexed safely from multiple processes; plain
        # generators cannot, so they get a single-threaded enqueuer.
        is_sequence = isinstance(self.data_generator, Sequence)
        if is_sequence:
            self.enqueuer = OrderedEnqueuer(self.data_generator,
                                            use_multiprocessing=True,
                                            shuffle=False)
        else:
            self.enqueuer = GeneratorEnqueuer(self.data_generator,
                                              use_multiprocessing=False,  # todo: how to 'True' ?
                                              wait_time=0.01)
        # todo: integrate the Sequence generator properly
#        import multiprocessing
#        self.enqueuer.start(workers=multiprocessing.cpu_count(), max_queue_size=4)
        self.enqueuer.start(workers=1, max_queue_size=4)

    def on_epoch_end(self, epoch, logs=None):
        """Write up to `num_samples` (x, y, y_pred) image triples to TensorBoard."""
        output_generator = self.enqueuer.get()
        steps_done = 0
        total_steps = int(np.ceil(np.divide(self.num_samples, self.batch_size)))
        sample_index = 0
        while steps_done < total_steps:
            generator_output = next(output_generator)
            # Only the first entry of the x and y dicts is visualized.
            x, y = generator_output[:2]
            x = next(iter(x.values()))
            y = next(iter(y.values()))
            y_pred = self.model.predict(x)
            self.epoch_index += 1

            for i in range(0, len(y_pred)):
                n = steps_done * self.batch_size + i
                if n >= self.num_samples:
                    # Sample budget exhausted; abandon the remaining batches.
                    return

                # rearranging images for visualization
                img_x = self.__reformat_img(x, i)
                img_y = self.__reformat_img(y, i)
                img_p = self.__reformat_img(y_pred, i)

                self.tensorboard_writer.save_image("Epoch-{}/{}/x"
                                                   .format(self.epoch_index, sample_index), img_x)
                self.tensorboard_writer.save_image("Epoch-{}/{}/y"
                                                   .format(self.epoch_index, sample_index), img_y)
                self.tensorboard_writer.save_image("Epoch-{}/{}/y_pred"
                                                   .format(self.epoch_index, sample_index), img_p)
                sample_index += 1

            steps_done += 1

    def __reformat_img(self, img_np_array, img_index):
        """Undo normalization and channel order for one image of a batch.

        Assumes BGR<->RGB style channel swapping via the [2, 1, 0] reorder —
        TODO confirm against the training pipeline's color format.
        """
        img = np.squeeze(img_np_array[img_index, :, :, :])
        img = 255. * (img + self.normalization_mean)  # mean is the training images normalization mean
        img = img[:, :, [2, 1, 0]]  # reordering of channels
        return img

    def on_train_end(self, logs=None):
        """Shut down the background enqueuer and flush the event writer."""
        self.enqueuer.stop()
        self.tensorboard_writer.close()
Пример #14
0
def validate(config, model, val_client, validation_steps, metrics_id, epoch):
    """Run validation for `validation_steps` batches and persist metrics.

    Per-batch metrics go to ``logs/val_scores.<metrics_id>.<epoch>.txt``;
    epoch-mean model metrics are appended to ``<metrics_id>.val.tsv``.

    Args:
        config: Config exposing `paf_layers`, `heat_layers`, `heat_start`,
            `bkg_start`.
        model: Keras model whose `predict` returns per-stage outputs.
        val_client: Object whose `gen()` yields (X, GT) batches.
        validation_steps: Number of batches to evaluate.
        metrics_id: Identifier used in the output file names.
        epoch: Current epoch number (header written only when epoch == 1).
    """
    val_di = val_client.gen()
    from keras.utils import GeneratorEnqueuer

    val_thre = GeneratorEnqueuer(val_di)
    val_thre.start()

    model_metrics = []
    inhouse_metrics = []

    for i in range(validation_steps):

        # NOTE(review): val_thre.get() is called every iteration, creating a
        # fresh consumer generator per step — likely works but is wasteful;
        # confirm against the GeneratorEnqueuer implementation.
        X, GT = next(val_thre.get())

        Y = model.predict(X)

        # Per-output mean squared error halved, matching the usual 1/2 * L2
        # training loss convention.
        model_losses = [(np.sum((gt - y)**2) / gt.shape[0] / 2)
                        for gt, y in zip(GT, Y)]
        mm = sum(model_losses)

        # Assemble the final-stage ground truth / prediction depending on
        # which output heads (PAF and/or heatmap) the model has.
        if config.paf_layers > 0 and config.heat_layers > 0:
            GTL6 = np.concatenate([GT[-2], GT[-1]], axis=3)
            YL6 = np.concatenate([Y[-2], Y[-1]], axis=3)
            mm6l1 = model_losses[-2]
            mm6l2 = model_losses[-1]
        elif config.paf_layers == 0 and config.heat_layers > 0:
            GTL6 = GT[-1]
            YL6 = Y[-1]
            mm6l1 = None
            mm6l2 = model_losses[-1]
        else:
            assert False, "Wtf or not implemented"

        m = calc_batch_metrics(i, GTL6, YL6,
                               range(config.heat_start, config.bkg_start))
        inhouse_metrics += [m]

        model_metrics += [(i, mm, mm6l1, mm6l2, m["MAE"].sum() / GTL6.shape[0],
                           m["RMSE"].sum() / GTL6.shape[0], m["DIST"].mean())]
        print(
            "Validating[BATCH: %d] LOSS: %0.4f, S6L1: %0.4f, S6L2: %0.4f, MAE: %0.4f, RMSE: %0.4f, DIST: %0.2f"
            % model_metrics[-1])

    # Detailed per-layer metrics for this epoch.
    inhouse_metrics = pd.concat(inhouse_metrics)
    inhouse_metrics['epoch'] = epoch
    inhouse_metrics.to_csv("logs/val_scores.%s.%04d.txt" % (metrics_id, epoch),
                           sep="\t")

    # Epoch-mean summary appended to the running TSV (header on first epoch).
    model_metrics = pd.DataFrame(model_metrics,
                                 columns=("batch", "loss", "stage6l1",
                                          "stage6l2", "mae", "rmse", "dist"))
    model_metrics['epoch'] = epoch
    del model_metrics['batch']
    model_metrics = model_metrics.groupby('epoch').mean()
    with open('%s.val.tsv' % metrics_id, 'a') as f:
        model_metrics.to_csv(f,
                             header=(epoch == 1),
                             sep="\t",
                             float_format='%.4f')

    val_thre.stop()
def validate(config, model, multi_model, val_client, validation_steps,
             metrics_id, epoch):
    """Run validation with the multi-GPU model and print per-batch metrics.

    Unlike the single-model variant, this version only prints results and
    timing; the pandas persistence code is commented out below.
    """
    # The X obtained is a list of [image, confidence mask, paf mask]; Y holds
    # the 12 ground-truth heatmaps across the 6 stages. The network has three
    # inputs (for evaluation during training the model does not need to mask
    # its feature-map outputs — it can predict over all regions): the raw
    # image, plus mask1 and mask2 that blank the confidence / paf values in
    # unlabeled regions.

    val_di = val_client.gen()

    val_thre = GeneratorEnqueuer(
        val_di
    )  # The provided generator can be finite in which case the class will throw
    # a `StopIteration` exception. The gen implemented here does not seem to
    # have that problem; this class mainly provides the multiprocess wrapper.
    val_thre.start()

    model_metrics = []
    inhouse_metrics = []
    t0 = time()
    for i in range(validation_steps
                   ):  # Prediction is split into batches; to keep validation
        # fast only part of the data is evaluated.
        # validation_steps = val_samples//batch_size — predicting in batches
        # avoids running out of memory (OOM).
        # if random.randint(0, 9) < 5:  # evaluate only 20% of the data
        #     continue
        X, GT = next(val_thre.get())
        Y = multi_model.predict(X)

        model_losses = [(np.sum((gt - y)**2) / gt.shape[0] / 2)
                        for gt, y in zip(GT, Y)]
        # Matches the loss used at model definition time; dividing by 2
        # cancels the factor 2 from differentiating the square, saving a
        # multiplication.
        mm = sum(model_losses)

        if config.paf_layers > 0 and config.heat_layers > 0:
            GTL6 = np.concatenate([GT[-2], GT[-1]], axis=3)
            YL6 = np.concatenate([Y[-2], Y[-1]], axis=3)
            mm6l1 = model_losses[-2]  # NOTICE! this is the distance between the
            # model's final-stage prediction and the ground truth
            mm6l2 = model_losses[-1]
        elif config.paf_layers == 0 and config.heat_layers > 0:
            GTL6 = GT[-1]
            YL6 = Y[-1]
            mm6l1 = None
            mm6l2 = model_losses[-1]
        else:
            assert False, "Wtf or not implemented"

        m = calc_batch_metrics(i, GTL6, YL6,
                               range(config.heat_start, config.bkg_start))
        inhouse_metrics += [m]

        model_metrics += [(i, mm, mm6l1, mm6l2, m["MAE"].sum() / GTL6.shape[0],
                           m["RMSE"].sum() / GTL6.shape[0], m["DIST"].mean())]
        # (When persisted, metrics are grouped by epoch and averaged.)
        print(
            "Validating[BATCH: %d] LOSS: %0.4f, S6L1: %0.4f, S6L2: %0.4f, MAE: %0.4f, RMSE: %0.4f, DIST: %0.2f"
            % model_metrics[-1])

    t1 = time()
    print('The CNN prediction time during validation is : ', t1 - t0)
    # inhouse_metrics = pd.concat(inhouse_metrics)
    # inhouse_metrics['epoch'] = epoch
    # inhouse_metrics.to_csv("logs/val_scores.%s.%04d.csv" % (metrics_id, epoch))  # default sep is ',' not '\t'
    # # saves the per-layer details
    #
    # model_metrics = pd.DataFrame(model_metrics, columns=("batch","loss","stage6l1","stage6l2","mae","rmse","dist") )
    # model_metrics['epoch'] = epoch
    # del model_metrics['batch']
    # model_metrics = model_metrics.groupby('epoch').mean()
    # with open('%s.val.tsv' % metrics_id, 'a') as f:
    #     model_metrics.to_csv(f, header=(epoch==1), float_format='%.4f')  # sep="\t",
    #
    # print(inhouse_metrics[["layer", "epoch", "MAE", "RMSE", "DIST"]].groupby(["layer", "epoch"]).mean())

    val_thre.stop()