Example #1
    def train(self, generator: StackedMNISTData, epochs: int = 10) -> bool:
        """
        Train model if required. As we have a one-channel model we take care to
        only use the first channel of the data.
        """
        self.done_training = self.load_weights()

        if self.force_relearn or self.done_training is False:
            # Get hold of data
            x_train, y_train = generator.get_full_data_set(training=True)
            x_test, y_test = generator.get_full_data_set(training=False)

            # "Translate": Only look at "red" channel; only use the last digit. Use one-hot for labels during training
            x_train = x_train[:, :, :, [0]]
            y_train = keras.utils.to_categorical((y_train % 10).astype(int), 10)
            x_test = x_test[:, :, :, [0]]
            y_test = keras.utils.to_categorical((y_test % 10).astype(int), 10)

            # Fit model
            self.model.fit(x=x_train,
                           y=y_train,
                           batch_size=1024,
                           epochs=epochs,
                           validation_data=(x_test, y_test))

            # Save weights and leave
            self.model.save_weights(filepath=self.file_name)
            self.done_training = True

        return self.done_training
Example #2
    def demo(self, generator: StackedMNISTData, epochs):
        if self.channels == 3:
            tol = 0.5
            net = VerificationNet(
                force_learn=False,
                file_name="./models/verification_model_stacked")
            factor = 1 / (1 - 0.111)
        elif self.channels == 1:
            tol = 0.8
            net = VerificationNet(force_learn=False)
            factor = 10 / 9
        else:
            raise ValueError(
                f"self.channels should be 1 or 3 but was {self.channels}")

        print(f"Tolerance set to {tol}, due to {self.channels} channels")
        self.train(generator=generator,
                   epochs=epochs,
                   batch_size=generator.default_batch_size)

        # AE-BASIC
        print('+' + '-' * 20 + 'Reconstruction' + '-' * 20 + '+')
        plot = True
        imgs, labels = generator.get_random_batch(training=True,
                                                  batch_size=10 if plot else 1000)
        data = self.reconstruct(data=imgs, plot=plot)
        cov = net.check_class_coverage(data=data, tolerance=tol)
        predictability, accuracy = net.check_predictability(
            data=data, correct_labels=labels, tolerance=tol)
        print(
            f"Coverage: {cov*factor:.2f} \nAccuracy: {accuracy:.2f}\nPredictability/Quality: {predictability:.2f}"
        )

        # AE-GEN
        print('+' + '-' * 20 + 'Generative' + '-' * 20 + '+')
        gen_data = self.generate_random_images(amount=16, plot=True)

        gen_cov = net.check_class_coverage(gen_data, tolerance=tol)
        gen_pred, _ = net.check_predictability(gen_data, tolerance=tol)

        print(
            f"Coverage: {gen_cov*cov:.2f} \nPredictability/Quality: {gen_pred:.2f}"
        )

        # AE-ANOM
        print('+' + '-' * 20 + 'Anomaly' + '-' * 20 + '+')
        imgs, _ = generator.get_random_batch(training=False, batch_size=160)
        self.anom_det(data=imgs, k=16)
Example #3
def main():
    datamode = config.GAN_DATAMODE
    data_generator = StackedMNISTData(mode=datamode, default_batch_size=2048)

    x_test, y_test = data_generator.get_full_data_set(training=False)
    x_test = x_test.astype(np.float64)

    image_shape = x_test[0].shape

    # Create a generator model and a discriminator model.
    generator_model = GeneratorModel(image_shape)
    discriminator_model = DiscriminatorModel(image_shape)

    classifier = None

    # Color (stacked) data has 1000 classes (10 per channel across three channels), while regular MNIST only has 10.
    if datamode.name.startswith("COLOR"):
        num_classes = 1000
    else:
        num_classes = 10

    if config.GAN_USE_CLASSIFIER:
        # Get a regular classifier model.
        classifier = basic_classifier_model.get_model(datamode,
                                                      data_generator,
                                                      num_classes=num_classes,
                                                      input_shape=image_shape)

    # GAN model.
    gan_model = GANModel(generator_model, discriminator_model, classifier,
                         num_classes)

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(1))
    model.compile("adam", loss=GANModel.custom_loss_func)

    # Use separate folders so models trained with and without the classifier are kept apart.
    folder = f"./models/gan_{config.GAN_BATCH_SIZE}_{int(config.GAN_USE_CLASSIFIER)}" \
             f"_{config.GAN_INPUT_DIM_SIZE}_{datamode.name}_{int(config.GAN_BATCHNORM_GEN)}/"

    if not os.path.exists(folder):
        os.makedirs(folder)

    gan_file_name = folder + "model.tf"
    if config.LOAD_GAN:
        latents = np.random.randn(1, config.GAN_INPUT_DIM_SIZE)
        # Again use the dummy-fit trick so the model's variables are built before loading weights.
        gan_model.fit(np.array(latents), np.array([1]), epochs=1, verbose=0)
        gan_model.load_weights(gan_file_name)
    else:
        gan_model.train(data_generator)
        gan_model.save_weights(gan_file_name)
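    # A possible alternative to the dummy-fit trick above (a sketch only; it
    # assumes GANModel.call accepts a batch of latent vectors): build the
    # variables with a single forward pass before loading, e.g.
    #   _ = gan_model(np.random.randn(1, config.GAN_INPUT_DIM_SIZE))
    #   gan_model.load_weights(gan_file_name)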

    net = util.get_verification_model(datamode, data_generator)

    batch_size = 36

    # Generate 3 images and plot them.
    for _ in range(3):
        # Generate images
        latents = np.random.randn(batch_size, config.GAN_INPUT_DIM_SIZE)
        generated_images = generator_model.predict(latents,
                                                   batch_size=batch_size)
        # Draw the generated images.
        draw.draw_images(generated_images, size=6)

    # This is for checking mode collapse
    cov = net.check_class_coverage(data=generated_images, tolerance=.8)
    pred, _ = net.check_predictability(data=generated_images)
    print(f"GAN - Generated images - Coverage: {100 * cov:.2f}%")
    print(f"GAN - Generated images - Predictability: {100 * pred:.2f}%")
    print("---------------------------------------------")
Example #4
        """
        # Get predictions; only keep those where all channels were "confident enough"
        predictions, beliefs = self.predict(data=data)
        predictions = predictions[beliefs >= tolerance - 0.1]
        predictability = len(predictions) / len(data)

        if correct_labels is not None:
            # Drop those that were below threshold
            correct_labels = correct_labels[beliefs >= tolerance - 0.1]
            accuracy = np.sum(predictions == correct_labels) / len(data)
        else:
            accuracy = None

        return predictability, accuracy
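    # Worked example (illustrative numbers only): with tolerance = 0.8 and
    # beliefs = [0.95, 0.60, 0.85, 0.40], two of the four samples pass the
    # >= tolerance - 0.1 cut, so predictability = 2 / 4 = 0.5; accuracy is
    # likewise computed with the full denominator len(data) = 4.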


if __name__ == "__main__":
    gen = StackedMNISTData(mode=DataMode.COLOR_BINARY_COMPLETE,
                           default_batch_size=2048)
    net = VerificationNet(force_learn=True,
                          file_name="./models/verification_model_stacked")
    net.train(generator=gen, epochs=10)

    # I have no data generator (VAE or whatever) here, so just use a sampled set
    img, labels = gen.get_random_batch(training=True, batch_size=25000)
    cov = net.check_class_coverage(data=img, tolerance=.98)
    pred, acc = net.check_predictability(data=img, correct_labels=labels)
    print(f"Coverage: {100*cov:.2f}%")
    print(f"Predictability: {100*pred:.2f}%")
    print(f"Accuracy: {100 * acc:.2f}%")
Example #5
                        self.generator.predict(noise))[0]
                    class_coverage = verifier.check_class_coverage(
                        self.generator.predict(noise))
                    print(predictability, class_coverage)
                    self.save_weights("{0]_".format(epoch) + filename)

    def save_weights(self, filename):
        self.discriminator.save('./models_gan/disc_' + filename)
        self.generator.save('./models_gan/gen_' + filename)

    def load_weights(self, filename):
        self.discriminator.load_weights('./models_gan/disc_' + filename)
        self.generator.load_weights('./models_gan/gen_' + filename)


if __name__ == '__main__':
    # Limit gpu usage.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    set_session(sess)

    dataset = 'mono_float_complete'

    verifier = VerificationNet(file_name='./models/' + dataset + '.h5')
    verifier.load_weights()
    gen = StackedMNISTData(mode=DataMode.MONO_FLOAT_COMPLETE,
                           default_batch_size=9)

    dcgan = DCGAN(True if 'color' in dataset else False)
    dcgan.fit(gen, verifier, epochs=100000, batch_size=32, save_interval=500)
Example #6
        gen_cov = net.check_class_coverage(gen_data, tolerance=tol)
        gen_pred, _ = net.check_predictability(gen_data, tolerance=tol)

        print(
            f"Coverage: {gen_cov*cov:.2f} \nPredictability/Quality: {gen_pred:.2f}"
        )

        # AE-ANOM
        print('+' + '-' * 20 + 'Anomaly' + '-' * 20 + '+')
        imgs, _ = generator.get_random_batch(training=False, batch_size=160)
        self.anom_det(data=imgs, k=16)


if __name__ == '__main__':
    mode = DataMode.MONO_BINARY_MISSING

    if mode is DataMode.COLOR_BINARY_MISSING:
        fp = './models/10_color_autoencoder_model'
    elif mode is DataMode.MONO_BINARY_MISSING:
        fp = './models/10_mono_auto_encoder'
    else:
        raise NotImplementedError(
            "Models have only been trained for the data modes that were needed.")

    gen = StackedMNISTData(mode=mode, default_batch_size=2048)
    ae = Autoencoder(encoder_dim=10 * gen.channels * gen.channels,
                     channels=gen.channels,
                     file_name=fp,
                     must_train=False)
    ae.demo(generator=gen, epochs=300)
Example #7
def main():
    # Which data to use.
    datamode = config.AUTO_GEN_DATAMODE
    # Create a generator for that type of data.
    generator = StackedMNISTData(mode=datamode, default_batch_size=2048)

    # Take out the testing dataset.
    x_test, y_test = generator.get_full_data_set(training=False)
    x_test = x_test.astype(np.float64)

    # Create a verification model.
    net = util.get_verification_model(datamode, generator)

    autoencoder_model = get_autoencoder(datamode, generator)

    draw.predict_and_draw(autoencoder_model,
                          np.array(x_test[0:16]),
                          np.array(y_test[0:16]),
                          mult_255=False)
    batch_size = 16

    # Reconstruct the images of the test set.
    reconstructed_images = autoencoder_model.predict(x_test, batch_size=batch_size)
    # Check for mode collapse: if coverage is high, there is no mode collapse.
    cov = net.check_class_coverage(data=reconstructed_images, tolerance=.8)
    pred, acc = net.check_predictability(data=reconstructed_images, correct_labels=y_test)
    print(f"Autoencoder - Reconstructed images - Coverage: {100 * cov:.2f}%")
    print(f"Autoencoder - Reconstructed images - Predictability : {100 * pred:.2f}%")
    # This one should be over 80%
    print(f"Autoencoder - Reconstructed images - Accuracy: {100 * acc:.2f}%")
    print("---------------------------------------------")

    # Random latents.
    if datamode.name.startswith("COLOR") and config.AUTO_SPLIT_RGB:
        # For color data we need three times as many latent vectors (one set per channel).
        batch_size *= 3
    latents = np.random.randn(batch_size, config.AUTO_LATENT_SIZE)
    # Generate images by using the random data in the decoder model.
    generated_images = autoencoder_model.decoder_model(latents)
    # Convert them to numpy arrays.
    generated_images = tf.keras.backend.eval(generated_images)
    # Draw the generated images.
    draw.draw_images(generated_images, mult_255=False)

    cov = net.check_class_coverage(data=generated_images, tolerance=.8)
    pred, _ = net.check_predictability(data=generated_images)
    print(f"Autoencoder - Generated images - Coverage: {100 * cov:.2f}%")
    print(f"Autoencoder - Generated images - Predictability: {100 * pred:.2f}%")
    print("---------------------------------------------")

    #
    # Anomaly detector
    #
    datamode = config.AUTO_ANOM_DATAMODE
    generator = StackedMNISTData(mode=datamode, default_batch_size=2048)
    x_test, y_test = generator.get_full_data_set(training=False)
    x_test = x_test.astype(np.float64)

    autoencoder_model = get_autoencoder(datamode, generator)

    prediction = autoencoder_model.predict(x_test)
    # Flatten in order to simplify the loss calculation.
    x_test_flatten = x_test.reshape(x_test.shape[0], np.prod(x_test.shape[1:]))
    pred_flatten = prediction.reshape(prediction.shape[0], np.prod(prediction.shape[1:]))

    loss = losses.mse(x_test_flatten, pred_flatten)
    # Get the 16 samples with the highest reconstruction loss.
    top_loss = loss.numpy().argsort()[-16:][::-1]

    top_16 = []
    top_16_labels = []
    for i in top_loss:
        top_16.append(x_test[i])
        top_16_labels.append(str(y_test[i]))

    # Conclusion: autoencoders work well for finding anomalies, but rather
    # poorly as generators.
    draw.draw_images(np.array(top_16), labels=top_16_labels, mult_255=False)
Example #8
def train_all():
    param = read_json('pivotal_parameters.json')

    data_mode_name = [
        'mono_float_complete', 'mono_float_missing', 'mono_binary_complete',
        'mono_binary_missing', 'color_float_complete', 'color_float_missing',
        'color_binary_complete', 'color_binary_missing'
    ]

    models = ['vae', 'dcgan']

    for model_name in models:
        for dataset in data_mode_name:
            if model_name == 'ae':
                if 'mono' in dataset:
                    latent_size = 40
                else:
                    latent_size = 80
            else:
                if 'mono' in dataset:
                    latent_size = 20
                else:
                    latent_size = 40
            if model_name == 'ae' and (dataset == 'color_float_missing'
                                       or dataset == 'color_binary_missing'):
                continue
            if (dataset == 'color_float_complete'
                    or dataset == 'color_binary_missing'):
                continue

            # Initialize verification net.
            force_learn = param['verification']['force_learn']
            verifier = VerificationNet(file_name='./models/' + dataset + '.h5',
                                       force_learn=force_learn)
            model, encoder, decoder = create_model(model_name,
                                                   latent_size,
                                                   color='color' in dataset,
                                                   binary='binary' in dataset)
            gen = StackedMNISTData(mode=get_data_mode(dataset),
                                   default_batch_size=9)
            if param['verification']['load_weights']:
                verifier.load_weights()
            if force_learn:
                verifier.train(gen)
            try:
                model.load_weights(dataset + '.h5')
            except Exception as e:
                print(e)
            rec_pred, rec_acc, _ = reconstruct_images(model, gen, verifier)
            while rec_pred < 0.805 or rec_acc < 0.805:
                print("Training on dataset {0} with {1}".format(
                    dataset, model_name))
                if param['load_weights']:
                    model.load_weights(dataset + '.h5')
                if param['train']:
                    model.fit(gen, batch_size=128, epochs=20)
                if param['save_weights']:
                    model.save_weights(dataset + '.h5')
                x, _ = gen.get_full_data_set(training=True)

                rec_pred, rec_acc, _ = reconstruct_images(model, gen, verifier)
Example #9
    #  x  - color_binary_complete = 256
    #  x  - color_binary_missing = 256
    # DCGAN:
    #  x  - mono_float_complete
    #  x  - color_float_complete

    # Limit gpu usage.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    set_session(sess)

    param = read_json('pivotal_parameters.json')
    dataset = param['dataset']

    # Initialize data generator.
    gen = StackedMNISTData(mode=get_data_mode(dataset), default_batch_size=9)
    x, y = gen.get_full_data_set(training=False)

    # Initialize verification net.
    force_learn = param['verification']['force_learn']
    verifier = VerificationNet(file_name='./models/' + dataset + '.h5',
                               force_learn=force_learn)
    model, encoder, decoder = create_model(param['model'],
                                           param['latent_size'],
                                           color='color' in dataset,
                                           binary='binary' in dataset)
    gen = StackedMNISTData(mode=get_data_mode(dataset), default_batch_size=9)

    if param['verification']['load_weights']:
        verifier.load_weights()
    if force_learn: