Example No. 1
def _log_metrics_to_file(self):
    path = os.path.join(self.artifacts_path, 'metrics.csv')
    save_to_csv(path, [
        np.average(self.metrics['MSE']),
        np.average(self.metrics['KLD']),
        self.metrics['NMI'][-1],
        self.metrics['ARS'][-1]
    ])
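
save_to_csv is a project helper that is not listed on this page. A minimal sketch of what it might do, assuming it simply appends one row of values to a CSV file, is shown below; this is an assumption inferred from how it is called, not the project's actual code.

import csv


def save_to_csv(path, values):
    # Hypothetical helper: append a single row of values to the CSV file at `path`.
    with open(path, 'a', newline='') as csv_file:
        csv.writer(csv_file).writerow(values)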
Example No. 2
def train_model(self, data_loader, optimizer, epochs: int = 200,
                iterations: int = 10000, gamma: float = 0.1, epsilon=0.00001):
    print("Pretraining autoencoder:")
    training_start = time()
    self.train_autoencoder(data_loader, optimizer, epochs, epsilon)
    print("Pretraining finished, training with clustering")
    self.load_state_dict(torch.load(
        os.path.join(self.artifacts_path, "best_autoencoder_model.pt")))
    self.train_with_clustering(data_loader, optimizer, iterations, gamma)
    training_time = time() - training_start
    save_to_csv(os.path.join(self.artifacts_path, "time.csv"), [training_time])
    print("Done!")
Example No. 3
def main(args):
    os.makedirs(args.artifacts_path, exist_ok=True)
    # Init data
    train_data, test_data = load_patches(args.patches_dir,
                                         args.pixel_neighborhood)
    train_data.normalize_labels()
    test_data.normalize_labels()
    val_data = BalancedSubset(train_data, args.val_set_part)
    # Callbacks
    early = EarlyStopping(patience=args.patience)
    logger = CSVLogger(
        os.path.join(args.artifacts_path, args.output_file) + ".csv")
    checkpoint = ModelCheckpoint(
        os.path.join(args.artifacts_path, args.output_file) + "_model",
        save_best_only=True)
    timer = TimeHistory()

    # Normalize data
    max_ = max(train_data.max, val_data.max)
    min_ = min(train_data.min, val_data.min)
    train_data.normalize_min_max(min_=min_, max_=max_)
    val_data.normalize_min_max(min_=min_, max_=max_)
    test_data.normalize_min_max(min_=min_, max_=max_)

    # Augment data
    transformation = UpScaleTransform()
    augmenter = OfflineAugmenter(transformation, sampling_mode="max_twice")
    augmented_data, augmented_labels = augmenter.augment(train_data,
                                                         transformations=1)
    train_data.vstack(augmented_data)
    train_data.hstack(augmented_labels)

    if args.pixel_neighborhood == 1:
        train_data.expand_dims(axis=-1)
        test_data.expand_dims(axis=-1)
        val_data.expand_dims(axis=-1)

    if args.classes_count == 0:
        args.classes_count = len(np.unique(test_data.get_labels()))

    # Build model
    if args.pixel_neighborhood == 1:
        model = build_1d_model((test_data.shape[1:]), args.kernels,
                               args.kernel_size, args.classes_count)
    else:
        settings = build_settings_for_dataset(
            (args.pixel_neighborhood, args.pixel_neighborhood))
        model = build_3d_model(settings, args.classes_count,
                               test_data.shape[-1])

    # Train model
    history = model.fit(x=train_data.get_data(),
                        y=train_data.get_one_hot_labels(args.classes_count),
                        batch_size=args.batch_size,
                        epochs=args.epochs,
                        verbose=args.verbose,
                        callbacks=[early, logger, checkpoint, timer],
                        validation_data=(val_data.get_data(),
                                         val_data.get_one_hot_labels(
                                             args.classes_count)))

    # Load best model
    model = load_model(
        os.path.join(args.artifacts_path, args.output_file) + "_model")

    # Calculate test set score
    test_score = model.evaluate(x=test_data.get_data(),
                                y=test_data.get_one_hot_labels(
                                    args.classes_count))

    # Calculate accuracy for each class
    predictions = model.predict(x=test_data.get_data())
    predictions = np.argmax(predictions, axis=1)
    class_accuracy = calculate_class_accuracy(predictions,
                                              test_data.get_labels(),
                                              args.classes_count)
    # Collect metrics
    train_score = max(history.history['acc'])
    val_score = max(history.history['val_acc'])
    times = timer.times
    time = times[-1]
    avg_epoch_time = np.average(np.array(timer.average))
    epochs = len(history.epoch)
    kappa = cohen_kappa_score(predictions, test_data.get_labels())
    # Save metrics
    metrics_path = os.path.join(args.artifacts_path, "metrics.csv")
    kappa_path = os.path.join(args.artifacts_path, "kappa.csv")
    save_to_csv(
        metrics_path,
        [train_score, val_score, test_score[1], time, epochs, avg_epoch_time])
    class_accuracy_path = os.path.join(args.artifacts_path,
                                       "class_accuracy.csv")
    save_to_csv(class_accuracy_path, class_accuracy)
    save_to_csv(kappa_path, [kappa])
    np.savetxt(os.path.join(args.artifacts_path, args.output_file) +
               "_times.csv",
               times,
               fmt="%1.4f")
Example No. 4
def main(args):
    os.makedirs(args.artifacts_path, exist_ok=True)
    # Init data
    test_data = HyperspectralDataset(args.dataset_path,
                                     args.gt_path,
                                     neighborhood_size=args.pixel_neighborhood)
    test_data.normalize_labels()
    if args.balanced == 1:
        train_data = BalancedSubset(test_data, args.train_samples)
        val_data = BalancedSubset(train_data, args.val_set_part)
    elif args.balanced == 0:
        train_data = ImbalancedSubset(test_data, args.train_samples)
        val_data = ImbalancedSubset(train_data, args.val_set_part)
    elif args.balanced == 2:  # Case for balanced indiana
        train_data = CustomSizeSubset(test_data, [
            30, 250, 250, 150, 250, 250, 20, 250, 15, 250, 250, 250, 150, 250,
            50, 50
        ])
        val_data = BalancedSubset(train_data, args.val_set_part)
    # Callbacks
    early = EarlyStopping(patience=args.patience)
    logger = CSVLogger(
        os.path.join(args.artifacts_path, args.output_file) + ".csv")
    checkpoint = ModelCheckpoint(
        os.path.join(args.artifacts_path, args.output_file) + "_model",
        save_best_only=True)
    timer = TimeHistory()

    # Normalize data
    max_ = max(train_data.max, val_data.max)
    min_ = min(train_data.min, val_data.min)
    train_data.normalize_min_max(min_=min_, max_=max_)
    val_data.normalize_min_max(min_=min_, max_=max_)
    test_data.normalize_min_max(min_=min_, max_=max_)

    if args.pixel_neighborhood == 1:
        test_data.expand_dims(axis=-1)
        train_data.expand_dims(axis=-1)
        val_data.expand_dims(axis=-1)

    if args.classes_count == 0:
        args.classes_count = len(np.unique(test_data.get_labels()))

    # Build model
    if args.pixel_neighborhood == 1:
        model = build_1d_model((test_data.shape[1:]), args.kernels,
                               args.kernel_size, args.classes_count)
    else:
        settings = build_settings_for_dataset(
            (args.pixel_neighborhood, args.pixel_neighborhood))
        model = build_3d_model(settings, args.classes_count,
                               test_data.shape[-1])

    # Train model
    history = model.fit(x=train_data.get_data(),
                        y=train_data.get_one_hot_labels(args.classes_count),
                        batch_size=args.batch_size,
                        epochs=args.epochs,
                        verbose=args.verbose,
                        callbacks=[early, logger, checkpoint, timer],
                        validation_data=(val_data.get_data(),
                                         val_data.get_one_hot_labels(
                                             args.classes_count)))

    # Load best model
    model = load_model(
        os.path.join(args.artifacts_path, args.output_file) + "_model")
    # Remove last dimension
    train_data.data = train_data.get_data()[:, :, 0]
    test_data.data = test_data.get_data()[:, :, 0]
    from time import time
    transformation = OnlineLightenTransform(scaling=[0.05, 0.1])
    start_online = time()
    transformation.fit(train_data.get_data())
    augmenter = OnlineAugmenter()
    test_score, class_accuracy = augmenter.evaluate(model, test_data,
                                                    transformation)
    online_time = time() - start_online
    # Collect metrics
    train_score = max(history.history['acc'])
    val_score = max(history.history['val_acc'])
    times = timer.times
    time_ = times[-1]
    avg_epoch_time = np.average(np.array(timer.average))
    epochs = len(history.epoch)

    # Save metrics
    metrics_path = os.path.join(args.artifacts_path, "metrics.csv")
    time_path = os.path.join(args.artifacts_path, "inference_time.csv")
    save_to_csv(
        metrics_path,
        [train_score, val_score, test_score, time_, epochs, avg_epoch_time])
    class_accuracy_path = os.path.join(args.artifacts_path,
                                       "class_accuracy.csv")
    save_to_csv(class_accuracy_path, class_accuracy)
    save_to_csv(time_path, [online_time])
    np.savetxt(os.path.join(args.artifacts_path, args.output_file) +
               "_times.csv",
               times,
               fmt="%1.4f")
Example No. 5
def main(args):
    os.makedirs(args.artifacts_path, exist_ok=True)
    # Init data
    test_data = HyperspectralDataset(args.dataset_path, args.gt_path,
                                     neighbourhood_size=args.pixel_neighbourhood)
    test_data.normalize_labels()
    if args.balanced == 1:
        train_data = BalancedSubset(test_data, args.train_samples)
        val_data = BalancedSubset(train_data, args.val_set_part)
    elif args.balanced == 0:
        train_data = ImbalancedSubset(test_data, args.train_samples)
        val_data = ImbalancedSubset(train_data, args.val_set_part)
    elif args.balanced == 2:  # Case for balanced indiana
        train_data = CustomSizeSubset(test_data, [30, 250, 250, 150, 250, 250,
                                                  20, 250, 15, 250, 250, 250,
                                                  150, 250, 50, 50])
        val_data = BalancedSubset(train_data, args.val_set_part)
    # Normalize data
    max_ = max(train_data.max, val_data.max)
    min_ = min(train_data.min, val_data.min)
    train_data.normalize_min_max(min_=min_, max_=max_)
    val_data.normalize_min_max(min_=min_, max_=max_)
    test_data.normalize_min_max(min_=min_, max_=max_)

    custom_data_loader = OrderedDataLoader(train_data, args.batch_size)
    data_loader = DataLoader(train_data, batch_size=args.batch_size,
                             shuffle=True, drop_last=True)

    cuda = torch.cuda.is_available()
    input_shape = bands_count = train_data.shape[-1]
    if args.classes_count == 0:
        args.classes_count = len(np.unique(train_data.get_labels()))

    classifier_criterion = nn.CrossEntropyLoss()
    # Initialize generator, discriminator and classifier
    generator = Generator(input_shape, args.classes_count)
    discriminator = Discriminator(input_shape)
    classifier = Classifier(classifier_criterion, input_shape, args.classes_count,
                            use_cuda=cuda, patience=args.classifier_patience)

    # Optimizers
    optimizer_G = torch.optim.Adam(generator.parameters(),
                                   lr=args.learning_rate,
                                   betas=(args.b1, args.b2))
    optimizer_D = torch.optim.Adam(discriminator.parameters(),
                                   lr=args.learning_rate,
                                   betas=(args.b1, args.b2))
    optimizer_C = torch.optim.Adam(classifier.parameters(),
                                   lr=args.learning_rate,
                                   betas=(args.b1, args.b2))

    if cuda:
        generator = generator.cuda()
        discriminator = discriminator.cuda()
        classifier = classifier.cuda()
        classifier_criterion = classifier_criterion.cuda()

    # Train classifier
    classifier.train_(data_loader, optimizer_C, args.n_epochs_gan)

    gan = WGAN(generator, discriminator, classifier, optimizer_G, optimizer_D,
               use_cuda=cuda, lambda_gp=args.lambda_gp, critic_iters=args.n_critic,
               patience=args.patience_gan, summary_writer=SummaryWriter(args.artifacts_path),
               generator_checkout=args.generator_checkout)
    # Train GAN
    gan.train(custom_data_loader, args.n_epochs_gan, bands_count,
              args.batch_size, args.classes_count,
              os.path.join(args.artifacts_path, args.output_file) + "_generator_model")

    # Generate samples using trained Generator
    generator = Generator(input_shape, args.classes_count)
    generator_path = os.path.join(args.artifacts_path, args.output_file + "_generator_model")
    generator.load_state_dict(torch.load(generator_path))
    if cuda:
        generator = generator.cuda()
    train_data.convert_to_numpy()

    device = 'gpu' if cuda else 'cpu'
    samples_generator = SamplesGenerator(device=device)
    generated_x, generated_y = samples_generator.generate(train_data,
                                                          generator)
    generated_x = np.reshape(generated_x.detach().cpu().numpy(),
                             generated_x.shape + (1, ))

    train_data.expand_dims(axis=-1)
    test_data.expand_dims(axis=-1)
    val_data.expand_dims(axis=-1)

    train_data.vstack(generated_x)
    train_data.hstack(generated_y)

    # Callbacks
    early = EarlyStopping(patience=args.patience)
    logger = CSVLogger(os.path.join(args.artifacts_path, args.output_file) + ".csv")
    checkpoint = ModelCheckpoint(os.path.join(args.artifacts_path,
                                              args.output_file) + "_model",
                                 save_best_only=True)
    timer = TimeHistory()

    # Build model
    model = build_1d_model((test_data.shape[1:]), args.kernels,
                            args.kernel_size, args.classes_count)

    # Train model
    history = model.fit(x=train_data.get_data(),
                        y=train_data.get_one_hot_labels(args.classes_count),
                        batch_size=args.batch_size,
                        epochs=args.epochs,
                        verbose=args.verbose,
                        callbacks=[early, logger, checkpoint, timer],
                        validation_data=(val_data.get_data(),
                                         val_data.get_one_hot_labels(args.classes_count)))

    # Load best model
    model = load_model(os.path.join(args.artifacts_path, args.output_file) + "_model")

    # Calculate test set score
    test_score = model.evaluate(x=test_data.get_data(),
                                y=test_data.get_one_hot_labels(
                                    args.classes_count))

    # Calculate accuracy for each class
    predictions = model.predict(x=test_data.get_data())
    predictions = np.argmax(predictions, axis=1)
    class_accuracy = calculate_class_accuracy(predictions,
                                              test_data.get_labels(),
                                              args.classes_count)
    # Collect metrics
    train_score = max(history.history['acc'])
    val_score = max(history.history['val_acc'])
    times = timer.times
    time = times[-1]
    avg_epoch_time = np.average(np.array(timer.average))
    epochs = len(history.epoch)

    # Save metrics
    metrics_path = os.path.join(args.artifacts_path, "metrics.csv")
    save_to_csv(metrics_path, [train_score, val_score,
                               test_score[1], time, epochs, avg_epoch_time])
    class_accuracy_path = os.path.join(args.artifacts_path,
                                       "class_accuracy.csv")
    save_to_csv(class_accuracy_path, class_accuracy)
    np.savetxt(os.path.join(args.artifacts_path, args.output_file) +
               "_times.csv", times, fmt="%1.4f")
Example No. 6
def main(args):
    os.makedirs(args.artifacts_path, exist_ok=True)
    # Init data
    test_data = HyperspectralDataset(args.dataset_path, args.gt_path,
                                     neighbourhood_size=args.pixel_neighbourhood)
    mapper = BandMapper()
    test_data.data = mapper.map(test_data.get_data(), args.bands)
    test_data.normalize_labels()
    if args.balanced == 1:
        train_data = BalancedSubset(test_data, args.train_samples)
        val_data = BalancedSubset(train_data, args.val_set_part)
    elif args.balanced == 0:
        train_data = ImbalancedSubset(test_data, args.train_samples)
        val_data = ImbalancedSubset(train_data, args.val_set_part)
    elif args.balanced == 2:  # Case for balanced indiana
        train_data = CustomSizeSubset(test_data, [30, 250, 250, 150, 250, 250,
                                                  20, 250, 15, 250, 250, 250,
                                                  150, 250, 50, 50])
        val_data = BalancedSubset(train_data, args.val_set_part)

    # Callbacks
    early = EarlyStopping(patience=args.patience)
    logger = CSVLogger(os.path.join(args.artifacts_path, args.output_file) + ".csv")
    checkpoint = ModelCheckpoint(os.path.join(args.artifacts_path, args.output_file) + "_model",
                                 save_best_only=True)
    timer = TimeHistory()
    # Normalize data
    max_ = max(train_data.max, val_data.max)
    min_ = min(train_data.min, val_data.min)
    train_data.normalize_min_max(min_=min_, max_=max_)
    val_data.normalize_min_max(min_=min_, max_=max_)
    test_data.normalize_min_max(min_=min_, max_=max_)

    if args.pixel_neighbourhood == 1:
        test_data.expand_dims(axis=-1)
        train_data.expand_dims(axis=-1)
        val_data.expand_dims(axis=-1)

    if args.classes_count == 0:
        args.classes_count = len(np.unique(test_data.get_labels()))

    # Load model
    model = load_model(os.path.join(args.models_dir, args.output_file + "_model"))

    model.pop()
    model.pop()
    model.pop()

    first = Dense(units=20, activation='relu')(model.output)
    second = Dense(units=20, activation='relu')(first)
    new_layer = Dense(units=args.classes_count, activation='softmax')(second)

    model = Model(inputs=model.input, outputs=new_layer)

    # for layer in model.layers[0:7]:
    #     layer.trainable = False
    if args.blocks >= 1:
        model.layers[1].trainable = False
    if args.blocks >= 2:
        model.layers[3].trainable = False
    if args.blocks >= 3:
        model.layers[5].trainable = False

    optimizer = Adam(lr=0.0001)
    model.compile(optimizer=optimizer,
                  metrics=['accuracy'],
                  loss='categorical_crossentropy')
    print(model.summary())
    history = model.fit(x=train_data.get_data(),
                        y=train_data.get_one_hot_labels(args.classes_count),
                        batch_size=args.batch_size,
                        epochs=args.epochs,
                        verbose=args.verbose,
                        callbacks=[early, logger, checkpoint, timer],
                        validation_data=(val_data.get_data(),
                                         val_data.get_one_hot_labels(args.classes_count)))

    # Load best model
    model = load_model(os.path.join(args.artifacts_path, args.output_file + "_model"))

    # Calculate test set score
    test_score = model.evaluate(x=test_data.get_data(),
                                y=test_data.get_one_hot_labels(args.classes_count))

    # Calculate accuracy for each class
    predictions = model.predict(x=test_data.get_data())
    predictions = np.argmax(predictions, axis=1)
    class_accuracy = calculate_class_accuracy(predictions,
                                              test_data.get_labels(),
                                              args.classes_count)
    # Collect metrics
    train_score = max(history.history['acc'])
    val_score = max(history.history['val_acc'])
    times = timer.times
    time = times[-1]
    avg_epoch_time = np.average(np.array(timer.average))
    epochs = len(history.epoch)

    # Save metrics
    metrics_path = os.path.join(args.artifacts_path, "metrics.csv")
    save_to_csv(metrics_path, [train_score, val_score,
                               test_score[1], time, epochs, avg_epoch_time])
    class_accuracy_path = os.path.join(args.artifacts_path, "class_accuracy.csv")
    save_to_csv(class_accuracy_path, class_accuracy)
    np.savetxt(os.path.join(args.artifacts_path, args.output_file) +
               "_times.csv", times, fmt="%1.4f")