# Example #1
def main_predict() -> None:
    """Train the regularized CNN on augmented data and write a submission CSV."""
    detect_gpu()
    dev = get_device()
    out_dir = path.join('modelling', 'model_2020_04_08_2')

    net = CNNAugDataRegularized((32, 64, 128, 256), (512, 128), 4, dev)
    dataset = FirstAugmentedDataset()
    stats = PerformanceTracker(out_dir)
    net.train(dataset, 18, 20, stats, learning_rate=0.0001)

    test_X, imgs_ids = dataset.get_test()
    predictions = net.predict(test_X)
    create_submit(predictions, imgs_ids, path.join(out_dir, 'submission.csv'))
# Example #2
    def train(self,
              data: Dataset,
              epochs: int,
              batch_size: int,
              tracker: PerformanceTracker = None,
              learning_rate: float = 0.001) -> None:
        """Run the full training loop over ``data``.

        Args:
            data: dataset exposing ``get_val``, ``num_possible_batches`` and
                ``get_next_batch``.
            epochs: number of passes over the training data.
            batch_size: samples per optimisation step.
            tracker: optional per-epoch statistics collector; skipped if None.
            learning_rate: Adam step size.
        """
        val_X, val_y = data.get_val()
        val_X, val_y = torch.from_numpy(val_X), torch.from_numpy(val_y)
        # NHWC -> NCHW, the layout torch conv layers expect.
        val_X = val_X.permute(0, 3, 1, 2)
        val_X, val_y = val_X.float(), val_y.long()
        # Assigned to GPU in batches in validate method.

        model = self.to(self.used_device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        num_batches = data.num_possible_batches(batch_size)
        batch_range = range(num_batches)

        for epoch in range(epochs):
            train_losses = []
            train_accuracies = []
            # Copy so batch-iteration state does not leak between epochs.
            epoch_data = deepcopy(data)

            for _ in tqdm(batch_range):
                batch_X, batch_y = epoch_data.get_next_batch(batch_size)
                batch_X, batch_y = torch.from_numpy(batch_X), torch.from_numpy(
                    batch_y)
                batch_X = batch_X.permute(0, 3, 1, 2)  # NHWC -> NCHW
                batch_X, batch_y = batch_X.float(), batch_y.long()
                batch_X, batch_y = batch_X.to(self.used_device), batch_y.to(
                    self.used_device)

                model.zero_grad()
                pred_y = model(batch_X)
                loss = self.loss_function(pred_y, batch_y)
                train_losses.append(loss.item())
                loss.backward()
                optimizer.step()

                pred_y_indices = torch.argmax(pred_y, dim=1)
                num_correct = int((pred_y_indices == batch_y).int().sum())
                # NOTE(review): assumes get_next_batch always returns a full
                # batch; otherwise batch_size overstates the denominator.
                train_accuracies.append(num_correct / batch_size)

            val_loss, val_acc = model.validate(val_X, val_y, batch_size)
            # BUG FIX: tracker defaults to None but was used unconditionally,
            # crashing whenever no tracker was supplied. (Leftover debug
            # print of the last batch prediction removed as well.)
            if tracker is not None:
                tracker.add_train(mean(train_losses), mean(train_accuracies))
                tracker.add_val(val_loss, val_acc)
                tracker.print_stats(epoch)
def main_predict() -> None:
    """Train the dropout-regularised CNN and write a test-set submission CSV."""
    detect_gpu()
    dev = get_device()
    out_dir = path.join('modelling', 'model_2020_04_25_1')

    net = DropoutCNN((32, 64, 128, 256), (512, 128), 4, dev,
                     drop_dense_p=0.2, drop_conv_p=0.2)
    dataset = FirstAugmentedDataset()
    stats = PerformanceTracker(out_dir)
    net.train(dataset, 30, 20, stats, learning_rate=0.0001)

    test_X, imgs_ids = dataset.get_test()
    predictions = net.predict(test_X)
    create_submit(predictions, imgs_ids, path.join(out_dir, 'submission.csv'))
# Example #4
def main_train() -> None:
    """Train the regularised CNN (wide dense head) and save its metrics."""
    detect_gpu()
    dev = get_device()
    out_dir = path.join('modelling', 'model_2020_04_08_1')

    net = CNNAugDataRegularized((32, 64, 128, 256), (1024, 128), 4, dev)
    stats = PerformanceTracker(out_dir)
    net.train(FirstAugmentedDataset(), 60, 20, stats, learning_rate=0.0001)

    stats.graphs()
    stats.save('metrics.csv')
def main_train() -> None:
    """Fine-tune the transfer-learning CNN and persist training metrics."""
    detect_gpu()
    dev = get_device()
    out_dir = path.join('modelling', 'model_2020_04_26_2')

    net = TransferCNN(dev)
    stats = PerformanceTracker(out_dir)
    net.train(FirstAugmentedDataset(), 20, 20, stats, learning_rate=0.0001)

    stats.graphs()
    stats.save('metrics.csv')
def main_try() -> None:
    """Quick experiment: train on the 128px split and dump metrics."""
    train_X, train_y, val_X, val_y = get_128px_train_data()
    run_dir = os.path.join('modelling', 'model_2020_03_31_1')
    tracker = PerformanceTracker(run_dir)
    # NOTE(review): `model` is not defined in this function; unless a
    # module-level `model` exists, this raises NameError — verify.
    model.train((train_X, train_y),
                45,
                10,
                val=(val_X, val_y),
                tracker=tracker)

    tracker.graphs()
    tracker.save('metrics.csv')
# Example #7
def main_train() -> None:
    """Train the large CNN variant and save its learning curves."""
    detect_gpu()
    dev = get_device()
    out_dir = path.join('modelling', 'model_2020_04_26_1')

    filters = (16, 32, 64, 64, 128, 128)
    dense_units = (512, 128)

    net = BigCNN(filters, dense_units, dev)
    stats = PerformanceTracker(out_dir)
    net.train(FirstAugmentedDataset(), 40, 20, stats, learning_rate=0.0001)

    stats.graphs()
    stats.save('metrics.csv')
def main_train() -> None:
    """Train the dropout-regularised CNN and save its metrics."""
    detect_gpu()
    dev = get_device()
    out_dir = path.join('modelling', 'model_2020_04_25_1')

    net = DropoutCNN((32, 64, 128, 256), (512, 128), 4, dev,
                     drop_dense_p=0.2, drop_conv_p=0.2)
    stats = PerformanceTracker(out_dir)
    net.train(FirstAugmentedDataset(), 40, 20, stats, learning_rate=0.0001)

    stats.graphs()
    stats.save('metrics.csv')
# Example #9
def main() -> None:
    """Train the VGG-style net end to end and produce a submission file."""
    detect_gpu()
    dev = get_device()
    run_dir = path.join('modelling', 'model_2020_05_10_1')

    net = VGGStyleNet(4, dev)
    summary(net.cuda(), (3, 128, 128))
    print(net)
    dataset = FirstAugmentedDataset()
    stats = PerformanceTracker(run_dir)
    net.train(dataset, 40, 64, stats, learning_rate=0.00001)

    stats.graphs()
    stats.save('metrics.csv')

    test_X, imgs_ids = dataset.get_test()
    predictions = net.predict(test_X)
    create_submit(predictions, imgs_ids, path.join(run_dir, 'submission.csv'))
# Example #10
def main() -> None:
    """Train the VGG-style batch-norm net; always flush metrics, even on Ctrl-C."""
    detect_gpu()
    dev = get_device()
    run_dir = path.join('modelling', 'model_2020_05_10_2')
    n_batch = 64
    n_epochs = 15

    net = VGGStyleBNNet(4, dev)
    summary(net.cuda(), (3, 128, 128))
    print(net)
    dataset = FirstAugmentedDataset()
    stats = PerformanceTracker(run_dir)
    try:
        net.train(dataset, n_epochs, n_batch, stats, learning_rate=0.001)
    except KeyboardInterrupt:
        print('Training interrupted, writing stats...')
    finally:
        # Metrics are written whether training finished or was interrupted.
        stats.graphs()
        stats.save('metrics.csv')

    test_X, imgs_ids = dataset.get_test()
    predictions = net.predict(test_X, n_batch)
    create_submit(predictions, imgs_ids, path.join(run_dir, 'submission.csv'))
    def train(
        self,
        train: Tuple[pd.DataFrame, pd.DataFrame],
        epochs: int,
        batch_size: int,
        tracker: PerformanceTracker = None,
        val: Tuple[pd.DataFrame, pd.DataFrame] = (None, None),
        learning_rate: float = 0.0001
    ) -> None:
        """Train the model on in-memory (X, y) arrays.

        Args:
            train: (X, y) numpy arrays of training images and labels.
            epochs: number of passes over the training set.
            batch_size: samples per optimisation step.
            tracker: optional per-epoch statistics collector.
            val: optional (X, y) validation arrays; (None, None) disables
                validation.
            learning_rate: optimiser step size (previously hard-coded).
        """
        train_X, train_y = train
        train_X, train_y = torch.from_numpy(train_X), torch.from_numpy(train_y)

        has_val = val[0] is not None
        if has_val:
            val_X, val_y = val
            val_X, val_y = torch.from_numpy(val_X), torch.from_numpy(val_y)
            val_X, val_y = val_X.to(self.used_device), val_y.to(
                self.used_device)

        model = self.to(self.used_device)
        optimizer = self.optim(self.parameters(), lr=learning_rate)
        # Batch boundaries are invariant across epochs; build them once.
        batch_range = range(0, len(train_X), batch_size)

        for epoch in range(epochs):
            train_losses = []
            train_accuracies = []

            for i in tqdm(batch_range):
                # Reshape each slice to NCHW float images (3x128x128 assumed
                # — TODO confirm against the dataset loader).
                batch_X = train_X[i:i + batch_size].view(-1, 3, 128,
                                                         128).float()
                batch_y = torch.flatten(train_y[i:i + batch_size].long())
                batch_X, batch_y = batch_X.to(self.used_device), batch_y.to(
                    self.used_device)

                model.zero_grad()
                pred_y = model(batch_X)
                loss = self.loss_function(pred_y, batch_y)
                train_losses.append(loss.item())
                loss.backward()
                optimizer.step()

                pred_y_indices = torch.argmax(pred_y, dim=1)
                num_correct = int((pred_y_indices == batch_y).int().sum())
                # BUG FIX: the final slice may be shorter than batch_size, so
                # divide by the actual number of samples in the batch.
                train_accuracies.append(num_correct / len(batch_y))

            if has_val:
                val_loss, val_acc = model.validate((val_X, val_y), batch_size)
            if tracker is not None:
                tracker.add_train(mean(train_losses), mean(train_accuracies))
                # BUG FIX: add_val previously ran whenever a tracker was
                # present, raising NameError when no val data was supplied.
                if has_val:
                    tracker.add_val(val_loss, val_acc)

            print(f'Epoch: {epoch + 1}')
            print(f'Train loss: {mean(train_losses)}')
            print(f'Train accuracy: {mean(train_accuracies)}')

            # BUG FIX: validation stats exist when val data was given, not
            # when a tracker was given.
            if has_val:
                print(f'Val loss: {val_loss}')
                print(f'Val accuracy: {val_acc}')