Exemplo n.º 1
0
def main():
    """Train a single logistic classifier and write its test predictions to CSV.

    Loads the train and test data, standardizes both, fits a
    ``LogisticClassifier`` on polynomial features of the training data and
    stores the predictions for the test data in
    ``dataset/submission_0x.csv``.
    """
    # The dataloader has two modes, 'train' and 'test', depending on the
    # dataset loaded; only the 'train' call is unpacked into data and labels.
    train_raw, labels = dataloader(mode='train', reduced=False)
    test_raw = dataloader(mode='test', reduced=False)
    train_std = standardize(train_raw)
    test_std = standardize(test_raw)

    hyperparams = {
        'batch_size': 120,
        'num_epochs': 400,
        'learning_rate': 5 * 10**-4,
        'lambda_': 2.15443469003e-05,
        'mode': 'train',
    }
    config = Config(**hyperparams)

    classifier = LogisticClassifier(config, (build_polynomial(train_std), labels))
    classifier.train(show_every=10)

    test_outputs = classifier(build_polynomial(test_std))
    predictions_test = classifier.predict_submission(test_outputs)

    # Submission ids are consecutive integers starting at 350000, one per
    # test row.
    first_id = 350000
    submission_ids = np.arange(first_id, first_id + test_std.shape[0])
    create_csv_submission(submission_ids, predictions_test,
                          'dataset/submission_0x.csv')
Exemplo n.º 2
0
def main():
    """Train an ensemble of logistic classifiers and write its predictions to CSV.

    Loads the train and test data, standardizes both, fits an
    ``EnsembleClassifiers`` instance on polynomial features of the training
    data and stores the predictions for the test data in
    ``dataset/submission_0x.csv``.
    """
    train_raw, labels = dataloader(mode='train', reduced=False)
    test_raw = dataloader(mode='test', reduced=False)
    train_std = standardize(train_raw)
    test_std = standardize(test_raw)

    hyperparams = {
        'batch_size': 120,
        'num_epochs': 300,
        'learning_rate': 5 * 10**-4,
        'lambda_': 2.15443469003e-05,
        'mode': 'train',
    }
    config = Config(**hyperparams)

    # The fourth positional argument (50) is presumably the number of
    # ensemble members, as the label suggests -- TODO confirm against
    # EnsembleClassifiers.
    train_features = build_polynomial(train_std)
    ensemble = EnsembleClassifiers(config, train_features, labels, 50,
                                   LogisticClassifier,
                                   label='ensemble_50_log')
    ensemble.train()

    test_outputs = ensemble(build_polynomial(test_std))
    predictions_test = ensemble.predict(test_outputs)

    # Submission ids are consecutive integers starting at 350000, one per
    # test row.
    first_id = 350000
    submission_ids = np.arange(first_id, first_id + test_std.shape[0])
    create_csv_submission(submission_ids, predictions_test,
                          'dataset/submission_0x.csv')
def find_best_batch(batch_sizes):
    """Hyperparameter search for the mini-batch size.

    For every candidate in ``batch_sizes`` an ``EnsembleClassifiers`` of
    ``LogisticClassifier`` models is trained on the standardized,
    polynomially expanded training data, and the candidate whose ensemble
    reports the highest ``accuracy`` is kept. Progress and the running best
    value are printed after every candidate.

    Args:
        batch_sizes: Iterable of candidate batch sizes.

    Returns:
        The batch size with the highest ensemble accuracy, or ``0`` when no
        candidate reached an accuracy above zero (including an empty
        ``batch_sizes``).
    """
    x, y = dataloader(mode='train', reduced=False)
    x = standardize(x)
    best_size = 0
    best_accuracy = 0
    for idx, batch_size in enumerate(batch_sizes):
        print('Ensemble nr ' + str(idx) + 30 * '=')
        config = Config(batch_size=batch_size, num_epochs=300,
                        learning_rate=5 * 10 ** -4, lambda_=2.16e-05)
        # The fourth positional argument (2) is presumably the number of
        # ensemble members -- TODO confirm against EnsembleClassifiers.
        ensemble = EnsembleClassifiers(config, build_polynomial(x), y, 2,
                                       LogisticClassifier,
                                       label='ensemble_' + str(idx))
        ensemble.train()
        accuracy = ensemble.accuracy
        print("ensemble accuracy " + str(accuracy) + 30 * "=")
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_size = batch_size
        # Report the running best; the label used to read "best_lambda",
        # copied over from the regularizer search.
        print("best_batch_size :", best_size)
    # Hand the result back so callers can use it instead of parsing stdout.
    return best_size
def find_best_regularizer(lambdas):
    """Hyperparameter search for the regularization constant.

    For every candidate in ``lambdas`` an ``EnsembleClassifiers`` of
    ``LogisticClassifier`` models is trained on the standardized,
    polynomially expanded training data, and the candidate whose ensemble
    reports the highest ``accuracy`` is kept. Progress and the running best
    value are printed after every candidate.

    Args:
        lambdas: Iterable of candidate regularization constants.

    Returns:
        The regularization constant with the highest ensemble accuracy, or
        ``0`` when no candidate reached an accuracy above zero (including an
        empty ``lambdas``).
    """
    x, y = dataloader(mode='train', reduced=False)
    x = standardize(x)
    best_lambda = 0
    best_accuracy = 0
    for idx, lambda_ in enumerate(lambdas):
        print('Ensemble nr ' + str(idx) + 30 * '=')
        config = Config(batch_size=200, num_epochs=200,
                        learning_rate=5 * 10 ** -4, lambda_=lambda_)
        # The fourth positional argument (10) is presumably the number of
        # ensemble members -- TODO confirm against EnsembleClassifiers.
        ensemble = EnsembleClassifiers(config, build_polynomial(x), y, 10,
                                       LogisticClassifier,
                                       label='ensemble_' + str(idx))
        ensemble.train()
        accuracy = ensemble.accuracy
        print("ensemble accuracy " + str(accuracy) + 30 * "=")
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_lambda = lambda_
        # Report the running best after each candidate.
        print("best_lambda :", best_lambda)
    # Hand the result back so callers can use it instead of parsing stdout.
    return best_lambda
from torch import optim
import torch
import torch.utils.data
import numpy as np
from torch.autograd import Variable
from torch import nn
import torch.nn.functional as f
from src.utils import dataloader, standardize, split_data, build_polynomial

# Load the full training set and standardize its features.
x, y = dataloader(mode='train', reduced=False)
x = standardize(x)

# Split into two (data, target) pairs; how ``ratio=0.9`` divides the samples
# is defined by split_data (presumably 90% for training -- TODO confirm).
train_dataset, test_dataset = split_data(x, y, ratio=0.9)
train_data, train_target = train_dataset
test_data, test_target = test_dataset

# Expand both splits with build_polynomial and record the resulting number
# of feature columns.
test_data = build_polynomial(test_data)
train_data = build_polynomial(train_data)
num_features = train_data.shape[1]

# Wrap the NumPy arrays as float32 inputs / int64 targets and serve them in
# shuffled mini-batches of 128 samples.
train = torch.utils.data.TensorDataset(
    torch.from_numpy(train_data).float(),
    torch.from_numpy(train_target).long(),
)
train_loader = torch.utils.data.DataLoader(
    train, batch_size=128, shuffle=True)

test = torch.utils.data.TensorDataset(
    torch.from_numpy(test_data).float(),
    torch.from_numpy(test_target).long(),
)
test_loader = torch.utils.data.DataLoader(
    test, batch_size=128, shuffle=True)


class SimpleNN(torch.nn.Module):
    def __init__(self,
                 batch_size=128,
 # Script body: trains a single logistic classifier, plots its training
 # curves, then trains an ensemble and writes the ensemble's test
 # predictions to 'dataset/submission_10.csv'.
 #
 # Optional hyperparameter search (disabled):
 # find_best_regularizer(np.logspace(-5, -2, 10))
 x, y = dataloader(mode='train', reduced=False)
 x_test = dataloader(mode='test', reduced=False)
 # print(x.shape)
 # print(x_test.shape)
 x = standardize(x)
 x_test = standardize(x_test)
 # Optional hold-out split (disabled):
 # train_dataset, test_dataset = split_data(x, y, ratio=0.9)
 # train_set = (build_polynomial(train_dataset[0]), train_dataset[1])
 # test_set = (build_polynomial(test_dataset[0]), test_dataset[1])
 # x = dataloader(mode='test', reduced=False)
 # x = standardize(x)
 # x = build_polynomial(x)
 config = Config(batch_size=120, num_epochs=400, learning_rate=5 * 10 ** -4,
                 lambda_=2.15443469003e-05, mode='train')
 log_class = LogisticClassifier(config, (build_polynomial(x), y))
 # Was `show_every=1p`, which is not valid Python; 10 matches the value used
 # with this same configuration elsewhere in the file.
 log_class.train(show_every=10)
 # NOTE(review): these single-classifier predictions are overwritten by the
 # ensemble predictions below before being used -- confirm whether they
 # should be written to their own submission file.
 predictions_test = log_class.predict_submission(log_class(build_polynomial(x_test)))
 log_class.plot_accuracy()
 log_class.plot_convergence()
 # The fourth positional argument (5) is presumably the number of ensemble
 # members -- TODO confirm; note that the label still says 'ensemble_2_log'.
 ensemble = EnsembleClassifiers(config, build_polynomial(x), y, 5, LogisticClassifier,
                                label='ensemble_2_log')
 ensemble.train()
 # ensemble.plot_convergence()
 # ensemble.plot_accuracy()
 # ensemble.save()
 # ensemble.load_weights()
 predictions_test = ensemble.predict(ensemble(build_polynomial(x_test)))
 # Submission ids are consecutive integers starting at 350000, one per test
 # row.
 create_csv_submission(np.arange(350000, 350000 + x_test.shape[0]), predictions_test,
                       'dataset/submission_10.csv')