def trainData(name, data, regularization=eye_loss, alpha=0.01, n_epochs=300,
              learning_rate=1e-3, batch_size=4000, r=None, test=False):
    '''Train an LR model on *data* and report validation metrics.

    Returns a 4-tuple (val_auc, ap, s1, sp): validation AUC, average
    precision of the learned weight difference against the known risk
    factors, the best S1 score from a threshold sweep, and the sparsity of
    the weight difference. The tuple is also dumped to models/<name>.pkl.

    If *test* is True, train on train+val combined and evaluate on the test
    split; results are saved under a 'test'-prefixed name.

    NOTE(review): the *r* parameter is accepted but never used — risk
    factors are always taken from data.r. Confirm with callers before
    removing it; kept for backward compatibility.
    '''
    m = data
    if test:
        name = 'test' + name
        xtrain = np.vstack([m.xtrain, m.xval])
        xval = m.xte
        ytrain = np.hstack([m.ytrain, m.yval])
        yval = m.yte
    else:
        xtrain = m.xtrain
        xval = m.xval
        ytrain = m.ytrain
        yval = m.yval
    # note: for cross validation, just split data into n fold and
    # choose appropriate train_data and valdata from those folds
    # not doing here for simplicity
    d = m.r.size(0)
    train_set = TensorDataset(*map(lambda x: x.data, prepareData(xtrain, ytrain)))
    # renamed from 'data': the original rebound the function parameter here
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_set = TensorDataset(*map(lambda x: x.data, prepareData(xval, yval)))
    valdata = DataLoader(val_set, batch_size=4000, shuffle=True)

    n_output = 2  # binary classification task
    model = LR(d, n_output)
    reg_parameters = model.i2o.weight
    trainer = Trainer(model, lr=learning_rate, risk_factors=m.r, alpha=alpha,
                      regularization=regularization,
                      reg_parameters=reg_parameters, name=name)
    # fit returns (losses, vallosses); they were never used, so don't bind them
    trainer.fit(train_loader, n_epochs=n_epochs, print_every=1, valdata=valdata)

    # report statistics
    val_auc = model_auc(model, valdata)
    ap = calcAP(m.r.data.numpy(),
                (reg_parameters[1] - reg_parameters[0]).data.numpy())
    # 'threshold' was previously bound to 't', clobbering the Trainer above
    threshold, s1 = sweepS1(model, valdata)
    sp = sparsity((reg_parameters[1] - reg_parameters[0]).data.numpy())
    joblib.dump((val_auc, ap, s1, sp), 'models/' + name + '.pkl')
    return val_auc, ap, s1, sp
def __init__(self, data, n_cpus=10):
    """Set up an empty task queue over *data* with *n_cpus* workers.

    The validation DataLoader is built once here so every queued task
    can share it.
    """
    self.tasks = []
    self.hyperparams = []
    self.n_cpus = n_cpus
    self.data = data
    tensors = [v.data for v in prepareData(data.xval, data.yval)]
    self.valdata = DataLoader(TensorDataset(*tensors),
                              batch_size=4000, shuffle=True)
def __init__(self, data, p):
    """Set up an empty task queue over *data*.

    *p* is the parsed parameter object; n_cpus and batch_size are read
    from it. The validation DataLoader is built once for reuse by all
    queued tasks.
    """
    self.tasks = []
    self.hyperparams = []
    self.n_cpus = p.n_cpus
    self.data = data
    tensors = [v.data for v in prepareData(data.xval, data.yval)]
    self.valdata = DataLoader(TensorDataset(*tensors),
                              batch_size=p.batch_size, shuffle=True)
    self.p = p
def model_acc(model, x, y):
    """Classification accuracy of *model* on inputs *x* against labels *y*."""
    inputs, _ = prepareData(x, y)
    predicted = np.argmax(to_np(model(inputs)), axis=1)
    return accuracy_score(y, predicted)
n, d = 1000, 2


def gendata():
    """Sample n points in R^d; label is 1 iff the coordinates sum to > 0."""
    x = np.random.randn(n, d)
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # int is the supported spelling and produces the same platform dtype.
    y = (x.sum(1) > 0).astype(int)
    return x, y


xtr, ytr = gendata()
xte, yte = gendata()
r = to_var(torch.FloatTensor([0, 1]))  # second feature is the risk factor
train_data = TensorDataset(*map(lambda x: x.data, prepareData(xtr, ytr)))
data = DataLoader(train_data, batch_size=100, shuffle=True)
n_output = 2  # binary classification task
model = LR(d, n_output)
learning_rate = 0.01
alpha = 0.08  # regularization strength
reg_parameters = model.i2o.weight
t = Trainer(model, lr=learning_rate, risk_factors=r, alpha=alpha,
            regularization=eye_loss, reg_parameters=reg_parameters)
t.fit(data, n_epochs=60, print_every=50)