# Parametrize tests over (linear-model, dataset) pairs crossed with every
# optimizer/initializer combination. Each case id reads
# "<model name> - <optimizer> - <initializer>".
# NOTE(review): this chunk is truncated — the closing brackets of the
# comprehension/decorator and the decorated test function are not visible here.
@pytest.mark.parametrize(
    'lm, dataset',
    [
        pytest.param(
            # Deep-copy the optimizer so the parametrized cases do not share
            # mutable optimizer state across test runs.
            lm(optimizer=copy.deepcopy(optimizer), initializer=initializer, l2=0),
            dataset,
            id=f'{lm.__name__} - {optimizer} - {initializer}'
        )
        # Regression model on a regression dataset, classifier on a
        # classification dataset.
        for lm, dataset in [
            (lm.LinearRegression, datasets.TrumpApproval()),
            (lm.LogisticRegression, datasets.Bananas())
        ]
        # Cross product: every optimizer paired with every initializer.
        for optimizer, initializer in itertools.product(
            [
                optim.AdaBound(),
                optim.AdaDelta(),
                optim.AdaGrad(),
                optim.AdaMax(),
                optim.Adam(),
                optim.AMSGrad(),
                # TODO: check momentum optimizers
                # optim.Momentum(),
                # optim.NesterovMomentum(),
                optim.RMSProp(),
                optim.SGD()
            ],
            [
                optim.initializers.Zeros(),
                # Fixed seed keeps the random initialization deterministic.
                optim.initializers.Normal(mu=0, sigma=1, seed=42)
            ]
        )
# Numerical-stability epsilon taken from the Keras backend; reusing it for the
# other libraries keeps the three implementations of each optimizer comparable.
KERAS_EPS = K.epsilon()

# Single learning rate shared by every optimizer under comparison.
LR = .01

# For each optimizer name, the equivalent configuration in three libraries,
# as a (river optim, torch partial constructor, keras optimizer) triple.
# The torch entries are ``functools.partial`` because torch optimizers need
# the model parameters at construction time.
OPTIMIZERS = {
    'SGD': (
        optim.SGD(lr=LR),
        functools.partial(torch.optim.SGD, lr=LR),
        optimizers.SGD(lr=LR),
    ),
    'Adam': (
        optim.Adam(lr=LR, beta_1=.9, beta_2=.999, eps=KERAS_EPS),
        functools.partial(torch.optim.Adam, lr=LR, betas=(.9, .999), eps=KERAS_EPS),
        optimizers.Adam(lr=LR, beta_1=.9, beta_2=.999),
    ),
    'AdaDelta': (
        optim.AdaDelta(rho=.95, eps=KERAS_EPS),
        functools.partial(torch.optim.Adadelta, rho=.95, eps=KERAS_EPS),
        optimizers.Adadelta(rho=.95),
    ),
    'AdaGrad': (
        optim.AdaGrad(lr=LR, eps=KERAS_EPS),
        functools.partial(torch.optim.Adagrad, lr=LR),
        optimizers.Adagrad(lr=LR),
    ),
    'Momentum': (
        optim.Momentum(lr=LR, rho=.1),
        functools.partial(torch.optim.SGD, lr=LR, momentum=.1),
        optimizers.SGD(lr=LR, momentum=.1),
    ),
}


def add_intercept(x):
    """Return a copy of ``x`` extended with a constant intercept feature of 1."""
    return {**x, 'intercept': 1.}
# Build an optimizer instance from the hyper-parameter dict ``config``
# according to the name in ``opt`` (both bound earlier in the enclosing
# function — not visible in this chunk; the final ``else`` body is also
# outside the view).
lr = config['lr']
# NOTE(review): AdaBound_lr is read here but never used — the AdaBound branch
# below passes the generic ``lr`` instead. Confirm which is intended.
AdaBound_lr = config['AdaBound_lr']
rho = config['rho']
beta_1 = config['beta_1']
beta_2 = config['beta_2']
eps = config['eps']
gamma = config['gamma']
final_lr = config['final_lr']
alpha = config['alpha']
# NOTE(review): FTRL_l1/FTRL_l2 are read but never used below, while the
# FTRLProximal branch references ``beta``, ``l1`` and ``l2`` which are not
# assigned anywhere in this chunk — this looks like a NameError waiting to
# happen (probably should pass FTRL_l1/FTRL_l2, and read a ``beta`` from
# config). Verify against the surrounding code before fixing.
FTRL_l1 = config['FTRL_l1']
FTRL_l2 = config['FTRL_l2']
if (opt == "AdaBound"):
    optimizer = optim.AdaBound(lr, beta_1, beta_2, eps, gamma, final_lr)
elif (opt == "AdaDelta"):
    optimizer = optim.AdaDelta(rho, eps)
elif (opt == "AdaGrad"):
    optimizer = optim.AdaGrad(lr, eps)
elif (opt == "Adam"):
    optimizer = optim.Adam(lr, beta_1, beta_2, eps)
elif (opt == "FTRLProximal"):
    # See NOTE(review) above: ``beta``, ``l1``, ``l2`` are undefined in view.
    optimizer = optim.FTRLProximal(alpha, beta, l1, l2)
elif (opt == "Momentum"):
    optimizer = optim.Momentum(lr, rho)
elif (opt == "RMSProp"):
    optimizer = optim.RMSProp(lr, rho, eps)
elif (opt == "VanillaSGD"):
    optimizer = optim.VanillaSGD(lr)
elif (opt == "NesterovMomentum"):
    optimizer = optim.NesterovMomentum(lr, rho)
else: