def test_basic_momentum():
    """Check the first three steps produced by ``BasicMomentum(0.5)``.

    The cost is ``sum(a ** 2)`` with ``a = [3, 4]``, so its gradient at
    that point is ``2 * a = [6, 8]``.  The expected outputs are what the
    recurrence ``step_t = gradient + 0.5 * step_{t-1}`` (starting from a
    zero previous step) yields for a gradient of ``[6, 8]`` on every call.
    """
    param = shared_floatx([3, 4])
    loss = (param ** 2).sum()

    rule = BasicMomentum(0.5)
    gradients = OrderedDict([(param, tensor.grad(loss, param))])
    steps, updates = rule.compute_steps(gradients)

    # The compiled function takes no inputs; each call returns the current
    # step for ``param`` and applies the updates returned by the step rule.
    next_step = theano.function([], [steps[param]], updates=updates)

    expected_steps = ([6., 8.], [9., 12.], [10.5, 14.])
    for expected in expected_steps:
        assert_allclose(next_step()[0], expected)
def test_basic_nesterov_momentum():
    """Compare ``BasicNesterovMomentum`` with ``BasicMomentum`` step by step.

    Both rules use momentum 0.5 on the cost ``sum(a ** 2)`` with
    ``a = [3, 4]``.  The classic function is called once before any
    comparison, so the k-th Nesterov step is asserted to equal the
    (k + 1)-th classic momentum step, for three consecutive steps.
    """
    param = shared_floatx([3, 4])
    loss = (param ** 2).sum()

    def compile_step_function(rule):
        # Each rule gets its own gradient expression and its own compiled
        # function, so the two rules are set up independently of each other.
        steps, updates = rule.compute_steps(
            OrderedDict([(param, tensor.grad(loss, param))]))
        return theano.function([], [steps[param]], updates=updates)

    nesterov_step = compile_step_function(BasicNesterovMomentum(0.5))
    classic_step = compile_step_function(BasicMomentum(0.5))

    # One call for the "peek ahead" of the Nesterov momentum.
    classic_step()

    for _ in range(3):
        assert_allclose(nesterov_step()[0], classic_step()[0])
def test_basic_momentum_broadcastable():
    """Run the shared broadcastable-handling check on ``BasicMomentum(0.5)``."""
    rule = BasicMomentum(0.5)
    verify_broadcastable_handling(rule)
# Experiment configuration: plain module-level settings.  The grouping
# comments below are inferred from the setting names only -- confirm against
# the code that consumes this module.

# Sizes.
n_entities = 550
embed_size = 200

# Context and question LSTM settings (one entry per list).
ctx_lstm_size = [256]
ctx_skip_connections = True
question_lstm_size = [256]
question_skip_connections = True

# Attention MLP: one hidden size with a Tanh activation instance.
attention_mlp_hidden = [100]
attention_mlp_activations = [Tanh()]

# Output MLP: both lists are empty.
out_mlp_hidden = []
out_mlp_activations = []

# Optimisation: RMSProp and BasicMomentum combined through CompositeRule,
# listed in that order.
step_rule = CompositeRule(
    [
        RMSProp(learning_rate=5e-5, decay_rate=0.95),
        BasicMomentum(momentum=0.9),
    ]
)

# Regularisation.
dropout = 0.2
w_noise = 0.0

# Frequencies for validation, saving and printing (units not visible here).
valid_freq = 1000
save_freq = 1000
print_freq = 100

# Parameter initialisation schemes.
weights_init = IsotropicGaussian(0.01)
biases_init = Constant(0.0)
out_dim = 1 hidden_dim = 64 activation_function = Tanh() activation_function_name = 'Tanh' batch_size = 100 w_noise_std = 0.01 i_dropout = 0.5 proportion_train = 0.9 algo = 'RMS' learning_rate_value = 1e-5 momentum_value = 0.9 decay_rate_value = 0 StepClipping_value = 2 step_rule = CompositeRule([RMSProp(learning_rate=learning_rate_value), #decay_rate=decay_rate_value, BasicMomentum(momentum=momentum_value), StepClipping(StepClipping_value)]) print_freq = 1000 valid_freq = 10000 save_freq = 10000 class Model(): def __init__(self): inp = tensor.tensor3('input') inp = inp.dimshuffle(1,0,2) target = tensor.matrix('target') target = target.reshape((target.shape[0],)) product = tensor.lvector('product') missing = tensor.eq(inp, 0) train_input_mean = 1470614.1 train_input_std = 3256577.0