def sgdWithLrsClip(loss_or_grads, params, learning_rate=.01, mu_lr=.01,
                   si_lr=.001, focused_w_lr=.01, momentum=.9, verbose=False):
    ''' Same as sgdWithLrs, but applies clips after the updates '''
    from collections import OrderedDict
    from lasagne.updates import get_or_compute_grads, apply_momentum

    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    #momentum_params_list = []
    f32 = np.float32
    if verbose:
        print("Params List", params)

    for param, grad in zip(params, grads):
        if verbose:
            print("param name", param.name, "shape:", param.eval().shape)
            #print("param name", param.name, "shape:", param.get_value().shape)
        #grad = clip_tensor(grad, -0.001, 0.001)
        if param.name.find('focus') >= 0 and param.name.find('mu') >= 0:
            updates[param] = param - mu_lr * grad
            updates = apply_momentum(updates, params=[param], momentum=momentum)
            updates[param] = clip_tensor(updates[param], f32(0.01), f32(0.99))
        elif param.name.find('focus') >= 0 and param.name.find('si') >= 0:
            updates[param] = param - si_lr * grad
            updates = apply_momentum(updates, params=[param], momentum=momentum)
            updates[param] = clip_tensor(updates[param], f32(0.01), f32(0.5))
        elif param.name.find('focus') >= 0 and param.name.find('W') >= 0:
            updates[param] = param - (focused_w_lr * grad)
            updates = apply_momentum(updates, params=[param], momentum=momentum)
            #updates[param] = clip_tensor(updates[param], -0.5, 0.5)
        else:
            updates[param] = param - learning_rate * grad
            updates = apply_momentum(updates, params=[param], momentum=momentum)
            #if param.name.find('W') >= 0:
            #    print(param, grad, learning_rate)
    return updates
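# Hedged usage sketch for sgdWithLrsClip: it assumes the snippet lives in the same
# module as the function above (which relies on module-level `np` and `clip_tensor`),
# that `clip_tensor` is a thin wrapper around theano.tensor.clip, and that focus-layer
# parameters carry 'focus' plus 'mu', 'si' or 'W' in their names. The tiny DenseLayer
# model here is a stand-in only; its plain 'W'/'b' params take the default SGD branch.
import theano
import theano.tensor as T
import lasagne


def clip_tensor(t_var, minval, maxval):
    # assumed implementation of the helper used inside sgdWithLrsClip
    return T.clip(t_var, minval, maxval)


x = T.matrix('x')
t = T.matrix('t')
l_in = lasagne.layers.InputLayer((None, 8), x)
network = lasagne.layers.DenseLayer(l_in, 1, nonlinearity=None)  # stand-in for a focused model

prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.squared_error(prediction, t).mean()
params = lasagne.layers.get_all_params(network, trainable=True)

# separate learning rates for the focus mu/si/W groups, plain SGD + momentum elsewhere
updates = sgdWithLrsClip(loss, params, learning_rate=0.01, mu_lr=0.01,
                         si_lr=0.001, focused_w_lr=0.01, momentum=0.9)
train_fn = theano.function([x, t], loss, updates=updates)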
def get_network(model):
    input_data = tensor.dmatrix('x')
    targets_var = tensor.dmatrix('y')

    network = layers.InputLayer((model['batch_size'], model['input_vars']), input_data)
    nonlin = nonlinearities.rectify
    if model['hidden_nonlinearity'] != 'ReLu':
        nonlin = nonlinearities.tanh
    prev_layer = network
    for l in range(model['nlayers']):
        fc = layers.DenseLayer(prev_layer, model['units'], nonlinearity=nonlin)
        if model['dropout']:
            fc = layers.DropoutLayer(fc, 0.5)
        prev_layer = fc

    output_lin = None
    if model['output_mode'] == OUTPUT_LOG:
        output_lin = nonlinearities.tanh
    output_layer = layers.DenseLayer(prev_layer, 1, nonlinearity=output_lin)

    predictions = layers.get_output(output_layer)
    if model['output_mode'] == OUTPUT_BOUNDED:
        (minth, maxth) = model['maxmin'][model['control']]
        maxt = theano.shared(np.ones((model['batch_size'], 1)) * maxth)
        mint = theano.shared(np.ones((model['batch_size'], 1)) * minth)
        predictions = tensor.min(tensor.concatenate([maxt, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))
        predictions = tensor.max(tensor.concatenate([mint, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))

    loss = objectives.squared_error(predictions, targets_var)
    loss = objectives.aggregate(loss, mode='mean')
    params = layers.get_all_params(output_layer)

    test_prediction = layers.get_output(output_layer, deterministic=True)
    test_loss = objectives.squared_error(test_prediction, targets_var)
    test_loss = test_loss.mean()

    updates_sgd = updates.sgd(loss, params, learning_rate=model['lr'])
    ups = updates.apply_momentum(updates_sgd, params, momentum=0.9)

    train_fn = theano.function([input_data, targets_var], loss, updates=ups)
    pred_fn = theano.function([input_data], predictions)
    val_fn = theano.function([input_data, targets_var], test_loss)
    return {'train': train_fn, 'eval': val_fn, 'pred': pred_fn, 'layers': output_layer}
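# Hypothetical configuration dict for get_network above; the key values, the toy
# arrays and the 'throttle' control name are illustrative assumptions based only on
# the keys the function reads. OUTPUT_BOUNDED is the constant referenced in the code.
import numpy as np

model = {
    'batch_size': 32,
    'input_vars': 10,
    'hidden_nonlinearity': 'ReLu',        # anything else falls back to tanh
    'nlayers': 2,
    'units': 64,
    'dropout': False,
    'output_mode': OUTPUT_BOUNDED,        # triggers the min/max output clamping branch
    'maxmin': {'throttle': (0.0, 1.0)},   # per-control (min, max) bounds
    'control': 'throttle',
    'lr': 1e-3,
}
net = get_network(model)

x_batch = np.random.rand(32, 10)          # dmatrix inputs, so float64
y_batch = np.random.rand(32, 1)
print('train loss:', net['train'](x_batch, y_batch))
print('eval loss :', net['eval'](x_batch, y_batch))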
def get_cost_updates(self, corruption_level, learning_rate, noise=0.0, momentum=0):
    """ This function computes the cost and the updates for one training
        step of the dA """
    tilde_x = self.get_corrupted_input(self.x, corruption_level, noise)
    y = self.get_hidden_values(tilde_x)
    z = self.get_reconstructed_input(y)
    L = - T.sum(self.desired * T.log(z) + (1 - self.desired) * T.log(1 - z), axis=1)
    cost = T.mean(L)
    # adagrad with momentum on cost
    updates_ada = adagrad(cost, self.params, learning_rate=learning_rate)
    updates = apply_momentum(updates_ada, self.params, momentum=momentum)
    return (cost, updates)
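# Sketch of how the returned (cost, updates) pair is typically compiled into a
# training step; `da`, `train_set_x`, `train_set_y` (Theano shared variables) and
# `batch_size` are assumed names, not part of the code above.
index = T.lscalar('index')
cost, updates = da.get_cost_updates(corruption_level=0.3, learning_rate=0.1,
                                    noise=0.0, momentum=0.9)
train_da = theano.function(
    [index], cost, updates=updates,
    givens={da.x: train_set_x[index * batch_size:(index + 1) * batch_size],
            da.desired: train_set_y[index * batch_size:(index + 1) * batch_size]})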
def build_model(n_input, n_hidden, optimizer=adagrad, l2_weight=1e-4, l1_weight=1e-2):
    ''' build NN model to estimate the model function '''
    global LR
    input_A = L.InputLayer((None, n_input), name='A')
    layer_A = L.DenseLayer(input_A, n_hidden, b=None, nonlinearity=identity)
    input_B = L.InputLayer((None, n_input), name='B')
    layer_B = L.DenseLayer(input_B, n_hidden, b=None, nonlinearity=identity)
    merge_layer = L.ElemwiseSumLayer((layer_A, layer_B))
    output_layer = L.DenseLayer(merge_layer, 1, b=None, nonlinearity=identity)  # output is scalar

    x1 = T.matrix('x1')
    x2 = T.matrix('x2')
    y = T.matrix('y')
    out = L.get_output(output_layer, {input_A: x1, input_B: x2})
    params = L.get_all_params(output_layer)
    loss = T.mean(squared_error(out, y))

    # add l1 penalty
    l1_penalty = regularize_layer_params([layer_A, layer_B, output_layer], l1)
    # add l2 penalty
    l2_penalty = regularize_layer_params([layer_A, layer_B, output_layer], l2)
    # get loss + penalties
    loss = loss + l1_penalty * l1_weight + l2_penalty * l2_weight

    updates_sgd = optimizer(loss, params, learning_rate=LR)
    updates = apply_momentum(updates_sgd, params, momentum=0.9)
    # updates = optimizer(loss, params, learning_rate=LR)

    f_train = theano.function([x1, x2, y], loss, updates=updates)
    f_test = theano.function([x1, x2, y], loss)
    f_out = theano.function([x1, x2], out)

    return f_train, f_test, f_out, output_layer
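# Hypothetical usage of build_model, assuming it runs in the same module so the
# LR global read by the builder is visible; the toy arrays and loop length are
# invented for illustration only.
import numpy as np
import theano

LR = 0.01
f_train, f_test, f_out, output_layer = build_model(n_input=5, n_hidden=8)

x1 = np.random.rand(64, 5).astype(theano.config.floatX)
x2 = np.random.rand(64, 5).astype(theano.config.floatX)
y = np.random.rand(64, 1).astype(theano.config.floatX)

for epoch in range(20):
    train_loss = f_train(x1, x2, y)       # one adagrad + momentum step
print('held-in loss:', f_test(x1, x2, y))
print('prediction shape:', f_out(x1, x2).shape)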
def get_cost_updates(self, corrupted_input, learning_rate):
    """ This function computes the cost and the updates for one training
        step of the dA """
    tilde_x = corrupted_input
    y = self.get_hidden_values(tilde_x)
    z = self.get_reconstructed_input(y)
    #z = corrupted_input
    # note : we sum over the size of a datapoint; if we are using
    #        minibatches, L will be a vector, with one entry per
    #        example in minibatch
    # L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
    L = categorical_crossentropy(z, self.x)
    #L = (self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
    #cost = L.mean()
    # temp = (self.x * T.log(z) + (1 - self.x) * T.log(1 - z))
    # L = -T.sum(temp)
    # note : L is now a vector, where each element is the
    #        cross-entropy cost of the reconstruction of the
    #        corresponding example of the minibatch. We need to
    #        compute the average of all these to get the cost of
    #        the minibatch
    cost = T.mean(L)
    # print cost
    reg = 1e-8 * lasagne.regularization.l2(self.params[0])
    cost = cost + reg
    # compute the gradients of the cost of the `dA` with respect
    # to its parameters (unused here; SGD + momentum builds the updates below)
    gparams = T.grad(cost, self.params, add_names=True)
    updates_sgd = sgd(cost, self.params, learning_rate)
    updates_dic = apply_momentum(updates_sgd, self.params, momentum=0.9)
    updates = updates_dic.items()  # generate the list of updates
    # updates = [
    #     (param, param - learning_rate * gparam)
    #     for param, gparam in zip(self.params, gparams)
    # ]
    return (cost, updates)
def sgdWithLrs(loss_or_grads, params, learning_rate=.01, mu_lr=.01,
               si_lr=.001, focused_w_lr=.01, momentum=.9):
    '''
    This function provides SGD with different learning rates for the focus
    params mu, si, w
    '''
    from collections import OrderedDict
    from lasagne.updates import get_or_compute_grads, apply_momentum

    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    momentum_params_list = []
    print(params)
    for param, grad in zip(params, grads):
        # import pdb; pdb.set_trace()
        #grad = clip_tensor(grad, -0.01, 0.01)
        if param.name.find('focus') >= 0 and param.name.find('mu') >= 0:
            updates[param] = param - mu_lr * grad
            momentum_params_list.append(param)
        elif param.name.find('focus') >= 0 and param.name.find('si') >= 0:
            updates[param] = param - si_lr * grad
            #momentum_params_list.append(param)
        elif param.name.find('focus') >= 0:
            updates[param] = param - (focused_w_lr * grad)
            momentum_params_list.append(param)
        else:
            updates[param] = param - learning_rate * grad
            momentum_params_list.append(param)
        #print (param, grad, learning_rate)
    return apply_momentum(updates, params=momentum_params_list, momentum=momentum)
def get_network(model):
    input_data = tensor.dmatrix('x')
    targets_var = tensor.dmatrix('y')

    network = layers.InputLayer((model['batch_size'], model['input_vars']), input_data)
    nonlin = nonlinearities.rectify
    if model['hidden_nonlinearity'] != 'ReLu':
        nonlin = nonlinearities.tanh
    prev_layer = network
    for l in range(model['nlayers']):
        W = None
        if model['hidden_nonlinearity'] == 'ReLu':
            W = lasagne.init.GlorotUniform('relu')
        else:
            W = lasagne.init.GlorotUniform(1)
        fc = layers.DenseLayer(prev_layer, model['units'], nonlinearity=nonlin, W=W)
        if model['dropout']:
            fc = layers.DropoutLayer(fc, 0.5)
        prev_layer = fc

    output_lin = None
    if model['output_mode'] == OUTPUT_LOG:
        output_lin = nonlinearities.tanh
    output_layer = layers.DenseLayer(prev_layer, 1, nonlinearity=output_lin)

    predictions = layers.get_output(output_layer)
    if model['output_mode'] != OUTPUT_LOG:
        (minth, maxth) = model['maxmin'][model['control']]
        maxt = theano.shared(np.ones((model['batch_size'], 1)) * maxth)
        mint = theano.shared(np.ones((model['batch_size'], 1)) * minth)
        predictions = tensor.min(tensor.concatenate([maxt, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))
        predictions = tensor.max(tensor.concatenate([mint, predictions], axis=1), axis=1)
        predictions = tensor.reshape(predictions, (model['batch_size'], 1))

    if model['output_mode'] == OUTPUT_NO:
        prediction_unboun = layers.get_output(output_layer)
        loss = objectives.squared_error(prediction_unboun, targets_var)
    else:
        loss = objectives.squared_error(predictions, targets_var)
    loss = objectives.aggregate(loss, mode='mean')
    params = layers.get_all_params(output_layer)

    # test_prediction = layers.get_output(output_layer, deterministic=True)  # fix for dropout
    test_loss = objectives.squared_error(predictions, targets_var)
    test_loss = test_loss.mean()

    if model['hidden_nonlinearity'] == 'ReLu':
        model['lr'] *= 0.5
    updates_sgd = updates.sgd(loss, params, learning_rate=model['lr'])
    ups = updates.apply_momentum(updates_sgd, params, momentum=0.9)

    train_fn = theano.function([input_data, targets_var], loss, updates=ups)
    pred_fn = theano.function([input_data], predictions)
    # pred_fn = theano.function([input_data], prediction_unboun)
    val_fn = theano.function([input_data, targets_var], test_loss)
    return {'train': train_fn, 'eval': val_fn, 'pred': pred_fn, 'layers': output_layer}
def rmsprop_momentum(loss, params, eta=1e-3, alpha=0.9, **kwargs):
    rms = updt.rmsprop(loss, params, learning_rate=eta, **kwargs)
    return updt.apply_momentum(rms, params, momentum=alpha)
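# Minimal sketch of plugging rmsprop_momentum into a training function; assumes
# `updt` is lasagne.updates imported under that alias, as used above, and the small
# softmax classifier is illustrative only.
import theano
import theano.tensor as T
import lasagne
import lasagne.updates as updt

x = T.matrix('x')
t = T.ivector('t')
l_in = lasagne.layers.InputLayer((None, 100), x)
l_out = lasagne.layers.DenseLayer(l_in, 10, nonlinearity=lasagne.nonlinearities.softmax)

prediction = lasagne.layers.get_output(l_out)
loss = lasagne.objectives.categorical_crossentropy(prediction, t).mean()
params = lasagne.layers.get_all_params(l_out, trainable=True)

# RMSProp step sizes first, then classical momentum applied on top of those updates
updates = rmsprop_momentum(loss, params, eta=1e-3, alpha=0.9)
train_fn = theano.function([x, t], loss, updates=updates)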
all_params = get_all_params(layers, trainable=True)

# compute loss
generation = lasagne.layers.get_output(net)
generation = generation.dimshuffle([0, 2, 3, 1])

# mean squared error
train_loss = lasagne.objectives.squared_error(
    generation.reshape((generation.shape[0], -1)),
    img_batch_target.reshape((img_batch_target.shape[0], -1)))
train_loss = train_loss.sum(axis=1)
train_loss = train_loss.mean()

# update
lrn_rate = T.cast(theano.shared(options['learning_rate']), 'floatX')  # we can use a dynamic learning rate
optimizer = sgd
updates_sgd = optimizer(train_loss, all_params, learning_rate=lrn_rate)
updates = apply_momentum(updates_sgd, all_params, momentum=0.95)

# train
_train = theano.function([img_batch, pose_code, img_batch_target], train_loss,
                         updates=updates, allow_input_downcast=True)

# ------------ training ----------------
print("Train...")
if options['start_epoch'] == 0:
    start_epoch = 0
else:
    model.load_model(options['init_model_from'])
    start_epoch = options['start_epoch']
nb_epoch = options['max_epochs']
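# Hypothetical continuation of the epoch loop: `iterate_minibatches`, the
# options['batch_size'] key and the loss bookkeeping are illustrative assumptions;
# only `_train`, `start_epoch` and `nb_epoch` come from the code above.
for epoch in range(start_epoch, nb_epoch):
    epoch_loss, n_batches = 0.0, 0
    for img, pose, target in iterate_minibatches(options['batch_size']):
        epoch_loss += _train(img, pose, target)
        n_batches += 1
    print("epoch %d  mean loss %.4f" % (epoch, epoch_loss / max(n_batches, 1)))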
def sgdWithWeightSupress(loss_or_grads, params, learning_rate=.01, mu_lr=.01,
                         si_lr=.001, focused_w_lr=.01, momentum=.9, verbose=False):
    '''
    This update function masks focus weights after they are updated.
    The idea is that weights outside of the focus function must be suppressed
    to prevent weight memory when the focus changes its position.
    To do this I get the mu and si values of the focus layer, calculate a Gauss
    window, scale it so the center is 1 but the outside is close to 0, and then
    multiply it with the weights.
    '''
    from collections import OrderedDict
    from lasagne.updates import get_or_compute_grads, apply_momentum

    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    #momentum_params_list = []
    if verbose:
        print(params)
    for param, grad in zip(params, grads):
        #grad = clip_tensor(grad, -0.001, 0.001)
        if param.name.find('focus') >= 0 and param.name.find('mu') >= 0:
            updates[param] = param - mu_lr * grad
            updates = apply_momentum(updates, params=[param], momentum=momentum)
            updates[param] = clip_tensor(updates[param], 0.01, 0.99)
        elif param.name.find('focus') >= 0 and param.name.find('si') >= 0:
            updates[param] = param - si_lr * grad
            updates = apply_momentum(updates, params=[param], momentum=momentum)
            updates[param] = clip_tensor(updates[param], 0.01, 0.5)
        elif param.name.find('focus') >= 0 and param.name.find('W') >= 0:
            param_layer_name = param.name.split(".")[0]
            mu_name = param_layer_name + '.mu'
            si_name = param_layer_name + ".si"
            mu_si_w = get_params_values_wkey(params, [mu_name, si_name, param.name])
            #print("Hey weight shape::", mu_si_w[param.name].shape)
            from focusing import U_numeric
            us = U_numeric(np.linspace(0, 1, mu_si_w[param.name].shape[0]),
                           mu_si_w[mu_name], mu_si_w[si_name], 1, normed=False)
            updates[param] = (param - (focused_w_lr * grad))
            updates = apply_momentum(updates, params=[param], momentum=momentum)
            # here we are masking the weights, so they can not stay out of the envelope
            us[us > 0.1] = 1.0
            updates[param] = updates[param] * us.T
            #updates[param] = clip_tensor(updates[param], -0.5, 0.5)
        else:
            updates[param] = param - learning_rate * grad
            updates = apply_momentum(updates, params=[param], momentum=momentum)
        #print (param, grad, learning_rate)
    return updates
def sgdWithLrLayers(loss_or_grads, params, learning_rate=.01, mu_lr=.01,
                    si_lr=.001, focused_w_lr=.01, momentum=.9):
    '''
    This function updates each layer's parameters with a different learning rate.
    Under development.
    '''
    from collections import OrderedDict
    from lasagne.updates import get_or_compute_grads, apply_momentum

    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()
    #momentum_params_list = []
    #print(params)
    for param, grad in zip(params, grads):
        # import pdb; pdb.set_trace()
        grad = clip_tensor(grad, -0.01, 0.01)
        if param.name.find('focus') >= 0 and param.name.find('mu') >= 0:
            updates[param] = param - mu_lr * grad
            updates = apply_momentum(updates, params=[param], momentum=momentum / 2)
            updates[param] = clip_tensor(updates[param], 0.05, 0.95)
            #momentum_params_list.append(param)
            #print (param, mu_lr)
            #print (param, grad, mu_lr)
        elif param.name.find('focus') >= 0 and param.name.find('si') >= 0:
            updates[param] = param - si_lr * grad
            #momentum_params_list.append(param)
            updates = apply_momentum(updates, params=[param], momentum=momentum)
            updates[param] = clip_tensor(updates[param], 0.01, 0.5)
            #print (param, si_lr)
            #print (param, grad, si_lr)
            #print (param, grad, scaler_lr)
        elif param.name.find('focus') >= 0 and (param.name.find('W') >= 0 or
                                                param.name.find('bias') >= 0):
            level = int(str.split(param.name, '-')[1].split('.')[0])
            #print(param.name, level)
            updates[param] = param - (learning_rate * (1. / (level + 1))) * grad
            updates = apply_momentum(updates, params=[param], momentum=momentum)
            if param.name.find('W') >= 0:
                updates[param] = clip_tensor(updates[param], -0.4, 0.4)
            #momentum_params_list.append(param)
            #print (param, focused_w_lr)
        elif param.name.find('W') >= 0 or param.name.find('b') >= 0:
            if param.name.find('-') >= 0:
                level = int(str.split(param.name, '-')[1].split('.')[0])
                updates[param] = param - (learning_rate * (1. / level)) * grad
                updates = apply_momentum(updates, params=[param], momentum=momentum)
            else:
                updates[param] = param - learning_rate * grad
                #momentum_params_list.append(param)
                updates = apply_momentum(updates, params=[param], momentum=momentum)
            if param.name.find('W') >= 0:
                updates[param] = clip_tensor(updates[param], -0.4, 0.4)
            if param.name.find('b') >= 0:
                updates[param] = clip_tensor(updates[param], -1.0, 1.0)
        else:
            updates[param] = param - learning_rate * grad
            #momentum_params_list.append(param)
            updates = apply_momentum(updates, params=[param], momentum=momentum)
            if param.name.find('beta') >= 0:
                updates[param] = clip_tensor(updates[param], -1., 1.)
        #print (param, grad, learning_rate)
    return updates