Python SVHN.X примеры использования

Язык программирования: Python

Пространство имен/Пакет: pylearn2.datasets.svhn

Класс/Тип: SVHN

Метод/Функция: X

Примеров на hotexamples.com: 7

Python SVHN.X — найдено 7 примеров. Это лучшие примеры кода на Python для pylearn2.datasets.svhn.SVHN.X, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

SVHN(9)

X(6)

y(5)

apply_preprocessor(1)

get_batch_design(1)

get_batch_topo(1)

get_design_matrix(1)

get_targets(1)

get_test_set(1)

Пример #1

Показать файл

Файл: svhn.py Проект: pranavsreedhar/BinaryConnect-1

        if stochastic == True:
            stochastic_training = True
        else:
            binary_test = True

    print 'Loading the dataset'

    train_set = SVHN(which_set='splitted_train', axes=['b', 'c', 0, 1])

    valid_set = SVHN(which_set='valid', axes=['b', 'c', 0, 1])

    test_set = SVHN(which_set='test', axes=['b', 'c', 0, 1])

    # bc01 format
    # print train_set.X.shape
    train_set.X = np.reshape(train_set.X, (598388, 3, 32, 32))
    valid_set.X = np.reshape(valid_set.X, (6000, 3, 32, 32))
    test_set.X = np.reshape(test_set.X, (26032, 3, 32, 32))

    # for hinge loss
    train_set.y = np.subtract(np.multiply(2, train_set.y), 1.)
    valid_set.y = np.subtract(np.multiply(2, valid_set.y), 1.)
    test_set.y = np.subtract(np.multiply(2, test_set.y), 1.)

    print 'Creating the model'

    class DeepCNN(Network):
        def __init__(self, rng):

            Network.__init__(self, n_hidden_layer=8, BN=BN)

Пример #2

Показать файл

Файл: svhn.py Проект: hterada/b-dcgan

    train_set = SVHN(
            which_set= 'splitted_train',
            axes= ['b', 'c', 0, 1])
     
    valid_set = SVHN(
        which_set= 'valid',
        axes= ['b', 'c', 0, 1])
    
    test_set = SVHN(
        which_set= 'test',
        axes= ['b', 'c', 0, 1])
    
    # bc01 format
    # Inputs in the range [-1,+1]
    # print("Inputs in the range [-1,+1]")
    train_set.X = np.reshape(np.subtract(np.multiply(2./255.,train_set.X),1.),(-1,3,32,32))
    valid_set.X = np.reshape(np.subtract(np.multiply(2./255.,valid_set.X),1.),(-1,3,32,32))
    test_set.X = np.reshape(np.subtract(np.multiply(2./255.,test_set.X),1.),(-1,3,32,32))
    # print(np.max(train_set.X))
    # print(np.min(train_set.X))
    
    # for hinge loss (targets are already onehot)
    train_set.y = np.subtract(np.multiply(2,train_set.y),1.)
    valid_set.y = np.subtract(np.multiply(2,valid_set.y),1.)
    test_set.y = np.subtract(np.multiply(2,test_set.y),1.)

    print('Building the CNN...') 
    
    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')

Пример #3

Показать файл

Файл: svhn.py Проект: hihihippp/BinaryConnect

            path= "${SVHN_LOCAL_PATH}",
            axes= ['b', 'c', 0, 1])
     
    valid_set = SVHN(
        which_set= 'valid',
        path= "${SVHN_LOCAL_PATH}",
        axes= ['b', 'c', 0, 1])
    
    test_set = SVHN(
        which_set= 'test',
        path= "${SVHN_LOCAL_PATH}",
        axes= ['b', 'c', 0, 1])
    
    # bc01 format
    # print train_set.X.shape
    train_set.X = np.reshape(train_set.X,(598388,3,32,32))
    valid_set.X = np.reshape(valid_set.X,(6000,3,32,32))
    test_set.X = np.reshape(test_set.X,(26032,3,32,32))
    
    # for hinge loss
    train_set.y = np.subtract(np.multiply(2,train_set.y),1.)
    valid_set.y = np.subtract(np.multiply(2,valid_set.y),1.)
    test_set.y = np.subtract(np.multiply(2,test_set.y),1.)
    
    print 'Creating the model'
    
    class DeepCNN(Network):

        def __init__(self, rng):

            Network.__init__(self, n_hidden_layer = 8, BN = BN)

Пример #4

Показать файл

Файл: svhn_evaluation_random_labels.py Проект: hockeybro12/BinaryNetGeneralizationExperiments

    print("shuffle_parts = " + str(shuffle_parts))

    print('Loading SVHN dataset')
    # only load the 73257 training examples, not the extra 531131 examples
    # this is done for computational reasons
    train_set = SVHN(which_set='train', axes=['b', 'c', 0, 1])

    # we only test the train accuracy in this evaluation.
    #    test_set = SVHN(
    #        which_set= 'train',
    #        axes= ['b', 'c', 0, 1])

    print('Building the CNN...')

    # load the randomized dataset that was saved when the training was done.
    train_set.X = np.load('X_values_SVHN.npy')
    train_set.y = np.load('Y_values_SVHN.npy')

    # load the first 7000 samples
    train_set.X = train_set.X[:7000, :, :, :]
    train_set.y = train_set.y[:7000, :]
    print(train_set.X.shape)

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    # 128C3-128C3-P2

Пример #5

Показать файл

def main(method, LR_start):
    """Build a CNN for SVHN, compile its Theano functions and train it.

    The function builds a 6-conv / 3-dense network from ``laq`` layers with
    batch normalisation, compiles a training and a validation function,
    loads the SVHN splits, and runs the training loop. After every epoch it
    prints the metrics and appends them to
    ``<method>/svhn_lr<LR_start>_<method>.txt``.

    Args:
        method: Forwarded to every ``laq`` layer as ``method=``. The value
            ``"FPN"`` selects a single Adam update over all trainable
            parameters; any other value selects the quantized update path.
        LR_start: Initial learning rate. It is multiplied by ``LR_decay``
            (0.1) after epochs 15 and 25.

    Returns:
        None.
    """
    name = "svhn"
    print("dataset = " + str(name))
    print("Method = " + str(method))

    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # Training parameters
    batch_size = 50
    print("batch_size = " + str(batch_size))

    num_epochs = 50
    print("num_epochs = " + str(num_epochs))

    print("LR_start = " + str(LR_start))
    LR_decay = 0.1
    print("LR_decay=" + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    activation = lasagne.nonlinearities.rectify

    # number of filters in the first convolutional layer
    K = 64
    print("K=" + str(K))

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    l_in = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    # First conv stage: two 3x3 convolutions with K filters, then 2x2 pooling.
    # (Original note "128C3-128C3-P2"; with K = 64 each conv has 64 filters.)
    l_cnn1 = laq.Conv2DLayer(l_in,
                             num_filters=K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn1 = batch_norm.BatchNormLayer(l_cnn1, epsilon=epsilon, alpha=alpha)

    l_nl1 = lasagne.layers.NonlinearityLayer(l_bn1, nonlinearity=activation)

    l_cnn2 = laq.Conv2DLayer(l_nl1,
                             num_filters=K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp1 = lasagne.layers.MaxPool2DLayer(l_cnn2, pool_size=(2, 2))

    l_bn2 = batch_norm.BatchNormLayer(l_mp1, epsilon=epsilon, alpha=alpha)

    l_nl2 = lasagne.layers.NonlinearityLayer(l_bn2, nonlinearity=activation)

    # Second conv stage: two 3x3 convolutions with 2*K filters, then pooling.
    # (Original note "256C3-256C3-P2".)
    l_cnn3 = laq.Conv2DLayer(l_nl2,
                             num_filters=2 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn3 = batch_norm.BatchNormLayer(l_cnn3, epsilon=epsilon, alpha=alpha)

    l_nl3 = lasagne.layers.NonlinearityLayer(l_bn3, nonlinearity=activation)

    l_cnn4 = laq.Conv2DLayer(l_nl3,
                             num_filters=2 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp2 = lasagne.layers.MaxPool2DLayer(l_cnn4, pool_size=(2, 2))

    l_bn4 = batch_norm.BatchNormLayer(l_mp2, epsilon=epsilon, alpha=alpha)

    l_nl4 = lasagne.layers.NonlinearityLayer(l_bn4, nonlinearity=activation)

    # Third conv stage: two 3x3 convolutions with 4*K filters, then pooling.
    # (Original note "512C3-512C3-P2".)
    l_cnn5 = laq.Conv2DLayer(l_nl4,
                             num_filters=4 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn5 = batch_norm.BatchNormLayer(l_cnn5, epsilon=epsilon, alpha=alpha)

    l_nl5 = lasagne.layers.NonlinearityLayer(l_bn5, nonlinearity=activation)

    l_cnn6 = laq.Conv2DLayer(l_nl5,
                             num_filters=4 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp3 = lasagne.layers.MaxPool2DLayer(l_cnn6, pool_size=(2, 2))

    l_bn6 = batch_norm.BatchNormLayer(l_mp3, epsilon=epsilon, alpha=alpha)

    l_nl6 = lasagne.layers.NonlinearityLayer(l_bn6, nonlinearity=activation)

    # Dense head: 1024 -> 1024 -> 10 units, each followed by batch norm.
    # (Original note "1024FP-1024FP-10FP".)
    l_dn1 = laq.DenseLayer(l_nl6,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=1024,
                           method=method)

    l_bn7 = batch_norm.BatchNormLayer(l_dn1, epsilon=epsilon, alpha=alpha)

    l_nl7 = lasagne.layers.NonlinearityLayer(l_bn7, nonlinearity=activation)

    l_dn2 = laq.DenseLayer(l_nl7,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=1024,
                           method=method)

    l_bn8 = batch_norm.BatchNormLayer(l_dn2, epsilon=epsilon, alpha=alpha)

    l_nl8 = lasagne.layers.NonlinearityLayer(l_bn8, nonlinearity=activation)

    l_dn3 = laq.DenseLayer(l_nl8,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=10,
                           method=method)

    # The network output is the batch-normalised 10-unit layer (no softmax).
    l_out = batch_norm.BatchNormLayer(l_dn3, epsilon=epsilon, alpha=alpha)

    train_output = lasagne.layers.get_output(l_out, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    if method != "FPN":
        # Updates for the parameters tagged quantized=True
        W = lasagne.layers.get_all_params(l_out, quantized=True)
        W_grads = laq.compute_grads(loss, l_out)
        updates = optimizer.adam(loss_or_grads=W_grads,
                                 params=W,
                                 learning_rate=LR)
        updates = laq.clipping_scaling(updates, l_out)

        # Updates for the remaining trainable parameters.
        # items() is wrapped in list() so the concatenation also works on
        # Python 3, where dict views do not support "+".
        params = lasagne.layers.get_all_params(l_out,
                                               trainable=True,
                                               quantized=False)
        updates = OrderedDict(
            list(updates.items()) +
            list(optimizer.adam(loss_or_grads=loss,
                                params=params,
                                learning_rate=LR).items()))

        # Assign the values returned by laq.get_quantized_weights to the
        # parameters tagged tt=True, pairing them by position.
        ternary_weights = laq.get_quantized_weights(loss, l_out)
        updates2 = OrderedDict()
        tt_tag = lasagne.layers.get_all_params(l_out, tt=True)
        for idx, tt_tag_temp in enumerate(tt_tag):
            updates2[tt_tag_temp] = ternary_weights[idx]
        updates = OrderedDict(list(updates.items()) + list(updates2.items()))

        # Update the second-moment accumulators (params tagged acc=True) as
        # an exponential moving average of the squared gradients; the
        # original author notes this could also be taken from Adam.
        updates3 = OrderedDict()
        acc_tag = lasagne.layers.get_all_params(l_out, acc=True)
        beta2 = 0.999
        for idx, acc_tag_temp in enumerate(acc_tag):
            updates3[acc_tag_temp] = acc_tag_temp * beta2 + W_grads[
                idx] * W_grads[idx] * (1 - beta2)

        updates = OrderedDict(list(updates.items()) + list(updates3.items()))

    else:
        params = lasagne.layers.get_all_params(l_out, trainable=True)
        updates = optimizer.adam(loss_or_grads=loss,
                                 params=params,
                                 learning_rate=LR)

    test_output = lasagne.layers.get_output(l_out, deterministic=True)

    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Training step on one mini-batch: applies `updates`, returns the loss.
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Evaluation on one mini-batch: returns [loss, error rate].
    val_fn = theano.function([input, target], [test_loss, test_err])

    ## load data
    print('Loading SVHN dataset')

    train_set = SVHN(
        which_set='splitted_train',
        # which_set= 'valid',
        path="${SVHN_LOCAL_PATH}",
        axes=['b', 'c', 0, 1])

    valid_set = SVHN(which_set='valid',
                     path="${SVHN_LOCAL_PATH}",
                     axes=['b', 'c', 0, 1])

    test_set = SVHN(which_set='test',
                    path="${SVHN_LOCAL_PATH}",
                    axes=['b', 'c', 0, 1])

    # bc01 format
    train_set.X = np.reshape(train_set.X, (-1, 3, 32, 32))
    valid_set.X = np.reshape(valid_set.X, (-1, 3, 32, 32))
    test_set.X = np.reshape(test_set.X, (-1, 3, 32, 32))

    # Flatten the labels to 1-D so they can index the identity matrix below.
    train_set.y = np.array(train_set.y).flatten()
    valid_set.y = np.array(valid_set.y).flatten()
    test_set.y = np.array(test_set.y).flatten()

    # Onehot the targets
    train_set.y = np.float32(np.eye(10)[train_set.y])
    valid_set.y = np.float32(np.eye(10)[valid_set.y])
    test_set.y = np.float32(np.eye(10)[test_set.y])

    # for hinge loss: map one-hot {0, 1} targets to {-1, +1}
    train_set.y = 2 * train_set.y - 1.
    valid_set.y = 2 * valid_set.y - 1.
    test_set.y = 2 * test_set.y - 1.

    print('Training...')

    X_train = train_set.X
    y_train = train_set.y
    X_val = valid_set.X
    y_val = valid_set.y
    X_test = test_set.X
    y_test = test_set.y

    # This function trains the model a full epoch (on the whole dataset)
    def train_epoch(X, y, LR):
        """Run one shuffled pass over (X, y); return the mean batch loss.

        Samples beyond the last full batch are dropped.
        """
        loss = 0
        # Floor division keeps `batches` an int on Python 2 and 3 alike.
        batches = len(X) // batch_size
        # Shuffle indices here (instead of the data) to save memory.
        # A real list is required: np.random.shuffle works in place and
        # cannot modify a Python 3 range object.
        shuffled_range = list(range(len(X)))
        np.random.shuffle(shuffled_range)

        for i in range(batches):
            tmp_ind = shuffled_range[i * batch_size:(i + 1) * batch_size]
            newloss = train_fn(X[tmp_ind], y[tmp_ind], LR)
            loss += newloss
        loss /= batches
        return loss

    # This function tests the model a full epoch (on the whole dataset)
    def val_epoch(X, y):
        """Evaluate on (X, y); return (error rate in percent, mean loss).

        Samples beyond the last full batch are dropped.
        """
        err = 0
        loss = 0
        # Floor division keeps `batches` an int on Python 2 and 3 alike.
        batches = len(X) // batch_size

        for i in range(batches):
            new_loss, new_err = val_fn(X[i * batch_size:(i + 1) * batch_size],
                                       y[i * batch_size:(i + 1) * batch_size])
            err += new_err
            loss += new_loss

        err = err / batches * 100
        loss /= batches

        return err, loss

    # The log path does not change between epochs, so build it once.
    # NOTE(review): the directory named `method` is not created here, so it
    # presumably must exist before the first epoch finishes; confirm with
    # the caller.
    log_path = "{0}/{1}_lr{2}_{3}.txt".format(method, name, LR_start, method)

    best_val_err = 100
    best_epoch = 1
    # From here on LR is the numeric learning rate, no longer the symbolic
    # Theano scalar (the functions that needed it are already compiled).
    LR = LR_start
    # We iterate over epochs:
    for epoch in range(1, num_epochs + 1):

        start_time = time.time()
        train_loss = train_epoch(X_train, y_train, LR)

        val_err, val_loss = val_epoch(X_val, y_val)

        # Re-evaluate on the test set only when validation error did not get
        # worse. The first epoch always qualifies because the error rate
        # cannot exceed the initial best of 100.
        if val_err <= best_val_err:

            best_val_err = val_err
            best_epoch = epoch

            test_err, test_loss = val_epoch(X_test, y_test)

        epoch_duration = time.time() - start_time

        # Then we print the results for this epoch:
        print("Epoch " + str(epoch) + " of " + str(num_epochs) + " took " +
              str(epoch_duration) + "s")
        print("  LR:                            " + str(LR))
        print("  training loss:                 " + str(train_loss))
        print("  validation loss:               " + str(val_loss))
        print("  validation error rate:         " + str(val_err) + "%")
        print("  best epoch:                    " + str(best_epoch))
        print("  best validation error rate:    " + str(best_val_err) + "%")
        print("  test loss:                     " + str(test_loss))
        print("  test error rate:               " + str(test_err) + "%")

        with open(log_path, "a") as myfile:
            myfile.write(
                "{0}  {1:.5f} {2:.5f} {3:.5f} {4:.5f} {5:.5f} {6:.5f} {7:.5f}\n"
                .format(epoch, train_loss, val_loss, test_loss, val_err,
                        test_err, epoch_duration, LR))

        ## Learning rate update scheme
        if epoch == 15 or epoch == 25:
            LR *= LR_decay

Пример #6

Показать файл

    train_set = SVHN(which_set='splitted_train',
                     path="${SVHN_LOCAL_PATH}",
                     axes=['b', 'c', 0, 1])

    valid_set = SVHN(which_set='valid',
                     path="${SVHN_LOCAL_PATH}",
                     axes=['b', 'c', 0, 1])

    test_set = SVHN(which_set='test',
                    path="${SVHN_LOCAL_PATH}",
                    axes=['b', 'c', 0, 1])

    # bc01 format
    # print train_set.X.shape
    train_set.X = np.reshape(train_set.X, (-1, 3, 32, 32))
    valid_set.X = np.reshape(valid_set.X, (-1, 3, 32, 32))
    test_set.X = np.reshape(test_set.X, (-1, 3, 32, 32))

    # for hinge loss (targets are already onehot)
    train_set.y = np.subtract(np.multiply(2, train_set.y), 1.)
    valid_set.y = np.subtract(np.multiply(2, valid_set.y), 1.)
    test_set.y = np.subtract(np.multiply(2, test_set.y), 1.)

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

Пример #7

Показать файл

Файл: svhn.py Проект: houlu369/Loss-aware-Binarization

def main(method, LR_start, Binarize_weight_only):
    """Build a CNN for SVHN from ``lab`` layers and train it via ``lab.train``.

    The function builds a 6-conv / 3-dense network with batch normalisation,
    compiles a training and a validation Theano function, loads the SVHN
    splits and hands everything to ``lab.train``.

    Args:
        method: Forwarded to every ``lab`` layer as ``method=``. The value
            ``"FPN"`` selects a single Adam update over all trainable
            parameters; any other value selects the binarized update path.
        LR_start: Initial learning rate, passed on to ``lab.train``.
        Binarize_weight_only: ``"w"`` selects the rectifier activation; any
            other value selects ``lab.binary_tanh_unit``.

    Returns:
        None.
    """
    name = "svhn"
    print("dataset = " + str(name))

    print("Binarize_weight_only=" + str(Binarize_weight_only))

    print("Method = " + str(method))

    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # Training parameters
    batch_size = 50
    print("batch_size = " + str(batch_size))

    num_epochs = 50
    print("num_epochs = " + str(num_epochs))

    print("LR_start = " + str(LR_start))
    LR_decay = 0.1
    print("LR_decay=" + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    if Binarize_weight_only == "w":
        activation = lasagne.nonlinearities.rectify
    else:
        activation = lab.binary_tanh_unit
    print("activation = " + str(activation))

    ## number of filters in the first convolutional layer
    K = 64
    print("K=" + str(K))

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    l_in = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    # First conv stage: two 3x3 convolutions with K filters, then 2x2 pooling.
    # (Original note "128C3-128C3-P2"; with K = 64 each conv has 64 filters.)
    l_cnn1 = lab.Conv2DLayer(l_in,
                             num_filters=K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn1 = batch_norm.BatchNormLayer(l_cnn1, epsilon=epsilon, alpha=alpha)

    l_nl1 = lasagne.layers.NonlinearityLayer(l_bn1, nonlinearity=activation)

    l_cnn2 = lab.Conv2DLayer(l_nl1,
                             num_filters=K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp1 = lasagne.layers.MaxPool2DLayer(l_cnn2, pool_size=(2, 2))

    l_bn2 = batch_norm.BatchNormLayer(l_mp1, epsilon=epsilon, alpha=alpha)

    l_nl2 = lasagne.layers.NonlinearityLayer(l_bn2, nonlinearity=activation)

    # Second conv stage: two 3x3 convolutions with 2*K filters, then pooling.
    # (Original note "256C3-256C3-P2".)
    l_cnn3 = lab.Conv2DLayer(l_nl2,
                             num_filters=2 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn3 = batch_norm.BatchNormLayer(l_cnn3, epsilon=epsilon, alpha=alpha)

    l_nl3 = lasagne.layers.NonlinearityLayer(l_bn3, nonlinearity=activation)

    l_cnn4 = lab.Conv2DLayer(l_nl3,
                             num_filters=2 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp2 = lasagne.layers.MaxPool2DLayer(l_cnn4, pool_size=(2, 2))

    l_bn4 = batch_norm.BatchNormLayer(l_mp2, epsilon=epsilon, alpha=alpha)

    l_nl4 = lasagne.layers.NonlinearityLayer(l_bn4, nonlinearity=activation)

    # Third conv stage: two 3x3 convolutions with 4*K filters, then pooling.
    # (Original note "512C3-512C3-P2".)
    l_cnn5 = lab.Conv2DLayer(l_nl4,
                             num_filters=4 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn5 = batch_norm.BatchNormLayer(l_cnn5, epsilon=epsilon, alpha=alpha)

    l_nl5 = lasagne.layers.NonlinearityLayer(l_bn5, nonlinearity=activation)

    l_cnn6 = lab.Conv2DLayer(l_nl5,
                             num_filters=4 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp3 = lasagne.layers.MaxPool2DLayer(l_cnn6, pool_size=(2, 2))

    l_bn6 = batch_norm.BatchNormLayer(l_mp3, epsilon=epsilon, alpha=alpha)

    l_nl6 = lasagne.layers.NonlinearityLayer(l_bn6, nonlinearity=activation)

    # Dense head: 1024 -> 1024 -> 10 units, each followed by batch norm.
    # (Original note "1024FP-1024FP-10FP".)
    l_dn1 = lab.DenseLayer(l_nl6,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=1024,
                           method=method)

    l_bn7 = batch_norm.BatchNormLayer(l_dn1, epsilon=epsilon, alpha=alpha)

    l_nl7 = lasagne.layers.NonlinearityLayer(l_bn7, nonlinearity=activation)

    l_dn2 = lab.DenseLayer(l_nl7,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=1024,
                           method=method)

    l_bn8 = batch_norm.BatchNormLayer(l_dn2, epsilon=epsilon, alpha=alpha)

    l_nl8 = lasagne.layers.NonlinearityLayer(l_bn8, nonlinearity=activation)

    l_dn3 = lab.DenseLayer(l_nl8,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=10,
                           method=method)

    # The network output is the batch-normalised 10-unit layer (no softmax).
    l_out = batch_norm.BatchNormLayer(l_dn3, epsilon=epsilon, alpha=alpha)

    train_output = lasagne.layers.get_output(l_out, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    if method != "FPN":
        # Updates for the parameters tagged binary=True
        W = lasagne.layers.get_all_params(l_out, binary=True)
        W_grads = lab.compute_grads(loss, l_out)
        updates = optimizer.adam(loss_or_grads=W_grads,
                                 params=W,
                                 learning_rate=LR)
        updates = lab.clipping_scaling(updates, l_out)

        # Updates for the remaining trainable parameters.
        # items() is wrapped in list() so the concatenation also works on
        # Python 3, where dict views do not support "+".
        params = lasagne.layers.get_all_params(l_out,
                                               trainable=True,
                                               binary=False)
        updates = OrderedDict(
            list(updates.items()) +
            list(optimizer.adam(loss_or_grads=loss,
                                params=params,
                                learning_rate=LR).items()))

        # Update the second-moment accumulators (params tagged acc=True) as
        # an exponential moving average of the squared gradients; the
        # original author notes this could also be taken from Adam.
        updates3 = OrderedDict()
        acc_tag = lasagne.layers.get_all_params(l_out, acc=True)
        beta2 = 0.999
        for idx, acc_tag_temp in enumerate(acc_tag):
            updates3[acc_tag_temp] = acc_tag_temp * beta2 + W_grads[
                idx] * W_grads[idx] * (1 - beta2)

        updates = OrderedDict(list(updates.items()) + list(updates3.items()))
    else:
        params = lasagne.layers.get_all_params(l_out, trainable=True)
        updates = optimizer.adam(loss_or_grads=loss,
                                 params=params,
                                 learning_rate=LR)

    test_output = lasagne.layers.get_output(l_out, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)
    # Evaluation on one mini-batch: returns [loss, error rate].
    val_fn = theano.function([input, target], [test_loss, test_err])

    ## load data
    print('Loading SVHN dataset')

    train_set = SVHN(
        which_set='splitted_train',
        # which_set= 'valid',
        path="${SVHN_LOCAL_PATH}",
        axes=['b', 'c', 0, 1])

    valid_set = SVHN(which_set='valid',
                     path="${SVHN_LOCAL_PATH}",
                     axes=['b', 'c', 0, 1])

    test_set = SVHN(which_set='test',
                    path="${SVHN_LOCAL_PATH}",
                    axes=['b', 'c', 0, 1])

    # bc01 format
    train_set.X = np.reshape(train_set.X, (-1, 3, 32, 32))
    valid_set.X = np.reshape(valid_set.X, (-1, 3, 32, 32))
    test_set.X = np.reshape(test_set.X, (-1, 3, 32, 32))

    # Flatten the labels to 1-D so they can index the identity matrix below.
    train_set.y = np.array(train_set.y).flatten()
    valid_set.y = np.array(valid_set.y).flatten()
    test_set.y = np.array(test_set.y).flatten()

    # Onehot the targets
    train_set.y = np.float32(np.eye(10)[train_set.y])
    valid_set.y = np.float32(np.eye(10)[valid_set.y])
    test_set.y = np.float32(np.eye(10)[test_set.y])

    # for hinge loss: map one-hot {0, 1} targets to {-1, +1}
    train_set.y = 2 * train_set.y - 1.
    valid_set.y = 2 * valid_set.y - 1.
    test_set.y = 2 * test_set.y - 1.

    print('Training...')

    # The epoch loop itself lives in lab.train, which is not visible here.
    lab.train(name, method, train_fn, val_fn, batch_size, LR_start, LR_decay,
              num_epochs, train_set.X, train_set.y, valid_set.X, valid_set.y,
              test_set.X, test_set.y)