Example #1
class TestSVHN(unittest.TestCase):
    #WARN: SVHN tries to overwrite files as part of __init__.
    def setUp(self):
        try:
            self.train = SVHN(which_set='train')
            self.test = SVHN(which_set='test')
        except (NoDataPathError, NotInstalledError):
            raise SkipTest()

    def test_get_test_set(self):
        try:
            test_from_train = self.train.get_test_set()
            self.assertTrue(numpy.all(test_from_train.get_design_matrix() == self.test.get_design_matrix()))
            self.assertTrue(numpy.all(test_from_train.get_targets() == self.test.get_targets()))
        except (NoDataPathError, NotInstalledError):
            raise SkipTest()
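A minimal sketch of the imports this test relies on; the exact module paths for SkipTest and the two exception classes are assumptions, since the snippet does not show them.

# Assumed imports for the test above (not part of the original snippet):
import unittest
import numpy
from nose.plugins.skip import SkipTest
from pylearn2.datasets.svhn import SVHN
from pylearn2.datasets.exc import NoDataPathError, NotInstalledError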
Example #2
    def load_dataset(self):
        # TODO: we might need other variables for identifying what kind of
        # extra preprocessing was done such as features product and number
        # of features kept based on MI.
        #base_path = get_data_path(self.state)
        #self.base_path = base_path

        #import pdb
        #pdb.set_trace()
        
        if self.state.dataset == 'mnist':
            self.test_ddm = MNIST(which_set='test', one_hot=True)

            dataset = MNIST(which_set='train', shuffle=True, one_hot=True)
            train_X, valid_X = np.split(dataset.X, [50000])
            train_y, valid_y = np.split(dataset.y, [50000])
            self.train_ddm = DenseDesignMatrix(X=train_X, y=train_y)
            self.valid_ddm = DenseDesignMatrix(X=valid_X, y=valid_y)
            
        elif self.state.dataset == 'svhn':
            self.train_ddm = SVHN(which_set='splitted_train')
            self.test_ddm = SVHN(which_set='test')
            self.valid_ddm = SVHN(which_set='valid')

        elif self.state.dataset == 'cifar10':

            self.train_ddm = My_CIFAR10(which_set='train', one_hot=True)
            self.test_ddm = None
            self.valid_ddm = My_CIFAR10(which_set='test', one_hot=True)

        
        if self.train_ddm is not None:
            self.nvis = self.train_ddm.X.shape[1]
            self.nout = self.train_ddm.y.shape[1]
            print "nvis, nout :", self.nvis, self.nout
            self.ntrain = self.train_ddm.X.shape[0]
            print "ntrain :", self.ntrain
        
        if self.valid_ddm is not None:
            self.nvalid = self.valid_ddm.X.shape[0]
            print "nvalid :", self.nvalid
        
        if self.test_ddm is not None:
            self.ntest = self.test_ddm.X.shape[0]
            print "ntest :", self.ntest
Example #3
def get_dim_input(state):

    if state.dataset == 'mnist':
        dataset = MNIST(which_set='test')
        dim = dataset.X.shape[1]
    elif state.dataset == 'svhn':
        dataset = SVHN(which_set='test')
        dim = dataset.X.shape[1]
    elif state.dataset == 'cifar10':
        dataset = My_CIFAR10(which_set='test')
        dim = dataset.X.shape[1]
    else:
        raise ValueError(
            'only mnist, cifar10 and svhn are supported for now in get_dim_input'
        )

    del dataset
    return dim
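For the datasets handled above, the returned dimension is the flattened image size: 784 (28x28) for MNIST and 3072 (32x32x3) for SVHN and CIFAR-10. A hedged usage sketch, with a stand-in for the state object (the real state class is not shown in these snippets):

class State(object):          # hypothetical stand-in for the real `state` object
    dataset = 'mnist'

dim = get_dim_input(State())  # 784 for MNIST (28*28); 3072 (32*32*3) for svhn/cifar10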
Example #4
    # Old hyperparameters
    binary_training = False
    stochastic_training = False
    binary_test = False
    stochastic_test = False
    if BinaryConnect == True:
        binary_training = True
        if stochastic == True:
            stochastic_training = True
        else:
            binary_test = True

    print 'Loading the dataset'

    train_set = SVHN(which_set='splitted_train', axes=['b', 'c', 0, 1])

    valid_set = SVHN(which_set='valid', axes=['b', 'c', 0, 1])

    test_set = SVHN(which_set='test', axes=['b', 'c', 0, 1])

    # bc01 format
    # print train_set.X.shape
    train_set.X = np.reshape(train_set.X, (598388, 3, 32, 32))
    valid_set.X = np.reshape(valid_set.X, (6000, 3, 32, 32))
    test_set.X = np.reshape(test_set.X, (26032, 3, 32, 32))

    # for hinge loss
    train_set.y = np.subtract(np.multiply(2, train_set.y), 1.)
    valid_set.y = np.subtract(np.multiply(2, valid_set.y), 1.)
    test_set.y = np.subtract(np.multiply(2, test_set.y), 1.)
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    # for SVHN, depending on available CPU memory
    # 1, 2, 4, 7 or 14
    shuffle_parts = 1
    # shuffle_parts = 2 # does not work on bart5
    # shuffle_parts = 4 # seems to work on bart5
    # shuffle_parts = 7 # just to be safe
    print("shuffle_parts = " + str(shuffle_parts))

    print('Loading SVHN dataset')

    train_set = SVHN(which_set='train', axes=['b', 'c', 0, 1])

    valid_set = SVHN(which_set='valid', axes=['b', 'c', 0, 1])

    test_set = SVHN(which_set='test', axes=['b', 'c', 0, 1])

    # bc01 format
    # Inputs in the range [-1,+1]
    # print("Inputs in the range [-1,+1]")
    train_set.X = np.reshape(
        np.subtract(np.multiply(2. / 255., train_set.X), 1.), (-1, 3, 32, 32))
    valid_set.X = np.reshape(
        np.subtract(np.multiply(2. / 255., valid_set.X), 1.), (-1, 3, 32, 32))
    test_set.X = np.reshape(
        np.subtract(np.multiply(2. / 255., test_set.X), 1.), (-1, 3, 32, 32))
    # print(np.max(train_set.X))
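The 2./255. transform above maps raw byte pixel values from [0, 255] to [-1, +1]; a small check with made-up values:

import numpy as np

pixels = np.array([0., 127.5, 255.])
print np.subtract(np.multiply(2. / 255., pixels), 1.)  # [-1.  0.  1.]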
Example #6
# Old hyperparameters
binary_training = False
stochastic_training = False
binary_test = False
stochastic_test = False
if BinaryConnect == True:
    binary_training = True
    if stochastic == True:
        stochastic_training = True
    else:
        binary_test = True

print 'Loading the dataset'

train_set = SVHN(which_set='splitted_train',
                 path="${SVHN_LOCAL_PATH}",
                 axes=['b', 'c', 0, 1])

valid_set = SVHN(which_set='valid',
                 path="${SVHN_LOCAL_PATH}",
                 axes=['b', 'c', 0, 1])

test_set = SVHN(which_set='test',
                path="${SVHN_LOCAL_PATH}",
                axes=['b', 'c', 0, 1])

# bc01 format
# print train_set.X.shape
train_set.X = np.reshape(train_set.X, (598388, 3, 32, 32))
    if not os.path.isfile(os.path.join(local_path, d_set)):
        logging.info("Copying data from {0} to {1}".format(
            os.path.join(local_path, d_set), local_path))
        shutil.copyfile(os.path.join(orig_path, d_set),
                        os.path.join(local_path, d_set))


def check_dtype(data):
    if str(data.X.dtype) != config.floatX:
        logging.warning("The dataset is saved as {}, changing theano's floatX " \
                        "to the same dtype".format(data.X.dtype))
        config.floatX = str(data.X.dtype)


# Load train data
train = SVHN('splitted_train', path=local_path)
check_dtype(train)

# prepare preprocessing
pipeline = preprocessing.Pipeline()
# Without a batch_size there is a high chance of hitting a memory error
# or a PyTables crash.
pipeline.items.append(
    preprocessing.GlobalContrastNormalization(batch_size=5000))
pipeline.items.append(preprocessing.LeCunLCN((32, 32)))

# apply the preprocessings to train
train.apply_preprocessor(pipeline, can_fit=True)
del train

# load and preprocess valid
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    # for SVHN, depending on available CPU memory
    # 1, 2, 4, 7 or 14
    shuffle_parts = 1
    # shuffle_parts = 2 # does not work on bart5
    # shuffle_parts = 4 # seems to work on bart5
    # shuffle_parts = 7 # just to be safe
    print("shuffle_parts = " + str(shuffle_parts))

    print('Loading SVHN dataset')
    # only load the 73257 training examples, not the extra 531131 examples
    # this is done for computational reasons
    train_set = SVHN(which_set='train', axes=['b', 'c', 0, 1])

    # we only test the train accuracy in this evaluation.
    #    test_set = SVHN(
    #        which_set= 'train',
    #        axes= ['b', 'c', 0, 1])

    print('Building the CNN...')

    # load the randomized dataset that was saved when the training was done.
    train_set.X = np.load('X_values_SVHN.npy')
    train_set.y = np.load('Y_values_SVHN.npy')

    # load the first 7000 samples
    train_set.X = train_set.X[:7000, :, :, :]
    train_set.y = train_set.y[:7000, :]
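The X_values_SVHN.npy / Y_values_SVHN.npy arrays loaded above are described as having been saved during training; a hedged sketch of how such a cache could be written (the shuffling step is an assumption based on the "randomized dataset" comment):

# Hypothetical caching step, run once at training time:
idx = np.random.permutation(train_set.X.shape[0])
np.save('X_values_SVHN.npy', train_set.X[idx])
np.save('Y_values_SVHN.npy', train_set.y[idx])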
Example #9
import os
from pylearn2.datasets.svhn import SVHN
from pylearn2.utils.string_utils import preprocess

assert 'PYLEARN2_DATA_PATH' in os.environ, "PYLEARN2_DATA_PATH not defined"

orig_path = preprocess('${PYLEARN2_DATA_PATH}/SVHN/format2/')

# Check if MAT files have been downloaded
if not os.path.isdir(orig_path):
    raise IOError("You need to download the SVHN format2 dataset MAT files "
                  "before running this conversion script.")

# Create directory in which to save the pytables files
local_path = orig_path
if not os.path.isdir(os.path.join(local_path, 'h5')):
    os.makedirs(os.path.join(local_path, 'h5'))

print("***************************************************************\n"
      "Please ignore the warning produced during this MAT -> Pytables\n"
      "conversion for the SVHN dataset. If you are creating the\n"
      "pytables for the first time then no files are modified/over-written,\n"
      "they are simply written for the first time.\n"
      "***************************************************************\n")

test = SVHN('test', path=local_path)

valid = SVHN('valid', path=local_path)

train = SVHN('splitted_train', path=local_path)
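Several snippets on this page resolve "${SVHN_LOCAL_PATH}" and "${PYLEARN2_DATA_PATH}" through pylearn2's string preprocessing; a hedged sketch of the environment setup they assume (the directory values are placeholders, not taken from the original):

import os

os.environ['PYLEARN2_DATA_PATH'] = '/data/pylearn2_data'               # placeholder
os.environ['SVHN_LOCAL_PATH'] = '/data/pylearn2_data/SVHN/format2/h5'  # placeholder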
Example #10
    def setUp(self):
        try:
            self.train = SVHN(which_set='train')
            self.test = SVHN(which_set='test')
        except (NoDataPathError, NotInstalledError):
            raise SkipTest()
Example #11
def main(method, LR_start):

    name = "svhn"
    print("dataset = " + str(name))
    print("Method = " + str(method))

    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # Training parameters
    batch_size = 50
    print("batch_size = " + str(batch_size))

    num_epochs = 50
    print("num_epochs = " + str(num_epochs))

    print("LR_start = " + str(LR_start))
    LR_decay = 0.1
    print("LR_decay=" + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    activation = lasagne.nonlinearities.rectify

    # number of filters in the first convolutional layer
    K = 64
    print("K=" + str(K))

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    l_in = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    # 128C3-128C3-P2
    l_cnn1 = laq.Conv2DLayer(l_in,
                             num_filters=K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn1 = batch_norm.BatchNormLayer(l_cnn1, epsilon=epsilon, alpha=alpha)

    l_nl1 = lasagne.layers.NonlinearityLayer(l_bn1, nonlinearity=activation)

    l_cnn2 = laq.Conv2DLayer(l_nl1,
                             num_filters=K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp1 = lasagne.layers.MaxPool2DLayer(l_cnn2, pool_size=(2, 2))

    l_bn2 = batch_norm.BatchNormLayer(l_mp1, epsilon=epsilon, alpha=alpha)

    l_nl2 = lasagne.layers.NonlinearityLayer(l_bn2, nonlinearity=activation)
    # 256C3-256C3-P2
    l_cnn3 = laq.Conv2DLayer(l_nl2,
                             num_filters=2 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn3 = batch_norm.BatchNormLayer(l_cnn3, epsilon=epsilon, alpha=alpha)

    l_nl3 = lasagne.layers.NonlinearityLayer(l_bn3, nonlinearity=activation)

    l_cnn4 = laq.Conv2DLayer(l_nl3,
                             num_filters=2 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp2 = lasagne.layers.MaxPool2DLayer(l_cnn4, pool_size=(2, 2))

    l_bn4 = batch_norm.BatchNormLayer(l_mp2, epsilon=epsilon, alpha=alpha)

    l_nl4 = lasagne.layers.NonlinearityLayer(l_bn4, nonlinearity=activation)

    # 512C3-512C3-P2
    l_cnn5 = laq.Conv2DLayer(l_nl4,
                             num_filters=4 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn5 = batch_norm.BatchNormLayer(l_cnn5, epsilon=epsilon, alpha=alpha)

    l_nl5 = lasagne.layers.NonlinearityLayer(l_bn5, nonlinearity=activation)

    l_cnn6 = laq.Conv2DLayer(l_nl5,
                             num_filters=4 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp3 = lasagne.layers.MaxPool2DLayer(l_cnn6, pool_size=(2, 2))

    l_bn6 = batch_norm.BatchNormLayer(l_mp3, epsilon=epsilon, alpha=alpha)

    l_nl6 = lasagne.layers.NonlinearityLayer(l_bn6, nonlinearity=activation)

    # print(cnn.output_shape)

    # 1024FP-1024FP-10FP
    l_dn1 = laq.DenseLayer(l_nl6,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=1024,
                           method=method)

    l_bn7 = batch_norm.BatchNormLayer(l_dn1, epsilon=epsilon, alpha=alpha)

    l_nl7 = lasagne.layers.NonlinearityLayer(l_bn7, nonlinearity=activation)

    l_dn2 = laq.DenseLayer(l_nl7,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=1024,
                           method=method)

    l_bn8 = batch_norm.BatchNormLayer(l_dn2, epsilon=epsilon, alpha=alpha)

    l_nl8 = lasagne.layers.NonlinearityLayer(l_bn8, nonlinearity=activation)

    l_dn3 = laq.DenseLayer(l_nl8,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=10,
                           method=method)

    l_out = batch_norm.BatchNormLayer(l_dn3, epsilon=epsilon, alpha=alpha)

    train_output = lasagne.layers.get_output(l_out, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    if method != "FPN":
        # W updates
        W = lasagne.layers.get_all_params(l_out, quantized=True)
        W_grads = laq.compute_grads(loss, l_out)
        updates = optimizer.adam(loss_or_grads=W_grads,
                                 params=W,
                                 learning_rate=LR)
        updates = laq.clipping_scaling(updates, l_out)

        # other parameters updates
        params = lasagne.layers.get_all_params(l_out,
                                               trainable=True,
                                               quantized=False)
        updates = OrderedDict(updates.items() + optimizer.adam(
            loss_or_grads=loss, params=params, learning_rate=LR).items())

        ## update 2nd moment, can get from the adam optimizer also
        ternary_weights = laq.get_quantized_weights(loss, l_out)
        updates2 = OrderedDict()
        idx = 0
        tt_tag = lasagne.layers.get_all_params(l_out, tt=True)
        for tt_tag_temp in tt_tag:
            updates2[tt_tag_temp] = ternary_weights[idx]
            idx = idx + 1
        updates = OrderedDict(updates.items() + updates2.items())

        ## update 2nd momentum
        updates3 = OrderedDict()
        acc_tag = lasagne.layers.get_all_params(l_out, acc=True)
        idx = 0
        beta2 = 0.999
        for acc_tag_temp in acc_tag:
            updates3[acc_tag_temp] = acc_tag_temp * beta2 + W_grads[
                idx] * W_grads[idx] * (1 - beta2)
            idx = idx + 1

        updates = OrderedDict(updates.items() + updates3.items())

    else:
        params = lasagne.layers.get_all_params(l_out, trainable=True)
        updates = optimizer.adam(loss_or_grads=loss,
                                 params=params,
                                 learning_rate=LR)

    test_output = lasagne.layers.get_output(l_out, deterministic=True)

    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input, target, LR], loss, updates=updates)

    val_fn = theano.function([input, target], [test_loss, test_err])

    ## load data
    print('Loading SVHN dataset')

    train_set = SVHN(
        which_set='splitted_train',
        # which_set= 'valid',
        path="${SVHN_LOCAL_PATH}",
        axes=['b', 'c', 0, 1])

    valid_set = SVHN(which_set='valid',
                     path="${SVHN_LOCAL_PATH}",
                     axes=['b', 'c', 0, 1])

    test_set = SVHN(which_set='test',
                    path="${SVHN_LOCAL_PATH}",
                    axes=['b', 'c', 0, 1])

    # bc01 format
    # print train_set.X.shape
    train_set.X = np.reshape(train_set.X, (-1, 3, 32, 32))
    valid_set.X = np.reshape(valid_set.X, (-1, 3, 32, 32))
    test_set.X = np.reshape(test_set.X, (-1, 3, 32, 32))

    train_set.y = np.array(train_set.y).flatten()
    valid_set.y = np.array(valid_set.y).flatten()
    test_set.y = np.array(test_set.y).flatten()

    # Onehot the targets
    train_set.y = np.float32(np.eye(10)[train_set.y])
    valid_set.y = np.float32(np.eye(10)[valid_set.y])
    test_set.y = np.float32(np.eye(10)[test_set.y])

    # for hinge loss
    train_set.y = 2 * train_set.y - 1.
    valid_set.y = 2 * valid_set.y - 1.
    test_set.y = 2 * test_set.y - 1.
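    # Worked example of the two target transforms above for a single label
    # (values made up): y = 3
    #   np.eye(10)[3]   -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    #   2 * onehot - 1. -> [-1, -1, -1, +1, -1, -1, -1, -1, -1, -1]
    # which is the +/-1 encoding expected by the squared hinge loss
    #   T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))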

    print('Training...')

    X_train = train_set.X
    y_train = train_set.y
    X_val = valid_set.X
    y_val = valid_set.y
    X_test = test_set.X
    y_test = test_set.y

    # This function trains the model a full epoch (on the whole dataset)
    def train_epoch(X, y, LR):

        loss = 0
        batches = len(X) / batch_size
        # move shuffle here to save memory
        # k = 5
        # batches = int(batches/k)*k
        shuffled_range = range(len(X))
        np.random.shuffle(shuffled_range)

        for i in range(batches):
            tmp_ind = shuffled_range[i * batch_size:(i + 1) * batch_size]
            newloss = train_fn(X[tmp_ind], y[tmp_ind], LR)
            loss += newloss
        loss /= batches
        return loss

    # This function tests the model a full epoch (on the whole dataset)
    def val_epoch(X, y):

        err = 0
        loss = 0
        batches = len(X) / batch_size

        for i in range(batches):
            new_loss, new_err = val_fn(X[i * batch_size:(i + 1) * batch_size],
                                       y[i * batch_size:(i + 1) * batch_size])
            err += new_err
            loss += new_loss

        err = err / batches * 100
        loss /= batches

        return err, loss

    best_val_err = 100
    best_epoch = 1
    LR = LR_start
    # We iterate over epochs:
    for epoch in range(1, num_epochs + 1):

        start_time = time.time()
        train_loss = train_epoch(X_train, y_train, LR)

        val_err, val_loss = val_epoch(X_val, y_val)

        # test if validation error went down
        if val_err <= best_val_err:

            best_val_err = val_err
            best_epoch = epoch

            test_err, test_loss = val_epoch(X_test, y_test)

        epoch_duration = time.time() - start_time

        # Then we print the results for this epoch:
        print("Epoch " + str(epoch) + " of " + str(num_epochs) + " took " +
              str(epoch_duration) + "s")
        print("  LR:                            " + str(LR))
        print("  training loss:                 " + str(train_loss))
        print("  validation loss:               " + str(val_loss))
        print("  validation error rate:         " + str(val_err) + "%")
        print("  best epoch:                    " + str(best_epoch))
        print("  best validation error rate:    " + str(best_val_err) + "%")
        print("  test loss:                     " + str(test_loss))
        print("  test error rate:               " + str(test_err) + "%")

        with open(
                "{0}/{1}_lr{2}_{3}.txt".format(method, name, LR_start, method),
                "a") as myfile:
            myfile.write(
                "{0}  {1:.5f} {2:.5f} {3:.5f} {4:.5f} {5:.5f} {6:.5f} {7:.5f}\n"
                .format(epoch, train_loss, val_loss, test_loss, val_err,
                        test_err, epoch_duration, LR))

        ## Learning rate update scheme
        if epoch == 15 or epoch == 25:
            LR *= LR_decay
Example #12
    W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    LR_start = 0.01
    print("LR_start = " + str(LR_start))
    LR_fin = 0.000003
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    print('Loading SVHN dataset')

    train_set = SVHN(which_set='splitted_train',
                     path="${SVHN_LOCAL_PATH}",
                     axes=['b', 'c', 0, 1])

    valid_set = SVHN(which_set='valid',
                     path="${SVHN_LOCAL_PATH}",
                     axes=['b', 'c', 0, 1])

    test_set = SVHN(which_set='test',
                    path="${SVHN_LOCAL_PATH}",
                    axes=['b', 'c', 0, 1])

    # bc01 format
    # print train_set.X.shape
    train_set.X = np.reshape(train_set.X, (-1, 3, 32, 32))
    valid_set.X = np.reshape(valid_set.X, (-1, 3, 32, 32))
    test_set.X = np.reshape(test_set.X, (-1, 3, 32, 32))
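Using the LR_start and LR_fin printed in this example, the decay factor works out as below; num_epochs is not shown in the snippet, so 50 is an assumption borrowed from the other examples on this page.

LR_start, LR_fin, num_epochs = 0.01, 0.000003, 50     # num_epochs assumed
LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)   # ~0.8503
# so that LR_start * LR_decay**num_epochs == LR_fin (up to rounding)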
Example #13
def load_data(dataset, train_percent=0.8, val_percent=0.2):
    """ Load the MNIST, CIFAR-10 or SVHN dataset
    dataset: string: 'mnist', 'cifar10' or 'svhn'
    train_percent: float: fraction of the training set to be used for training
    val_percent: float: fraction of the training set to be used for validation
    Output:
    (train_x, val_x, test_x, train_y, val_y, test_y)
    """
    zero_mean = False
    

    if(dataset.lower() == 'mnist'):
        print('Loading MNIST dataset from pylearn2')
        train_set_size = int(_DATASET_SIZE['mnist'] * train_percent)
        train_data = MNIST(which_set='train', start=0, stop=train_set_size, center=zero_mean)
        val_data = MNIST(which_set='train', start=train_set_size, stop=_DATASET_SIZE['mnist'], center=zero_mean)
        test_data = MNIST(which_set='test', center=zero_mean)

        # convert labels into 1D array
        train_data.y = np.hstack(train_data.y)
        val_data.y = np.hstack(val_data.y)
        test_data.y = np.hstack(test_data.y)
        # create 10 dimensional vector corresponding to each label
        train_data.y = np.float32(np.eye(10))[train_data.y]
        val_data.y = np.float32(np.eye(10))[val_data.y]
        test_data.y = np.float32(np.eye(10))[test_data.y]

        # TODO: convert the data to range [-1,1]
        # reshape the data into image size(#images, channels, height, width). 
        # Each row contains an image in the original dataset
        train_data.X = np.reshape(train_data.X, (-1, 1, 28, 28))
        val_data.X = np.reshape(val_data.X, (-1, 1, 28, 28))
        test_data.X = np.reshape(test_data.X, (-1, 1, 28, 28))

        # convert to [-1 1] range
        train_data.X = train_data.X * 2.0 - 1.0
        val_data.X = val_data.X * 2.0 - 1.0
        test_data.X = test_data.X * 2.0 - 1.0
       

    elif(dataset.lower() == 'cifar10'):
        print('Loading CIFAR-10 dataset from pylearn2')
        train_set_size = int(_DATASET_SIZE['cifar10'] * train_percent)
        train_data = CIFAR10(which_set='train', start=0, stop=train_set_size)
        val_data = CIFAR10(which_set='train', start=train_set_size, stop=50000)
        test_data = CIFAR10(which_set='test')

        # convert labels into 1D array
        train_data.y = np.hstack(train_data.y)
        val_data.y = np.hstack(val_data.y)
        test_data.y = np.hstack(test_data.y)
        # create 10 dimensional vector corresponding to each label
        train_data.y = np.float32(np.eye(10))[train_data.y]
        val_data.y = np.float32(np.eye(10))[val_data.y]
        test_data.y = np.float32(np.eye(10))[test_data.y]

        # TODO: convert the data to range [-1,1]
        # reshape the data into image size(#images, channels, height, width). 
        # Each row contains an image in the original dataset
        train_data.X = np.reshape(train_data.X, (-1, 3, 32, 32))
        val_data.X = np.reshape(val_data.X, (-1, 3, 32, 32))
        test_data.X = np.reshape(test_data.X, (-1, 3, 32, 32))

        # convert to [-1 1] range
        train_data.X = train_data.X * (2.0/255) - 1.0
        val_data.X = val_data.X * (2.0/255) - 1.0
        test_data.X = test_data.X * (2.0/255) - 1.0
    elif(dataset.lower() == 'svhn'):
        train_data = SVHN(which_set= 'splitted_train', axes= ['b', 'c', 0, 1])     
        val_data = SVHN(which_set= 'valid', axes= ['b', 'c', 0, 1])    
        test_data = SVHN(which_set= 'test', axes= ['b', 'c', 0, 1])
        # convert labels into 1D array
        train_data.y = np.hstack(train_data.y)
        val_data.y = np.hstack(val_data.y)
        test_data.y = np.hstack(test_data.y)
        # create 10 dimensional vector corresponding to each label
        train_data.y = np.float32(np.eye(10))[train_data.y]
        val_data.y = np.float32(np.eye(10))[val_data.y]
        test_data.y = np.float32(np.eye(10))[test_data.y]
        # convert to [-1, 1] range
        train_data.X = np.reshape(np.subtract(np.multiply(2.0/255, train_data.X), 1.0), (-1, 3, 32, 32))
        val_data.X = np.reshape(np.subtract(np.multiply(2.0/255, val_data.X), 1.0), (-1, 3, 32, 32))
        test_data.X = np.reshape(np.subtract(np.multiply(2.0/255, test_data.X), 1.0), (-1, 3, 32, 32))
    else:
        print('This dataset is not supported. Only MNIST, CIFAR-10 and SVHN are supported as of now.')
        raise ValueError('Dataset is not supported')

    print('Trainset shape = ', train_data.X.shape, train_data.y.shape)
    print('Valset shape = ', val_data.X.shape, val_data.y.shape)
    print('Testset shape = ', test_data.X.shape, test_data.y.shape)
    return train_data.X, val_data.X, test_data.X, train_data.y, val_data.y, test_data.y
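A hedged usage example for load_data; the dataset name string is taken from the branches above, and the shapes in the comment follow from the default 0.8/0.2 split of the 50000 CIFAR-10 training images.

train_x, val_x, test_x, train_y, val_y, test_y = load_data('cifar10')
# With the default split: train_x.shape == (40000, 3, 32, 32),
# train_y.shape == (40000, 10).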
Example #14
                                preprocessor=preprocessor,
                                start=45000,
                                stop=50000)
        test_set = ZCA_Dataset(preprocessed_dataset=serial.load(
            "${PYLEARN2_DATA_PATH}/cifar10/pylearn2_gcn_whitened/test.pkl"),
                               preprocessor=preprocessor)

        # for both datasets, onehot the target
        train_set.y = np.float32(onehot(train_set.y))
        valid_set.y = np.float32(onehot(valid_set.y))
        test_set.y = np.float32(onehot(test_set.y))

    elif dataset == "SVHN":

        train_set = SVHN(which_set='splitted_train',
                         path="${SVHN_LOCAL_PATH}",
                         axes=['b', 'c', 0, 1])

        valid_set = SVHN(which_set='valid',
                         path="${SVHN_LOCAL_PATH}",
                         axes=['b', 'c', 0, 1])

        test_set = SVHN(which_set='test',
                        path="${SVHN_LOCAL_PATH}",
                        axes=['b', 'c', 0, 1])

    print 'Creating the model'

    # storing format hyperparameters
    format = sys.argv[2]
Example #15
class HPS:
    def __init__(self,
                 state,
                 base_channel_names = ['train_objective'],
                 save_prefix = "model_",
                 cache_dataset = True):
        self.cache_dataset = cache_dataset
        self.dataset_cache = {}
        self.state = state
        self.mbsb_channel_name = self.state.term_array.early_stopping.save_best_channel
        self.base_channel_names = base_channel_names
        self.save_prefix = save_prefix
        # TODO store this in data for each experiment or dataset

    def run(self):
        (model, learner, algorithm) \
            = self.get_config()
#         try:
        print 'learning'
          
        learner.main_loop()

#         except Exception, e:
#             print e

        print 'End of model training'

    def get_config(self):
        # dataset
        self.load_dataset()

        # model
        self.load_model()

        # monitor:
        self.setup_monitor()

        # training algorithm
        algorithm = self.get_train()

        # extensions
        extensions = self.get_extensions()

        # channels
        #self.setup_channels()

        # learner
        learner = Train(dataset=self.train_ddm,
                        model=self.model,
                        algorithm=algorithm,
                        extensions=extensions)

        return (self.model, learner, algorithm)

    def load_dataset(self):
        # TODO: we might need other variables for identifying what kind of
        # extra preprocessing was done such as features product and number
        # of features kept based on MI.
        #base_path = get_data_path(self.state)
        #self.base_path = base_path

        #import pdb
        #pdb.set_trace()
        
        if self.state.dataset == 'mnist':
            self.test_ddm = MNIST(which_set='test', one_hot=True)

            dataset = MNIST(which_set='train', shuffle=True, one_hot=True)
            train_X, valid_X = np.split(dataset.X, [50000])
            train_y, valid_y = np.split(dataset.y, [50000])
            self.train_ddm = DenseDesignMatrix(X=train_X, y=train_y)
            self.valid_ddm = DenseDesignMatrix(X=valid_X, y=valid_y)
            
        elif self.state.dataset == 'svhn':
            self.train_ddm = SVHN(which_set='splitted_train')
            self.test_ddm = SVHN(which_set='test')
            self.valid_ddm = SVHN(which_set='valid')

        elif self.state.dataset == 'cifar10':

            self.train_ddm = My_CIFAR10(which_set='train', one_hot=True)
            self.test_ddm = None
            self.valid_ddm = My_CIFAR10(which_set='test', one_hot=True)

        
        if self.train_ddm is not None:
            self.nvis = self.train_ddm.X.shape[1]
            self.nout = self.train_ddm.y.shape[1]
            print "nvis, nout :", self.nvis, self.nout
            self.ntrain = self.train_ddm.X.shape[0]
            print "ntrain :", self.ntrain
        
        if self.valid_ddm is not None:
            self.nvalid = self.valid_ddm.X.shape[0]
            print "nvalid :", self.nvalid
        
        if self.test_ddm is not None:
            self.ntest = self.test_ddm.X.shape[0]
            print "ntest :", self.ntest

    def load_model(self):
        model_class = self.state.model_class
        fn = getattr(self, 'get_model_'+model_class)
        self.model = fn()
        return self.model
 
    def get_model_mlp(self):
        self.dropout = False
        self.input_include_probs = {}
        self.input_scales = {}
        self.weight_decay = False
        self.weight_decays = {}
        self.l1_weight_decay = False
        self.l1_weight_decays = {}

        nnet_layers = self.state.layers
        input_space_id = self.state.input_space_id
        nvis = self.nvis
        self.batch_size = self.state.batch_size
        # TODO: add input_space as a config option.
        input_space = None
        # TODO: top_view always False for the moment.
        self.topo_view = False
        assert nvis is not None
        layers = []
        
        for i,layer in enumerate(nnet_layers.values()):
            layer = expand(layer)
            layer = self.get_layer(layer, i)
            layers.append(layer)
        # create MLP:
        print layers
        model = My_MLP(layers=layers,input_space=input_space,nvis=nvis,
                    batch_size=self.batch_size)
        self.mlp = model
        return model

    def get_layer(self, layer, layer_id):
        layer_class = layer.layer_class
        layer_name = layer.layer_name
        dropout_scale = layer.dropout_scale
        dropout_prob = layer.dropout_probability
        weight_decay = layer.weight_decay
        l1_weight_decay = layer.l1_weight_decay
        fn = getattr(self, 'get_layer_'+layer_class)
        if layer_name is None:
            layer_name = layer_class+str(layer_id)
            layer.layer_name = layer_name
        layer = fn(layer)
        # per-layer cost function parameters:
        if (dropout_scale is not None):
            self.dropout = True
            self.input_scales[layer_name] = dropout_scale
        if (dropout_prob is not None):
            self.dropout = True
            self.input_include_probs[layer_name] = (1. - dropout_prob)
        if (weight_decay is not None):
            self.weight_decay = True
            self.weight_decays[layer_name] = weight_decay
        if (l1_weight_decay is not None):
            self.l1_weight_decay = True
            self.l1_weight_decays[layer_name] = l1_weight_decay
        return layer

    def get_layer_sigmoid(self, layer):
        return Sigmoid(layer_name=layer.layer_name,dim=layer.dim,irange=layer.irange,
                istdev=layer.istdev,sparse_init=layer.sparse_init,
                sparse_stdev=layer.sparse_stdev, include_prob=layer.include_prob,
                init_bias=layer.init_bias,W_lr_scale=layer.W_lr_scale,
                b_lr_scale=layer.b_lr_scale,max_col_norm=layer.max_col_norm,
                max_row_norm=layer.max_row_norm)

    def get_layer_tanh(self, layer):
        return My_Tanh(layer_name=layer.layer_name,dim=layer.dim,irange=layer.irange,
                istdev=layer.istdev,sparse_init=layer.sparse_init,
                sparse_stdev=layer.sparse_stdev, include_prob=layer.include_prob,
                init_bias=layer.init_bias,W_lr_scale=layer.W_lr_scale,
                b_lr_scale=layer.b_lr_scale,max_col_norm=layer.max_col_norm,
                max_row_norm=layer.max_row_norm)

    def get_layer_rectifiedlinear(self, layer):
        # TODO: left_slope is set to 0.0  It should be set by the user!
        layer.left_slope = 0.0
        return RectifiedLinear(layer_name=layer.layer_name,dim=layer.dim,irange=layer.irange,
                istdev=layer.istdev,sparse_init=layer.sparse_init,
                sparse_stdev=layer.sparse_stdev, include_prob=layer.include_prob,
                init_bias=layer.init_bias,W_lr_scale=layer.W_lr_scale,
                b_lr_scale=layer.b_lr_scale,max_col_norm=layer.max_col_norm,
                max_row_norm=layer.max_row_norm,
                left_slope=layer.left_slope,use_bias=layer.use_bias)
        
    def get_layer_softmax(self, layer):
        
        return My_Softmax(layer_name=layer.layer_name,n_classes=layer.dim,irange=layer.irange,
                istdev=layer.istdev,sparse_init=layer.sparse_init,
                init_bias_target_marginals=layer.init_bias, W_lr_scale=layer.W_lr_scale,
                b_lr_scale=layer.b_lr_scale, max_col_norm=layer.max_col_norm,
                max_row_norm=layer.max_row_norm)
        
    def get_layer_noisyRELU(self, layer):
        
        return NoisyRELU(
                        dim=layer.dim,
                        layer_name=layer.layer_name,
                        irange=layer.irange,
                        sparse_init=layer.sparse_init,
                        W_lr_scale=layer.W_lr_scale,
                        b_lr_scale=layer.b_lr_scale,
                        mask_weights = None,
                        max_row_norm=layer.max_row_norm,
                        max_col_norm=layer.max_col_norm,
                        use_bias=True,
                        noise_factor=layer.noise_factor,
                        desired_active_rate=layer.desired_active_rate,
                        adjust_threshold_factor=layer.adjust_threshold_factor
                        )
        
    def get_layer_gaussianRELU(self, layer):
        
        return GaussianRELU(
                        dim=layer.dim,
                        layer_name=layer.layer_name,
                        irange=layer.irange,
                        sparse_init=layer.sparse_init,
                        W_lr_scale=layer.W_lr_scale,
                        b_lr_scale=layer.b_lr_scale,
                        mask_weights = None,
                        max_row_norm=layer.max_row_norm,
                        max_col_norm=layer.max_col_norm,
                        use_bias=True,
                        desired_active_rate=layer.desired_active_rate,
                        adjust_threshold_factor=layer.adjust_threshold_factor,
                        noise_std=layer.noise_std
                        )

    def setup_monitor(self):
        if self.topo_view:
            print "topo view"
            self.minibatch = T.as_tensor_variable(
                        self.valid_ddm.get_batch_topo(self.batch_size),
                        name='minibatch'
                    )
        else:
            print "design view"
            batch = self.valid_ddm.get_batch_design(self.batch_size)
            if isinstance(batch, spp.csr_matrix):
                print "sparse2"
                self.minibatch = self.model.get_input_space().make_batch_theano()
                print type(self.minibatch)
            else:
                self.minibatch = T.as_tensor_variable(
                        self.valid_ddm.get_batch_design(self.batch_size),
                        name='minibatch'
                    )

        self.target = T.matrix('target')

        self.monitor = Monitor.get_monitor(self.model)
        self.log_channel_names = []
        self.log_channel_names.extend(self.base_channel_names)

#         self.monitor.add_dataset(self.valid_ddm, self.state.train_iteration_mode,
#                                     self.batch_size)
#         if self.test_ddm is not None:
#             self.monitor.add_dataset(self.test_ddm, self.state.train_iteration_mode,
#                                         self.batch_size)

    def get_train(self):
        train_class = self.state.train_class
        fn = getattr(self, 'get_train_'+train_class)
        return fn()

    def get_train_sgd(self):

        cost = MethodCost('cost_from_X')
        #cost = self.get_costs()
        num_train_batch = (self.ntrain/self.batch_size)
        print "num training batches:", num_train_batch

        termination_criterion = self.get_terminations()

        monitoring_dataset = {}
        for dataset_id in self.state.monitoring_dataset:
            if dataset_id == 'test' and self.test_ddm is not None:
                monitoring_dataset['test'] = self.test_ddm
            elif dataset_id == 'valid' and self.valid_ddm is not None:
                monitoring_dataset['valid'] = self.valid_ddm
            else:
                monitoring_dataset = None
            
        return SGD( learning_rate=self.state.learning_rate,
                    batch_size=self.state.batch_size,
                    cost=cost,
                    batches_per_iter=num_train_batch,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    init_momentum=self.state.init_momentum,
                    train_iteration_mode=self.state.train_iteration_mode)


    def get_terminations(self):
        if 'term_array' not in self.state:
            return None
        terminations = []

        for term_obj in self.state.term_array.values():
            fn = getattr(self, 'get_term_' + term_obj.term_class)
            terminations.append(fn(term_obj))
        if len(terminations) > 1:
            return And(terminations)
        return terminations[0]

    def get_term_epochcounter(self, term_obj):
        return EpochCounter(term_obj.max_epochs)

    def get_term_monitorbased(self, term_obj):
        print 'monitor_based'
        return MonitorBased(
                prop_decrease=term_obj.proportional_decrease,
                N=term_obj.max_epochs,
                channel_name=term_obj.channel_name
            )

    def get_extensions(self):
        if 'ext_array' not in self.state:
            return []
        extensions = []

        for ext_obj in self.state.ext_array.values():
            fn = getattr(self, 'get_ext_' + ext_obj.ext_class)
            extensions.append(fn(ext_obj))

        # monitor based save best
        print 'save best channel', self.mbsb_channel_name
        if self.mbsb_channel_name is not None:
            self.save_path = self.save_prefix + str(self.state.config_id) + "_optimum.pkl"
            extensions.append(MonitorBasedSaveBest(
                    channel_name = self.mbsb_channel_name,
                    save_path = self.save_path
                )
            )

        return extensions

    def get_ext_exponentialdecayoverepoch(self, ext_obj):
        return ExponentialDecayOverEpoch(
            decay_factor=ext_obj.decay_factor,
            min_lr_scale=ext_obj.min_lr_scale
        )

    def get_ext_momentumadjustor(self, ext_obj):
        return MomentumAdjustor(
            final_momentum=ext_obj.final_momentum,
            start=ext_obj.start_epoch,
            saturate=ext_obj.saturate_epoch
        )
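A hedged sketch of how the HPS class above is driven; the state argument is a hyperparameter configuration object whose construction is not shown in this snippet.

# `state` is assumed to expose the fields accessed above
# (state.dataset, state.model_class, state.layers, state.batch_size,
#  state.term_array, state.ext_array, ...).
hps = HPS(state, save_prefix="model_", cache_dataset=True)
hps.run()  # get_config() -> load_dataset/load_model/setup_monitor, then Train(...).main_loop()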
Example #16
    os.makedirs(os.path.join(local_path, 'h5'))

for d_set in [train_name, valid_name, test_name]:
    if not os.path.isfile(os.path.join(local_path, d_set)):
        logging.info("Copying data from {0} to {1}".format(os.path.join(local_path, d_set), local_path))
        shutil.copyfile(os.path.join(orig_path, d_set),
                        os.path.join(local_path, d_set))

def check_dtype(data):
    if str(data.X.dtype) != config.floatX:
        logging.warning("The dataset is saved as {}, changing theano's floatX "\
                "to the same dtype".format(data.X.dtype))
        config.floatX = str(data.X.dtype)

# Load train data
train = SVHN('splitted_train', path=local_path)
check_dtype(train)

# prepare preprocessing
pipeline = preprocessing.Pipeline()
# Without a batch_size there is a high chance of hitting a memory error
# or a PyTables crash.
pipeline.items.append(preprocessing.GlobalContrastNormalization(batch_size=5000))
pipeline.items.append(preprocessing.LeCunLCN((32,32)))

# apply the preprocessings to train
train.apply_preprocessor(pipeline, can_fit=True)
del train

# load and preprocess valid
valid = SVHN('valid', path=local_path)
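The snippet ends right after loading valid; presumably the same fitted pipeline is then applied to valid and test without refitting. A hedged sketch of that continuation (can_fit=False mirrors the usual pylearn2 convention but is an assumption here):

check_dtype(valid)
valid.apply_preprocessor(pipeline, can_fit=False)
del valid

test = SVHN('test', path=local_path)
check_dtype(test)
test.apply_preprocessor(pipeline, can_fit=False)
del test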
Example #17
LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
print("LR_decay = " + str(LR_decay))
# BTW, LR decay might be good for the BN moving average...

# for SVHN, depending on available CPU memory
# 1, 2, 4, 7 or 14
shuffle_parts = 1
# shuffle_parts = 2 # does not work on bart5
# shuffle_parts = 4 # seems to work on bart5
# shuffle_parts = 7 # just to be safe
print("shuffle_parts = " + str(shuffle_parts))

print('Loading SVHN dataset')

train_set = SVHN(which_set='splitted_train', axes=['b', 'c', 0, 1])

valid_set = SVHN(which_set='valid', axes=['b', 'c', 0, 1])

test_set = SVHN(which_set='test', axes=['b', 'c', 0, 1])

# bc01 format
# Inputs in the range [-1,+1]
# print("Inputs in the range [-1,+1]")
train_set.X = np.reshape(
    np.subtract(np.multiply(2. / 255., train_set.X), 1.), (-1, 3, 32, 32))
valid_set.X = np.reshape(
    np.subtract(np.multiply(2. / 255., valid_set.X), 1.), (-1, 3, 32, 32))
Example #18
def main(method, LR_start, Binarize_weight_only):

    name = "svhn"
    print("dataset = " + str(name))

    print("Binarize_weight_only=" + str(Binarize_weight_only))

    print("Method = " + str(method))

    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # Training parameters
    batch_size = 50
    print("batch_size = " + str(batch_size))

    num_epochs = 50
    print("num_epochs = " + str(num_epochs))

    print("LR_start = " + str(LR_start))
    LR_decay = 0.1
    print("LR_decay=" + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    if Binarize_weight_only == "w":
        activation = lasagne.nonlinearities.rectify
    else:
        activation = lab.binary_tanh_unit
    print("activation = " + str(activation))

    ## number of filters in the first convolutional layer
    K = 64
    print("K=" + str(K))

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    l_in = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    # 128C3-128C3-P2
    l_cnn1 = lab.Conv2DLayer(l_in,
                             num_filters=K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn1 = batch_norm.BatchNormLayer(l_cnn1, epsilon=epsilon, alpha=alpha)

    l_nl1 = lasagne.layers.NonlinearityLayer(l_bn1, nonlinearity=activation)

    l_cnn2 = lab.Conv2DLayer(l_nl1,
                             num_filters=K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp1 = lasagne.layers.MaxPool2DLayer(l_cnn2, pool_size=(2, 2))

    l_bn2 = batch_norm.BatchNormLayer(l_mp1, epsilon=epsilon, alpha=alpha)

    l_nl2 = lasagne.layers.NonlinearityLayer(l_bn2, nonlinearity=activation)
    # 256C3-256C3-P2
    l_cnn3 = lab.Conv2DLayer(l_nl2,
                             num_filters=2 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn3 = batch_norm.BatchNormLayer(l_cnn3, epsilon=epsilon, alpha=alpha)

    l_nl3 = lasagne.layers.NonlinearityLayer(l_bn3, nonlinearity=activation)

    l_cnn4 = lab.Conv2DLayer(l_nl3,
                             num_filters=2 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp2 = lasagne.layers.MaxPool2DLayer(l_cnn4, pool_size=(2, 2))

    l_bn4 = batch_norm.BatchNormLayer(l_mp2, epsilon=epsilon, alpha=alpha)

    l_nl4 = lasagne.layers.NonlinearityLayer(l_bn4, nonlinearity=activation)

    # 512C3-512C3-P2
    l_cnn5 = lab.Conv2DLayer(l_nl4,
                             num_filters=4 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_bn5 = batch_norm.BatchNormLayer(l_cnn5, epsilon=epsilon, alpha=alpha)

    l_nl5 = lasagne.layers.NonlinearityLayer(l_bn5, nonlinearity=activation)

    l_cnn6 = lab.Conv2DLayer(l_nl5,
                             num_filters=4 * K,
                             filter_size=(3, 3),
                             pad=1,
                             nonlinearity=lasagne.nonlinearities.identity,
                             method=method)

    l_mp3 = lasagne.layers.MaxPool2DLayer(l_cnn6, pool_size=(2, 2))

    l_bn6 = batch_norm.BatchNormLayer(l_mp3, epsilon=epsilon, alpha=alpha)

    l_nl6 = lasagne.layers.NonlinearityLayer(l_bn6, nonlinearity=activation)

    # print(cnn.output_shape)

    # 1024FP-1024FP-10FP
    l_dn1 = lab.DenseLayer(l_nl6,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=1024,
                           method=method)

    l_bn7 = batch_norm.BatchNormLayer(l_dn1, epsilon=epsilon, alpha=alpha)

    l_nl7 = lasagne.layers.NonlinearityLayer(l_bn7, nonlinearity=activation)

    l_dn2 = lab.DenseLayer(l_nl7,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=1024,
                           method=method)

    l_bn8 = batch_norm.BatchNormLayer(l_dn2, epsilon=epsilon, alpha=alpha)

    l_nl8 = lasagne.layers.NonlinearityLayer(l_bn8, nonlinearity=activation)

    l_dn3 = lab.DenseLayer(l_nl8,
                           nonlinearity=lasagne.nonlinearities.identity,
                           num_units=10,
                           method=method)

    l_out = batch_norm.BatchNormLayer(l_dn3, epsilon=epsilon, alpha=alpha)

    train_output = lasagne.layers.get_output(l_out, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    if method != "FPN":
        # W updates
        W = lasagne.layers.get_all_params(l_out, binary=True)
        W_grads = lab.compute_grads(loss, l_out)
        updates = optimizer.adam(loss_or_grads=W_grads,
                                 params=W,
                                 learning_rate=LR)
        updates = lab.clipping_scaling(updates, l_out)

        # other parameters updates
        params = lasagne.layers.get_all_params(l_out,
                                               trainable=True,
                                               binary=False)
        updates = OrderedDict(updates.items() + optimizer.adam(
            loss_or_grads=loss, params=params, learning_rate=LR).items())

        ## update 2nd moment, can get from the adam optimizer also
        updates3 = OrderedDict()
        acc_tag = lasagne.layers.get_all_params(l_out, acc=True)
        idx = 0
        beta2 = 0.999
        for acc_tag_temp in acc_tag:
            updates3[acc_tag_temp] = acc_tag_temp * beta2 + W_grads[
                idx] * W_grads[idx] * (1 - beta2)
            idx = idx + 1

        updates = OrderedDict(updates.items() + updates3.items())
    else:
        params = lasagne.layers.get_all_params(l_out, trainable=True)
        updates = optimizer.adam(loss_or_grads=loss,
                                 params=params,
                                 learning_rate=LR)

    test_output = lasagne.layers.get_output(l_out, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)
    val_fn = theano.function([input, target], [test_loss, test_err])

    ## load data
    print('Loading SVHN dataset')

    train_set = SVHN(
        which_set='splitted_train',
        # which_set= 'valid',
        path="${SVHN_LOCAL_PATH}",
        axes=['b', 'c', 0, 1])

    valid_set = SVHN(which_set='valid',
                     path="${SVHN_LOCAL_PATH}",
                     axes=['b', 'c', 0, 1])

    test_set = SVHN(which_set='test',
                    path="${SVHN_LOCAL_PATH}",
                    axes=['b', 'c', 0, 1])

    # bc01 format
    # print train_set.X.shape
    train_set.X = np.reshape(train_set.X, (-1, 3, 32, 32))
    valid_set.X = np.reshape(valid_set.X, (-1, 3, 32, 32))
    test_set.X = np.reshape(test_set.X, (-1, 3, 32, 32))

    train_set.y = np.array(train_set.y).flatten()
    valid_set.y = np.array(valid_set.y).flatten()
    test_set.y = np.array(test_set.y).flatten()

    # Onehot the targets
    train_set.y = np.float32(np.eye(10)[train_set.y])
    valid_set.y = np.float32(np.eye(10)[valid_set.y])
    test_set.y = np.float32(np.eye(10)[test_set.y])

    # for hinge loss
    train_set.y = 2 * train_set.y - 1.
    valid_set.y = 2 * valid_set.y - 1.
    test_set.y = 2 * test_set.y - 1.

    print('Training...')

    # ipdb.set_trace()
    lab.train(name, method, train_fn, val_fn, batch_size, LR_start, LR_decay,
              num_epochs, train_set.X, train_set.y, valid_set.X, valid_set.y,
              test_set.X, test_set.y)
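A hedged example of invoking the entry point above; the method string and learning rate are illustrative, and the accepted method values depend on the lab module, which is not shown here.

if __name__ == "__main__":
    # "FPN" (full precision) is the only method name visible in this snippet;
    # other accepted values are whatever lab.Conv2DLayer/DenseLayer support.
    main(method="FPN", LR_start=0.001, Binarize_weight_only="w")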