def main():
    dataset_file = 'mnist.npz'
    if not isfile(dataset_file):
        downloadMnist(dataset_file)
    
    data = dict(np.load(dataset_file))
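    # Rescale the inputs to [-1, 1] (the stored values are assumed to be in [0, 1])
    # and cast the labels to int32.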
    data['x_tr'] = data['x_tr'] * 2.0 - 1.0
    data['x_va'] = data['x_va'] * 2.0 - 1.0
    data['x_te'] = data['x_te'] * 2.0 - 1.0
    data['t_tr'] = data['t_tr'].astype(np.int32)
    data['t_va'] = data['t_va'].astype(np.int32)
    data['t_te'] = data['t_te'].astype(np.int32)

    rng = np.random.RandomState()
    srng = RandomStreamsGPU(rng.randint(1, 2147462579, size=(6,)))

    # Setup data loaders
    train_generator = DefaultDataLoader(data['x_tr'], data['t_tr'], 100, rng=rng)
    validation_generator = DefaultDataLoader(data['x_va'], data['t_va'], 100)
    test_generator = DefaultDataLoader(data['x_te'], data['t_te'], 100)

    # Load real-valued model parameters for initialization
    init_model_file = 'mnist_pi_model_ternary_tanh.npz'
    if isfile(init_model_file):
        initial_parameters = dict(np.load(init_model_file))
        print 'Loading initial parameters from \'%s\'' % (init_model_file)
        print 'Parameters:', [e for e in initial_parameters]
    else:
        raise Exception('Cannot find initial model \'%s\'' % (init_model_file))

    # Create model
    global parameters
    layer, parameters = getMnistPIModel(initial_parameters, rng, srng)

    # Do optimization
    print layer.getMessage()
    cbErrVaDecreased = lambda: cbValidationErrorDecreased()

    global p_vals
    optimizeNetwork(layer,
        loader_tr=train_generator,
        loader_va=validation_generator,
        loader_te=test_generator,
        optimization_algorithm='adam',
        step_size=1e-3,
        step_size_discrete=1e-2,
        step_size_scale_fn={'type'     : 'plateau',
                            'monitor'  : 'ce_va',
                            'cooldown' : 50,
                            'patience' : 10,
                            'factor'   : 0.5},
        n_epochs=500,
        do_bn_updates_after_epoch=True,
        callback_validation_error_decreased=[(cbErrVaDecreased, [])])

    # Store model parameters. The model parameters of the best model according to the validation error are now in
    # p_vals.
    model_file = 'mnist_pi_model_ternary_sign_from_tanh.npz'
    print 'Optimization finished. Storing model parameters to \'%s\'' % model_file
    np.savez_compressed(model_file, **p_vals)


def main():
    dataset_file = 'mnist.npz'
    if not isfile(dataset_file):
        downloadMnist(dataset_file)

    data = dict(np.load(dataset_file))
    data['x_tr'] = data['x_tr'] * 2.0 - 1.0
    data['x_va'] = data['x_va'] * 2.0 - 1.0
    data['x_te'] = data['x_te'] * 2.0 - 1.0
    data['t_tr'] = data['t_tr'].astype(np.int32)
    data['t_va'] = data['t_va'].astype(np.int32)
    data['t_te'] = data['t_te'].astype(np.int32)

    rng = np.random.RandomState()
    srng = RandomStreamsGPU(rng.randint(1, 2147462579, size=(6, )))

    # Setup data loaders
    train_generator = DefaultDataLoader(data['x_tr'],
                                        data['t_tr'],
                                        100,
                                        rng=rng)
    validation_generator = DefaultDataLoader(data['x_va'], data['t_va'], 100)
    test_generator = DefaultDataLoader(data['x_te'], data['t_te'], 100)

    # Create model
    global parameters
    layer, parameters = getMnistPIModel(rng, srng)

    # Do optimization
    print layer.getMessage()
    cbErrVaDecreased = lambda: cbValidationErrorDecreased()

    global p_vals
    optimizeNetwork(layer,
                    loader_tr=train_generator,
                    loader_va=validation_generator,
                    loader_te=test_generator,
                    optimization_algorithm='adam',
                    step_size=1e-3,
                    step_size_scale_fn={
                        'type': 'plateau',
                        'monitor': 'ce_va',
                        'cooldown': 150,
                        'patience': 25,
                        'factor': 0.5
                    },
                    n_epochs=1000,
                    callback_validation_error_decreased=[(cbErrVaDecreased, [])])

    # Store model parameters. The model parameters of the best model according to the validation error are now in
    # p_vals.
    model_file = 'mnist_pi_model_real.npz'
    print 'Optimization finished. Storing model parameters to \'%s\'' % model_file
    np.savez_compressed(model_file, **p_vals)


def main():
    dataset_file = 'cifar100.npz'
    if not isfile(dataset_file):
        downloadCifar100(dataset_file)

    data = dict(np.load(dataset_file))
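    # Rescale the pixel values from [0, 255] to [-1, 1], reshape them into
    # NCHW (3x32x32) images, and cast the labels to int32.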
    data['x_tr'] = ((data['x_tr'] / 255.0 * 2.0) - 1.0).astype(
        np.float32).reshape(-1, 3, 32, 32)
    data['x_va'] = ((data['x_va'] / 255.0 * 2.0) - 1.0).astype(
        np.float32).reshape(-1, 3, 32, 32)
    data['x_te'] = ((data['x_te'] / 255.0 * 2.0) - 1.0).astype(
        np.float32).reshape(-1, 3, 32, 32)
    data['t_tr'] = data['t_tr'].astype(np.int32)
    data['t_va'] = data['t_va'].astype(np.int32)
    data['t_te'] = data['t_te'].astype(np.int32)

    rng = np.random.RandomState()
    srng = RandomStreamsGPU(rng.randint(1, 2147462579, size=(6, )))

    # Setup data loaders
    train_generator = Cifar10FlipShiftDataLoader(data['x_tr'],
                                                 data['t_tr'],
                                                 100,
                                                 flip_axis=1,
                                                 max_shift=4,
                                                 requires_train=True,
                                                 rng=rng)
    validation_generator = DefaultDataLoader(data['x_va'], data['t_va'], 100)
    test_generator = DefaultDataLoader(data['x_te'], data['t_te'], 100)

    # Create model
    global parameters
    layer, parameters = getCifar100Model(rng, srng)

    # Do optimization
    print layer.getMessage()
    cbErrVaDecreased = lambda: cbValidationErrorDecreased()

    global p_vals
    optimizeNetwork(layer,
                    loader_tr=train_generator,
                    loader_va=validation_generator,
                    loader_te=test_generator,
                    optimization_algorithm='adam',
                    step_size=3e-4,
                    step_size_scale_fn={
                        'type': 'plateau',
                        'monitor': 'ce_va',
                        'cooldown': 50,
                        'patience': 10,
                        'factor': 0.5
                    },
                    n_epochs=500,
                    callback_validation_error_decreased=[(cbErrVaDecreased, [])])

    # Store model parameters. The model parameters of the best model according to the validation error are now in
    # p_vals.
    model_file = 'cifar100_model_real.npz'
    print 'Optimization finished. Storing model parameters to \'%s\'' % model_file
    np.savez_compressed(model_file, **p_vals)


def SamplingdenseSDAEexp(state, channel):
    """
    This script launch a SDAE experiment, training in a greedy layer wise fashion.
    The hidden layer activation function is the rectifier activation (i.e. max(0,y)). The reconstruction activation function
    is the sigmoid. The reconstruction cost is the cross-entropy. From one layer to the next we need to scale the
    parameters in order to ensure that the representation is in the interval [0,1].
    The noise of the input layer is a salt and pepper noise ('binomial_NLP'), for deeper layers it is a zero masking
    noise (binomial).
    """
    SavePath = channel.remote_path + '/' if hasattr(
        channel, 'remote_path') else channel.path + '/'

    numpy.random.seed(state.seed)

    Wenc, benc = createWbshared(numpy.random, state.n_inp, state.n_hid, 'enc')
    Wdec, bdec = createWbshared(numpy.random, state.n_hid, state.n_inp, 'dec')

    # Load the entire training data
    full_train = vectosparsemat(data_path + state['path_data'],
                                state['ninputs'])
    full_test = vectosparsemat(data_path + state['path_data_test'],
                               state['ninputs'])
    full_train = full_train[numpy.random.permutation(full_train.shape[0]), :]
    NB_DENSE_train = int(
        numpy.ceil(full_train.shape[0] / float(state['dense_size'])))
    NB_DENSE_test = int(
        numpy.ceil(full_test.shape[0] / float(state['dense_size'])))

    # Create the dense batch shared variable
    train = theano.shared(
        createdensebatch(full_train, state['dense_size'], 0)[0])
    zer_mask_shared = theano.shared(
        numpy.asarray(numpy.random.binomial(n=1,
                                            p=1 - state.zeros,
                                            size=train.value.shape),
                      dtype=theano.config.floatX))
    one_mask_shared = theano.shared(
        numpy.asarray(numpy.random.binomial(n=1,
                                            p=state.ones,
                                            size=train.value.shape),
                      dtype=theano.config.floatX))
    #------------------------------
    state.bestindomain = -1
    state.bestindomainstd = -1
    state.bestindomainval = -1
    state.bestindomainvalstd = -1
    state.bestindomainvalde = -1
    state.bestrec = -1
    state.bestrecde = -1
    state.bestonlinerec = -1
    state.bestonlinerecde = -1
    epochsl = []
    indomain = []
    indomainstd = []
    indomainval = []
    indomainvalstd = []
    rec = []
    recdense = []
    reconline = []
    #-------------------------------

    # Model initialization:
    inp = T.matrix()
    RandomStreams = RandomStreamsGPU(state.seed)
    zer_mask = T.matrix()
    one_mask = T.matrix()

    if state.pattern == 'inp':
        pattern = T.cast((inp + RandomStreams.binomial(
            size=inp.shape, n=1, p=state.ratio, dtype=theano.config.floatX)) >
                         0,
                         dtype=theano.config.floatX)
    elif state.pattern == 'noise':
        pattern = T.cast(
            ((1 - zer_mask) * inp + one_mask + RandomStreams.binomial(
                size=inp.shape, n=1, p=state.ratio,
                dtype=theano.config.floatX)) > 0,
            dtype=theano.config.floatX)
    elif state.pattern == 'inpnoise':
        pattern = T.cast((inp + one_mask + RandomStreams.binomial(
            size=inp.shape, n=1, p=state.ratio, dtype=theano.config.floatX)) >
                         0,
                         dtype=theano.config.floatX)
    elif state.pattern == 'random':
        pattern = RandomStreams.binomial(size=inp.shape,
                                         n=1,
                                         p=state.ratio,
                                         dtype=theano.config.floatX)
    elif state.pattern is None:
        pattern = None
    else:
        assert False

    inp_noise = binomial_NLP_noise(inp, zer_mask, one_mask)
    hid_lin = T.dot(inp_noise, Wenc) + benc
    if state.act == 'rect':
        # Rectifier activation: max(0, hid_lin).
        hid_out = hid_lin * (hid_lin > 0)
    elif state.act == 'sigmoid':
        hid_out = T.nnet.sigmoid(hid_lin)
    # Activation penalty: batch mean of the summed squared hidden activations (squared, despite the L1_reg name).
    L1_reg = T.mean(T.sum(hid_out * hid_out, axis=1), axis=0)
    rec_lin = T.dot(hid_out, Wdec) + bdec
    # the sigmoid is inside the cross_entropy function.
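    # (Assumed form of the cost, for reference only:
    #  CE = -mean(inp * log(sigmoid(rec_lin)) + (1 - inp) * log(1 - sigmoid(rec_lin))),
    #  restricted to the units selected by `pattern` when one is given.)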
    if not hasattr(state, 'scaling'):
        state.scaling = False
    if state.cost == 'CE':
        cost, dum = cross_entropy_sampled_cost(inp, rec_lin, pattern,
                                               state.scaling)
        cost_dense, cost_decoupled_dense = cross_entropy_sampled_cost(
            inp, rec_lin, None)
    elif state.cost == 'MSE':
        cost, dum = MSE_sampled_cost(inp, rec_lin, pattern, state.scaling)
        cost_dense, cost_decoupled_dense = MSE_sampled_cost(inp, rec_lin, None)
    if state.regcoef != 0.:
        cost = cost + state.regcoef * L1_reg
    grad = T.grad(cost, [Wenc, Wdec, benc, bdec])
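    # Plain SGD: each parameter p is updated as p <- p - state.lr * dcost/dp.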
    updates = dict(
        (p, p - state.lr * g) for p, g in zip([Wenc, Wdec, benc, bdec], grad))
    givens = {}
    index = T.lscalar()
    givens.update(
        {inp: train[index * state.batchsize:(index + 1) * state.batchsize]})
    givens.update({
        zer_mask:
        zer_mask_shared[index * state.batchsize:(index + 1) * state.batchsize]
    })
    givens.update({
        one_mask:
        one_mask_shared[index * state.batchsize:(index + 1) * state.batchsize]
    })
    TRAINFUNC = theano.function([index], cost, updates=updates, givens=givens)
    #givens = {}
    #givens.update({inp:train[index*state.batchsizeerr:(index+1)*state.batchsizeerr]})
    #givens.update({zer_mask:zer_mask_shared[index*state.batchsizeerr:(index+1)*state.batchsizeerr]})
    #givens.update({one_mask:one_mask_shared[index*state.batchsizeerr:(index+1)*state.batchsizeerr]})
    #ERRNOISE = theano.function([index],[cost_dense,cost_decoupled_dense], givens = givens)
    givens = {}
    givens.update({
        inp:
        train[index * state.batchsizeerr:(index + 1) * state.batchsizeerr]
    })
    givens.update({
        zer_mask:
        zer_mask_shared[index * state.batchsizeerr:(index + 1) *
                        state.batchsizeerr]
    })
    givens.update({
        one_mask:
        one_mask_shared[index * state.batchsizeerr:(index + 1) *
                        state.batchsizeerr]
    })
    ERR = theano.function([index], [cost_dense, cost_decoupled_dense],
                          givens=givens)

    # Train the current DAE
    for epoch in range(state['nepochs']):
        # Load sequentially dense batches of the training data
        reconstruction_error_batch = 0
        update_count1 = 0
        for batchnb in range(NB_DENSE_train):
            train.container.value[:], realsize = createdensebatch(
                full_train, state.dense_size, batchnb)
            zer_mask_shared.container.value[:] = numpy.asarray(
                numpy.random.binomial(n=1,
                                      p=1 - state.zeros,
                                      size=train.value.shape),
                dtype=theano.config.floatX)
            one_mask_shared.container.value[:] = numpy.asarray(
                numpy.random.binomial(n=1,
                                      p=state.ones,
                                      size=train.value.shape),
                dtype=theano.config.floatX)
            for j in range(realsize // state.batchsize):
                tmp = TRAINFUNC(j)
                reconstruction_error_batch += tmp
                update_count1 += 1
            print >> sys.stderr, "\t\tAt depth %d, epoch %d, finished training over batch %s" % (
                1, epoch + 1, batchnb + 1)
            print >> sys.stderr, "\t\tMean reconstruction error %s" % (
                reconstruction_error_batch / float(update_count1))
        print >> sys.stderr, '...finished training epoch #%s' % (epoch + 1)
        full_train = full_train[
            numpy.random.permutation(full_train.shape[0]), :]
        if epoch + 1 in state.epochs:
            #rec test err
            update_count2 = 0
            test_recerr = 0
            test_recerrd = numpy.zeros((state.ninputs, ))
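            # The `train` shared variable is reused to hold test batches; the masks are set to
            # all ones / all zeros below so that no corruption is applied during evaluation.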
            for batchnb in range(NB_DENSE_test):
                train.container.value[:], realsize = createdensebatch(
                    full_test, state.dense_size, batchnb)
                zer_mask_shared.container.value[:] = numpy.ones(
                    train.value.shape, dtype=theano.config.floatX)
                one_mask_shared.container.value[:] = numpy.zeros(
                    train.value.shape, dtype=theano.config.floatX)
                for j in range(realsize // state.batchsizeerr):
                    # Update function
                    recerr, recerrd = ERR(j)
                    test_recerr += recerr
                    test_recerrd += recerrd
                    update_count2 += 1
            if not os.path.isdir(SavePath):
                os.mkdir(SavePath)
            modeldir = os.path.join(SavePath, 'currentmodel')
            if not os.path.isdir(modeldir):
                os.mkdir(modeldir)
            f = open(modeldir + '/params.pkl', 'wb')
            cPickle.dump(Wenc.value, f, -1)
            cPickle.dump(Wdec.value, f, -1)
            cPickle.dump(benc.value, f, -1)
            cPickle.dump(bdec.value, f, -1)
            f.close()
            createdatafiles(1, Wenc, benc, SavePath, state.small,
                            state.ninputs, state.act)
            currentresults = validtest(1, SavePath, state.small, state.folds,
                                       state.ninputs)
            epochsl += [epoch + 1]
            indomainval += [currentresults[1][0]]
            indomainvalstd += [currentresults[1][1]]
            indomain += [currentresults[0][0]]
            indomainstd += [currentresults[0][1]]
            rec += [test_recerr / float(update_count2)]
            recdense += [test_recerrd / float(update_count2)]
            reconline += [reconstruction_error_batch / float(update_count1)]
            print '###### RESULTS:'
            print 'Depth:', 1
            print 'Epoch:', epoch + 1
            print 'Online Reconstruction:', reconstruction_error_batch / float(
                update_count1)
            print 'Reconstruction:', test_recerr / float(update_count2)
            print 'in-domain val:', currentresults[1][
                0], '+/-', currentresults[1][1]
            print 'in-domain test:', currentresults[0][
                0], '+/-', currentresults[0][1]
            print ' '
            f = open('results.pkl', 'wb')
            cPickle.dump(epochsl, f, -1)
            cPickle.dump(rec, f, -1)
            cPickle.dump(recdense, f, -1)
            cPickle.dump(reconline, f, -1)
            cPickle.dump((indomainval, indomainvalstd), f, -1)
            cPickle.dump((indomain, indomainstd), f, -1)
            f.close()
            if test_recerr / float(
                    update_count2) < state.bestrec or state.bestrec == -1:
                state.bestrec = test_recerr / float(update_count2)
                state.bestrecde = (1, epoch + 1)
            if reconstruction_error_batch / float(
                    update_count1
            ) < state.bestonlinerec or state.bestonlinerec == -1:
                state.bestonlinerec = reconstruction_error_batch / float(
                    update_count1)
                state.bestonlinerecde = (1, epoch + 1)
            if currentresults[1][
                    0] < state.bestindomainval or state.bestindomainval == -1:
                modeldir = os.path.join(SavePath, 'bestmodel')
                if not os.path.isdir(modeldir):
                    os.mkdir(modeldir)
                f = open(modeldir + '/params.pkl', 'wb')
                cPickle.dump(Wenc.value, f, -1)
                cPickle.dump(Wdec.value, f, -1)
                cPickle.dump(benc.value, f, -1)
                cPickle.dump(bdec.value, f, -1)
                f.close()
                state.bestindomain = currentresults[0][0]
                state.bestindomainstd = currentresults[0][1]
                state.bestindomainval = currentresults[1][0]
                state.bestindomainvalstd = currentresults[1][1]
                state.bestindomainvalde = (1, epoch + 1)
        state.currentepoch = epoch + 1
        channel.save()
    return channel.COMPLETE