Example No. 1
    def train(self, model, dataset):
        minibatch = tensor.matrix()

        cost = SquaredError(model)(minibatch, model.reconstruct(minibatch)).mean()
        trainer = SGDOptimizer(model, self.base_lr, self.anneal_start)
        updates = trainer.cost_updates(cost)

        train_fn = function([minibatch], cost, updates=updates)

        data = dataset.get_design_matrix()

        for epoch in xrange(self.num_epochs):
            for offset in xrange(0, data.shape[0], self.batch_size):
                minibatch_err = train_fn(data[offset:(offset+self.batch_size)])
                print "epoch %d, batch %d-%d: %f" % (epoch, offset, offset + self.batch_size -1, minibatch_err)
Example No. 2
def main_train(epochs, batchsize, solution="", sparse_penalty=0, sparsityTarget=0, sparsityTargetPenalty=0):

    # Experiment specific arguments
    conf_dataset = {
        "dataset": "avicenna",
        "expname": "dummy",  # Used to create the submission file
        "transfer": True,
        "normalize": True,  # (Default = True)
        "normalize_on_the_fly": False,  # (Default = False)
        "randomize_valid": True,  # (Default = True)
        "randomize_test": True,  # (Default = True)
        "saving_rate": 0,  # (Default = 0)
        "savedir": "./outputs",
    }

    # First layer = PCA-75 whiten
    pca_layer = {
        "name": "1st-PCA",
        "num_components": 75,
        "min_variance": -50,
        "whiten": True,
        "pca_class": "CovEigPCA",
        # Training properties
        "proba": [1, 0, 0],
        "savedir": "./outputs",
    }

    # Load the dataset
    data = utils.load_data(conf_dataset)

    if conf_dataset["transfer"]:
        # Data for the ALC proxy
        label = data[3]
        data = data[:3]

    # First layer : train or load a PCA
    pca = create_pca(conf_dataset, pca_layer, data, model=pca_layer["name"])
    data = [utils.sharedX(pca.function()(subset.get_value(borrow=True)),
                          borrow=True)
            for subset in data]
    """
    if conf_dataset['transfer']:
        data_train, label_train = utils.filter_labels(data[0], label)
      
        alc = embed.score(data_train, label_train)
        print '... resulting ALC on train (for PCA) is', alc
    """

    nvis = utils.get_constant(data[0].shape[1]).item()

    conf = {
        "corruption_level": 0.1,
        "nhid": 200,
        "nvis": nvis,
        "anneal_start": 100,
        "base_lr": 0.001,
        "tied_weights": True,
        "act_enc": "sigmoid",
        "act_dec": None,
        #'lr_hb': 0.10,
        #'lr_vb': 0.10,
        "tied_weights": True,
        "solution": solution,
        "sparse_penalty": sparse_penalty,
        "sparsityTarget": sparsityTarget,
        "sparsityTargetPenalty": sparsityTargetPenalty,
        "irange": 0,
    }

    # A symbolic input representing your minibatch.
    minibatch = tensor.matrix()

    # Allocate a denoising autoencoder with Gaussian noise corruption.
    corruptor = GaussianCorruptor(conf["corruption_level"])
    da = DenoisingAutoencoder(
        corruptor,
        conf["nvis"],
        conf["nhid"],
        conf["act_enc"],
        conf["act_dec"],
        conf["tied_weights"],
        conf["solution"],
        conf["sparse_penalty"],
        conf["sparsityTarget"],
        conf["sparsityTargetPenalty"],
    )

    # Allocate an optimizer, which tells us how to update our model.
    # TODO: build the cost another way
    cost = SquaredError(da)(minibatch, da.reconstruct(minibatch)).mean()
    trainer = SGDOptimizer(da, conf["base_lr"], conf["anneal_start"])
    updates = trainer.cost_updates(cost)

    # Finally, build a Theano function out of all this.
    train_fn = theano.function([minibatch], cost, updates=updates)

    # Build a batch iterator over the data using the configured batch size.
    proba = utils.getboth(conf, pca_layer, "proba")
    iterator = BatchIterator(data, proba, batchsize)

    # Here's a manual training loop. I hope to have some classes that
    # automate this a little bit.
    final_cost = 0
    for epoch in xrange(epochs):
        c = []
        for minibatch_data in iterator:
            minibatch_err = train_fn(minibatch_data)
            c.append(minibatch_err)
        final_cost = numpy.mean(c)
        print "epoch %d, cost : %f" % (epoch, final_cost)

    print "############################## Fin de l'experience ############################"
    print "Calcul de l'ALC : "
    if conf_dataset["transfer"]:
        data_train, label_train = utils.filter_labels(data[0], label)
        alc = embed.score(data_train, label_train)

        print "Solution : ", solution
        print "sparse_penalty = ", sparse_penalty
        print "sparsityTarget = ", sparsityTarget
        print "sparsityTargetPenalty = ", sparsityTargetPenalty
        print "Final denoising error is : ", final_cost
        print "... resulting ALC on train is", alc
        return (alc, final_cost)
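
A plausible driver for this function, sweeping a few sparsity settings. The argument values below are illustrative guesses, not taken from the original; in particular the empty `solution` string is only a placeholder, since the set of valid values is not shown in the listing.

if __name__ == "__main__":
    results = []
    for target in (0.01, 0.05, 0.1):
        # Each call returns (alc, final_cost) when transfer is enabled.
        results.append(main_train(epochs=50, batchsize=20, solution="",
                                  sparse_penalty=0.1, sparsityTarget=target,
                                  sparsityTargetPenalty=0.001))
    print results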
Example No. 3
    # A symbolic input representing your minibatch.
    minibatch = tensor.matrix()

    # Allocate a denoising autoencoder with Gaussian noise corruption.
    corruptor = GaussianCorruptor(conf['corruption_level'])
    da = DenoisingAutoencoder(corruptor, conf['nvis'], conf['nhid'],
                              conf['act_enc'], conf['act_dec'],
                              conf['tied_weights'], conf['solution'],
                              conf['sparse_penalty'], conf['sparsity_target'],
                              conf['sparsity_target_penalty'])

    # Allocate an optimizer, which tells us how to update our model.
    # TODO: build the cost another way
    cost = SquaredError(da)(minibatch, da.reconstruct(minibatch)).mean()
    trainer = SGDOptimizer(da, conf['base_lr'], conf['anneal_start'])
    updates = trainer.cost_updates(cost)

    # Finally, build a Theano function out of all this.
    train_fn = theano.function([minibatch], cost, updates=updates)

    # Suppose we want minibatches of size 20
    batchsize = 20

    # Here's a manual training loop. I hope to have some classes that
    # automate this a little bit.
    for epoch in xrange(5):
        for offset in xrange(0, data.shape[0], batchsize):
            minibatch_err = train_fn(data[offset:(offset + batchsize)])
            print "epoch %d, batch %d-%d: %f" % \
                    (epoch, offset, offset + batchsize - 1, minibatch_err)
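
This fragment assumes a `conf` dictionary and a `data` design matrix defined earlier in the file. A minimal stand-in with exactly the keys the code reads is sketched below; the values are illustrative, mirroring the configuration in Example No. 2.

conf = {
    'corruption_level': 0.1,
    'nvis': 75,
    'nhid': 200,
    'act_enc': 'sigmoid',
    'act_dec': None,
    'tied_weights': True,
    'solution': '',
    'sparse_penalty': 0,
    'sparsity_target': 0,
    'sparsity_target_penalty': 0,
    'base_lr': 0.001,
    'anneal_start': 100,
}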
Example No. 4
    MyCorruptor = framework.corruption.get(name)
    corruptor = MyCorruptor(layer.get('corruption_level', 0))

    # Allocate a denoising or contracting autoencoder.
    MyAutoencoder = framework.autoencoder.get(clsname)
    ae = MyAutoencoder.fromdict(layer, corruptor=corruptor)

    # Allocate an optimizer, which tells us how to update our model.
    MyCost = framework.cost.get(layer['cost_class'])
    varcost = MyCost(ae)(minibatch, ae.reconstruct(minibatch))
    if isinstance(ae, ContractingAutoencoder):
        alpha = layer.get('contracting_penalty', 0.1)
        penalty = alpha * ae.contraction_penalty(minibatch)
        varcost = varcost + penalty
    varcost = varcost.mean()
    trainer = SGDOptimizer(ae, layer['base_lr'], layer['anneal_start'])
    updates = trainer.cost_updates(varcost)

    # Finally, build a Theano function out of all this.
    train_fn = theano.function([minibatch], varcost,
                               updates=updates,
                               name='train_fn')

    # Here's a manual training loop.
    print '... training layer:', clsname
    start_time = time.clock()
    proba = utils.getboth(layer, conf, 'proba')
    iterator = BatchIterator(data, proba, layer['batch_size'])
    saving_counter = 0
    saving_rate = utils.getboth(layer, conf, 'saving_rate', 0)
    for epoch in xrange(layer['epochs']):
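        # NOTE: the excerpt ends at the loop header above; the body below is a
        # plausible reconstruction modelled on the epoch loop in Example No. 2,
        # not the original source (it assumes numpy is imported elsewhere).
        c = []
        for minibatch_data in iterator:
            c.append(train_fn(minibatch_data))
        print '... epoch %d, mean cost : %f' % (epoch, numpy.mean(c))
        # Periodically checkpoint the layer if a saving rate is configured.
        if saving_rate != 0:
            saving_counter += 1
            if saving_counter % saving_rate == 0:
                pass  # the original presumably saves `ae` here; call not shown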
Example No. 5
    }
    print '== training =='
    # A symbolic input representing your minibatch.
    minibatch = tensor.matrix()

    # Allocate a denoising autoencoder with Gaussian noise corruption.
    corruptor = GaussianCorruptor(corruption_level=conf['corruption_level'])
    da = DenoisingAutoencoder(corruptor, conf['nvis'], conf['nhid'], 
                              conf['act_enc'], conf['act_dec'],
                              tied_weights=conf['tied_weights'],
                              irange=conf['irange'])

    # Allocate an optimizer, which tells us how to update our model.
    # TODO: build the cost another way
    cost = SquaredError(da)(minibatch, da.reconstruct(minibatch)).mean()
    trainer = SGDOptimizer(da.params(), conf['base_lr'], conf['anneal_start'])

    # Finally, build a Theano function out of all this.
    train_fn = theano.function([minibatch], cost,
                               updates=trainer.cost_updates(cost))

    # Suppose we want minibatches of size 10
    batchsize = 10

    # Here's a manual training loop. I hope to have some classes that
    # automate this a little bit.
    for epoch in xrange(10):
        for offset in xrange(0, train_data.shape[0], batchsize):
            minibatch_err = train_fn(train_data[offset:(offset + batchsize)])
            # print "epoch %d, batch %d-%d: %f" % \
            #     (epoch, offset, offset + batchsize - 1, minibatch_err)
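
To check the fit after training, one could compile a second Theano function that evaluates the cost without applying updates. This is a hedged sketch reusing the symbolic pieces defined above, not code from the original; note that the corruptor makes the reported cost stochastic.

    # Evaluate the mean reconstruction cost over the whole training set.
    eval_fn = theano.function([minibatch], cost)
    print "final mean cost : %f" % eval_fn(train_data)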