def get_denoising_autoencoder(structure):
    n_input, n_output = structure
    corruptor = BinomialCorruptor(corruption_level=0.5)
    config = {
        'corruptor': corruptor,
        'nhid': n_output,
        'nvis': n_input,
        'tied_weights': True,
        'act_enc': 'tanh',
        'act_dec': 'sigmoid',
        'irange': 0.001,
    }
    return DenoisingAutoencoder(**config)
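
For reference, a minimal usage sketch of the helper above. The import paths follow the pylearn2-style layout and are an assumption (the original project may use the older framework/pylearn module layout), and the layer sizes are purely illustrative:

# Assumed imports (pylearn2-style API; module paths are an assumption).
from pylearn2.corruption import BinomialCorruptor
from pylearn2.models.autoencoder import DenoisingAutoencoder

# Layer sizes are illustrative only.
dae = get_denoising_autoencoder((784, 200))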
Example #2
def main_train(epochs,
               batchsize,
               solution='',
               sparse_penalty=0,
               sparsityTarget=0,
               sparsityTargetPenalty=0):

    # Experiment specific arguments
    conf_dataset = {
        'dataset': 'avicenna',
        'expname': 'dummy',  # Used to create the submission file
        'transfer': True,
        'normalize': True,  # (Default = True)
        'normalize_on_the_fly': False,  # (Default = False)
        'randomize_valid': True,  # (Default = True)
        'randomize_test': True,  # (Default = True)
        'saving_rate': 0,  # (Default = 0)
        'savedir': './outputs',
    }

    # First layer = PCA-75 whiten
    pca_layer = {
        'name': '1st-PCA',
        'num_components': 75,
        'min_variance': -50,
        'whiten': True,
        'pca_class': 'CovEigPCA',
        # Training properties
        'proba': [1, 0, 0],
        'savedir': './outputs',
    }

    # Load the dataset
    data = utils.load_data(conf_dataset)

    if conf_dataset['transfer']:
        # Data for the ALC proxy
        label = data[3]
        data = data[:3]

    # First layer : train or load a PCA
    pca = create_pca(conf_dataset, pca_layer, data, model=pca_layer['name'])
    data = [
        utils.sharedX(pca.function()(s.get_value(borrow=True)), borrow=True)
        for s in data
    ]
    '''
    if conf_dataset['transfer']:
        data_train, label_train = utils.filter_labels(data[0], label)
      
        alc = embed.score(data_train, label_train)
        print '... resulting ALC on train (for PCA) is', alc
    '''

    nvis = utils.get_constant(data[0].shape[1]).item()

    conf = {
        'corruption_level': 0.1,
        'nhid': 200,
        'nvis': nvis,
        'anneal_start': 100,
        'base_lr': 0.001,
        'tied_weights': True,
        'act_enc': 'sigmoid',
        'act_dec': None,
        #'lr_hb': 0.10,
        #'lr_vb': 0.10,
        'solution': solution,
        'sparse_penalty': sparse_penalty,
        'sparsityTarget': sparsityTarget,
        'sparsityTargetPenalty': sparsityTargetPenalty,
        'irange': 0,
    }

    # A symbolic input representing your minibatch.
    minibatch = tensor.matrix()

    # Allocate a denoising autoencoder with Gaussian noise corruption.
    corruptor = GaussianCorruptor(conf['corruption_level'])
    da = DenoisingAutoencoder(corruptor, conf['nvis'], conf['nhid'],
                              conf['act_enc'], conf['act_dec'],
                              conf['tied_weights'], conf['solution'],
                              conf['sparse_penalty'], conf['sparsityTarget'],
                              conf['sparsityTargetPenalty'])

    # Allocate an optimizer, which tells us how to update our model.
    # TODO: build the cost another way
    cost = SquaredError(da)(minibatch, da.reconstruct(minibatch)).mean()
    trainer = SGDOptimizer(da, conf['base_lr'], conf['anneal_start'])
    updates = trainer.cost_updates(cost)

    # Finally, build a Theano function out of all this.
    train_fn = theano.function([minibatch], cost, updates=updates)

    # Suppose we want minibatches of size 10
    proba = utils.getboth(conf, pca_layer, 'proba')
    iterator = BatchIterator(data, proba, batchsize)

    # Here's a manual training loop. I hope to have some classes that
    # automate this a little bit.
    final_cost = 0
    for epoch in xrange(epochs):
        c = []
        for minibatch_data in iterator:
            minibatch_err = train_fn(minibatch_data)
            c.append(minibatch_err)
        final_cost = numpy.mean(c)
        print "epoch %d, cost : %f" % (epoch, final_cost)

    print '############################## End of the experiment ############################'
    print 'Computing the ALC:'
    if conf_dataset['transfer']:
        data_train, label_train = utils.filter_labels(data[0], label)
        alc = embed.score(data_train, label_train)

        print 'Solution : ', solution
        print 'sparse_penalty = ', sparse_penalty
        print 'sparsityTarget = ', sparsityTarget
        print 'sparsityTargetPenalty = ', sparsityTargetPenalty
        print 'Final denoising error is : ', final_cost
        print '... resulting ALC on train is', alc
        return (alc, final_cost)
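
A hedged call sketch for main_train above; the epoch count, batch size, and sparsity hyper-parameters below are illustrative values, not the settings of the original experiment:

# Illustrative invocation; all hyper-parameter values are assumptions.
alc, final_cost = main_train(epochs=10,
                             batchsize=20,
                             solution='l1_penalty',
                             sparse_penalty=0.01,
                             sparsityTarget=0.1,
                             sparsityTargetPenalty=0.001)
print 'ALC:', alc, 'final denoising cost:', final_cost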
Example #3
def main_train(work_dir="../results/avicenna/",
               corruption_level=0.3,
               nvis=75,
               nhid=600,
               tied_weights=True,
               act_enc="sigmoid",
               act_dec=None,
               max_epochs=2,
               learning_rate=0.001,
               batch_size=20,
               monitoring_batches=5,
               save_freq=1,
               n_components_trans_pca=7):

    conf = {
        'corruption_level': corruption_level,
        'nvis': nvis,
        'nhid': nhid,
        'tied_weights': tied_weights,
        'act_enc': act_enc,
        'act_dec': act_dec,
        'max_epochs': max_epochs,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'monitoring_batches': monitoring_batches,
        'save_freq': save_freq,
        'n_components_trans_pca': n_components_trans_pca
    }

    start = time.clock()

    ###############   TRAIN THE DAE
    train_file = work_dir + "train_pca" + str(conf['nvis']) + ".npy"
    save_path = work_dir + "train_pca" + str(conf['nvis']) + "_dae" + str(
        conf['nhid']) + "_model.pkl"

    trainset = NpyDataset(file=train_file)
    trainset.yaml_src = 'script'
    corruptor = BinomialCorruptor(corruption_level=conf['corruption_level'])
    dae = DenoisingAutoencoder(nvis=conf['nvis'],
                               nhid=conf['nhid'],
                               tied_weights=conf['tied_weights'],
                               corruptor=corruptor,
                               act_enc=conf['act_enc'],
                               act_dec=conf['act_dec'])
    cost = MeanSquaredReconstructionError()
    termination_criterion = EpochCounter(max_epochs=conf['max_epochs'])
    algorithm = UnsupervisedExhaustiveSGD(
        learning_rate=conf['learning_rate'],
        batch_size=conf['batch_size'],
        monitoring_batches=conf['monitoring_batches'],
        monitoring_dataset=trainset,
        cost=cost,
        termination_criterion=termination_criterion)

    train_obj = Train(dataset=trainset,
                      model=dae,
                      algorithm=algorithm,
                      save_freq=conf['save_freq'],
                      save_path=save_path)
    train_obj.main_loop()

    ###############   APPLY THE MODEL ON THE TRAIN DATASET
    print("Applying the model on the train dataset...")
    model = load(save_path)
    save_train_path = work_dir + "train_pca" + str(
        conf['nvis']) + "_dae" + str(conf['nhid']) + ".npy"
    dump_obj = FeatureDump(encoder=model,
                           dataset=trainset,
                           path=save_train_path)
    dump_obj.main_loop()

    ###############   APPLY THE MODEL ON THE VALID DATASET
    print("Applying the model on the valid dataset...")
    valid_file = work_dir + "valid_pca" + str(conf['nvis']) + ".npy"

    validset = NpyDataset(file=valid_file)
    validset.yaml_src = 'script'
    save_valid_path = work_dir + "valid_pca" + str(
        conf['nvis']) + "_dae" + str(conf['nhid']) + ".npy"
    dump_obj = FeatureDump(encoder=model,
                           dataset=validset,
                           path=save_valid_path)
    dump_obj.main_loop()

    ###############   APPLY THE MODEL ON THE TEST DATASET
    print("Applying the model on the test dataset...")
    test_file = work_dir + "test_pca" + str(conf['nvis']) + ".npy"

    testset = NpyDataset(file=test_file)
    testset.yaml_src = 'script'
    save_test_path = work_dir + "test_pca" + str(conf['nvis']) + "_dae" + str(
        conf['nhid']) + ".npy"
    dump_obj = FeatureDump(encoder=model, dataset=testset, path=save_test_path)
    dump_obj.main_loop()

    ###############   COMPUTE THE ALC SCORE ON VALIDATION SET
    valid_data = ift6266h12.load_npy(save_valid_path)
    label_data = ift6266h12.load_npy(
        '/data/lisa/data/UTLC/numpy_data/avicenna_valid_y.npy')
    alc_1 = score(valid_data, label_data)

    ###############   APPLY THE TRANSDUCTIVE PCA
    test_data = ift6266h12.load_npy(save_test_path)
    trans_pca = PCA(n_components=conf['n_components_trans_pca'])
    final_valid = trans_pca.fit_transform(valid_data)
    final_test = trans_pca.fit_transform(test_data)

    save_valid_path = work_dir + "valid_pca" + str(
        conf['nvis']) + "_dae" + str(conf['nhid']) + "_tpca" + str(
            conf['n_components_trans_pca']) + ".npy"
    save_test_path = work_dir + "test_pca" + str(conf['nvis']) + "_dae" + str(
        conf['nhid']) + "_tpca" + str(conf['n_components_trans_pca']) + ".npy"

    np.save(save_valid_path, final_valid)
    np.save(save_test_path, final_test)

    ###############   COMPUTE THE NEW ALC SCORE ON VALIDATION SET
    alc_2 = score(final_valid, label_data)

    ###############   OUTPUT AND RETURN THE RESULTS
    timeSpent = ((time.clock() - start) / 60.)
    print 'FINAL RESULTS (PCA-' + str(conf['nvis']) + ' DAE-' + str(conf['nhid']) + \
        ' TransPCA-' + str(conf['n_components_trans_pca']) + ') ALC after DAE: ', alc_1, \
        ' FINAL ALC: ', alc_2, ' Computed in %5.2f min' % (timeSpent)

    return timeSpent, alc_1, alc_2
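
A minimal invocation sketch for this variant. The working directory below is just the function's default and assumes the PCA-reduced .npy files it expects (e.g. train_pca75.npy) already exist there; the overrides shown are illustrative:

# Illustrative invocation; paths and hyper-parameters are assumptions.
time_spent, alc_dae, alc_final = main_train(work_dir="../results/avicenna/",
                                            nhid=600,
                                            max_epochs=2)
print 'ALC after DAE:', alc_dae, 'final ALC:', alc_final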
Example #4
        'tied_weights': False,
        'solution': 'l1_penalty',
        'sparse_penalty': 0.01,
        'sparsity_target': 0.1,
        'sparsity_target_penalty': 0.001,
        'irange': 0.001,
    }

    # A symbolic input representing your minibatch.
    minibatch = tensor.matrix()

    # Allocate a denoising autoencoder with Gaussian noise corruption.
    corruptor = GaussianCorruptor(conf['corruption_level'])
    da = DenoisingAutoencoder(corruptor, conf['nvis'], conf['nhid'],
                              conf['act_enc'], conf['act_dec'],
                              conf['tied_weights'], conf['solution'],
                              conf['sparse_penalty'], conf['sparsity_target'],
                              conf['sparsity_target_penalty'])

    # Allocate an optimizer, which tells us how to update our model.
    # TODO: build the cost another way
    cost = SquaredError(da)(minibatch, da.reconstruct(minibatch)).mean()
    trainer = SGDOptimizer(da, conf['base_lr'], conf['anneal_start'])
    updates = trainer.cost_updates(cost)

    # Finally, build a Theano function out of all this.
    train_fn = theano.function([minibatch], cost, updates=updates)

    # Suppose we want minibatches of size 20
    batchsize = 20
Example #5
File: kmeans.py  Project: LeeEdel/pylearn
        #'lr_hb': 0.10,
        #'lr_vb': 0.10,
        'irange': 0.001,
        # note the k-means hyper-parameter here
        'kmeans_k': 2
    }
    print '== training =='
    # A symbolic input representing your minibatch.
    minibatch = tensor.matrix()

    # Allocate a denoising autoencoder with Gaussian noise corruption.
    corruptor = GaussianCorruptor(corruption_level=conf['corruption_level'])
    da = DenoisingAutoencoder(corruptor,
                              conf['nvis'],
                              conf['nhid'],
                              conf['act_enc'],
                              conf['act_dec'],
                              tied_weights=conf['tied_weights'],
                              irange=conf['irange'])

    # Allocate an optimizer, which tells us how to update our model.
    # TODO: build the cost another way
    cost = SquaredError(da)(minibatch, da.reconstruct(minibatch)).mean()
    trainer = SGDOptimizer(da.params(), conf['base_lr'], conf['anneal_start'])

    # Finally, build a Theano function out of all this.
    train_fn = theano.function([minibatch],
                               cost,
                               updates=trainer.cost_updates(cost))

    # Suppose we want minibatches of size 10
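    # --- Assumed continuation (not in the original snippet) ---
    # The excerpt ends here; a plausible continuation is the same manual
    # training loop shown in Example #2. `data`, `proba`, and `epochs` are
    # assumed to be defined as in that example; the batch size is illustrative.
    batchsize = 10
    iterator = BatchIterator(data, proba, batchsize)
    for epoch in xrange(epochs):
        c = []
        for minibatch_data in iterator:
            c.append(train_fn(minibatch_data))
        print "epoch %d, cost : %f" % (epoch, numpy.mean(c))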