def main(args=None):
    # trainset, validset = get_dataset_timitConsSmall()
    trainset, validset = get_dataset_timitVowels9Frames_MFCC()
    n_output = 20

    design_matrix = trainset.get_design_matrix()
    n_input = design_matrix.shape[1]

    # build layers
    layers = []
    structure = [[n_input, 500], [500, 500], [500, 500], [500, n_output]]
    # layer 0: gaussianRBM
    layers.append(get_grbm(structure[0]))
    # # layer 1: denoising AE
    # layers.append(get_denoising_autoencoder(structure[1]))
    # # layer 2: AE
    # layers.append(get_autoencoder(structure[2]))
    # # layer 3: logistic regression used in supervised training
    # layers.append(get_logistic_regressor(structure[3]))
    # layer 1: gaussianRBM
    layers.append(get_grbm(structure[1]))
    # layer 2: gaussianRBM
    layers.append(get_grbm(structure[2]))
    # layer 3: softmax MLP used in supervised training
    # layers.append(get_logistic_regressor(structure[3]))
    layers.append(get_mlp_softmax(structure[3]))

    # construct training sets for different layers
    trainset = [trainset,
                TransformerDataset(raw=trainset, transformer=layers[0]),
                TransformerDataset(raw=trainset,
                                   transformer=StackedBlocks(layers[0:2])),
                TransformerDataset(raw=trainset,
                                   transformer=StackedBlocks(layers[0:3]))]

    # construct layer trainers
    layer_trainers = []
    layer_trainers.append(get_layer_trainer_sgd_rbm0(layers[0], trainset[0]))
    # layer_trainers.append(get_layer_trainer_sgd_autoencoder(layers[1], trainset[1]))
    # layer_trainers.append(get_layer_trainer_sgd_autoencoder(layers[2], trainset[2]))
    layer_trainers.append(get_layer_trainer_sgd_rbm1(layers[1], trainset[1]))
    layer_trainers.append(get_layer_trainer_sgd_rbm2(layers[2], trainset[2]))
    # layer_trainers.append(get_layer_trainer_logistic(layers[3], trainset[3]))
    layer_trainers.append(get_layer_trainer_softmax(layers[3], trainset[3]))

    # unsupervised pretraining
    for i, layer_trainer in enumerate(layer_trainers[0:3]):
        print('-----------------------------------')
        print(' Unsupervised training layer %d, %s' % (i, layers[i].__class__))
        print('-----------------------------------')
        layer_trainer.main_loop()

    print('\n')
    print('------------------------------------------------------')
    print(' Unsupervised training done! Start supervised training...')
    print('------------------------------------------------------')
    print('\n')

    # supervised training of the top softmax layer
    layer_trainers[-1].main_loop()
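# The four-element trainset list above implements the greedy layer-wise
# pattern: layer k trains on the raw data pushed through the already-trained
# blocks 0..k-1. Below is a minimal sketch of that pattern as a reusable
# helper; the helper name is hypothetical, while TransformerDataset and
# StackedBlocks are the same pylearn2 classes used above.
def make_layer_trainsets(raw_trainset, layers):
    trainsets = [raw_trainset]
    for k in xrange(1, len(layers)):
        # a single block for k == 1, otherwise the stack of blocks 0..k-1
        transformer = layers[0] if k == 1 else StackedBlocks(layers[0:k])
        trainsets.append(TransformerDataset(raw=raw_trainset,
                                            transformer=transformer))
    return trainsets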
def build_stacked_ae(nvis, nhids, act_enc, act_dec,
                     tied_weights=False, irange=1e-3, rng=None,
                     corruptor=None, contracting=False):
    """
    Allocate a stack of autoencoders.

    Scalar arguments are broadcast to every layer; sequence arguments
    must have one entry per element of `nhids`.
    """
    rng = make_np_rng(rng, which_method='randn')
    layers = []
    final = {}
    # "Broadcast" arguments if they are singular, or accept sequences if
    # they are the same length as nhids
    for c in ['corruptor', 'contracting', 'act_enc', 'act_dec',
              'tied_weights', 'irange']:
        if type(locals()[c]) is not str and hasattr(locals()[c], '__len__'):
            assert len(nhids) == len(locals()[c])
            final[c] = locals()[c]
        else:
            final[c] = [locals()[c]] * len(nhids)
    # The number of visible units in each layer is the initial input
    # size and the first k-1 hidden unit sizes.
    nviss = [nvis] + nhids[:-1]
    seq = izip(nhids, nviss,
               final['act_enc'],
               final['act_dec'],
               final['corruptor'],
               final['contracting'],
               final['tied_weights'],
               final['irange'])
    # Create each layer.
    for (nhid, nvis, act_enc, act_dec, corr, cae, tied, ir) in seq:
        args = (nvis, nhid, act_enc, act_dec, tied, ir, rng)
        if cae and corr is not None:
            raise ValueError("Can't specify denoising and contracting "
                             "objectives simultaneously")
        elif cae:
            autoenc = ContractiveAutoencoder(*args)
        elif corr is not None:
            autoenc = DenoisingAutoencoder(corr, *args)
        else:
            autoenc = Autoencoder(*args)
        layers.append(autoenc)
    # Create the stack
    return StackedBlocks(layers)
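# Hedged usage sketch for build_stacked_ae (not from the original source):
# a scalar corruptor is broadcast to all layers, so every layer becomes a
# DenoisingAutoencoder. GaussianCorruptor is pylearn2's corruption class;
# the layer sizes here are illustrative.
from pylearn2.corruption import GaussianCorruptor

stack = build_stacked_ae(nvis=784, nhids=[500, 250, 100],
                         act_enc='tanh', act_dec='tanh',
                         tied_weights=True,
                         corruptor=GaussianCorruptor(stdev=0.1))
assert len(stack._layers) == 3  # one autoencoder per entry in nhids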
def test_stackedblocks_without_params():
    """
    Test StackedBlocks when not all layers have trainable params
    """
    sb = StackedBlocks([Block(), Block()])
    assert sb._params is None
def test_stackedblocks_with_params():
    """
    Test StackedBlocks when all layers have trainable params
    """
    aes = [Autoencoder(100, 50, 'tanh', 'tanh'),
           Autoencoder(50, 10, 'tanh', 'tanh')]
    sb = StackedBlocks(aes)
    _params = set([p for l in sb._layers for p in l._params])
    assert sb._params == _params
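# A hedged sketch (not in the original suite) of the mixed case that the
# first test's docstring alludes to: if any layer lacks trainable params,
# the stack is assumed to report none. This semantics is an assumption
# inferred from the two tests above, not verified against the library.
def test_stackedblocks_with_mixed_params():
    """
    Test StackedBlocks when only some layers have trainable params
    """
    sb = StackedBlocks([Autoencoder(100, 50, 'tanh', 'tanh'), Block()])
    assert sb._params is None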
def __init__(self, layers):
    stacked_blocks = []
    n_folds = len(layers[0])
    assert all([len(layer) == n_folds for layer in layers])
    # stack the k-th block from each layer
    for k in xrange(n_folds):
        this_blocks = []
        for layer in layers:
            this_blocks.append(layer[k])
        this_stacked_blocks = StackedBlocks(this_blocks)
        stacked_blocks.append(this_stacked_blocks)
    # _folds contains a StackedBlocks instance for each CV fold
    self._folds = stacked_blocks
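# Toy illustration of the regrouping performed by __init__ above: `layers`
# is a list of per-layer lists of per-fold blocks, and the loop transposes
# it into one stack per cross-validation fold. Plain strings stand in for
# trained blocks here.
layer0 = ['l0_fold0', 'l0_fold1']
layer1 = ['l1_fold0', 'l1_fold1']
folds = [[layer[k] for layer in (layer0, layer1)] for k in range(2)]
assert folds == [['l0_fold0', 'l1_fold0'], ['l0_fold1', 'l1_fold1']]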
def main(args=None):
    """
    args is the list of arguments that will be passed to the option parser.
    The default (None) means use sys.argv[1:].
    """
    parser = OptionParser()
    parser.add_option("-d", "--data", dest="dataset", default="toy",
                      help="specify the dataset, either cifar10, mnist or toy")
    (options, args) = parser.parse_args(args=args)

    if options.dataset == 'toy':
        trainset, testset = get_dataset_toy()
        n_output = 2
    elif options.dataset == 'cifar10':
        trainset, testset = get_dataset_cifar10()
        n_output = 10
    elif options.dataset == 'mnist':
        trainset, testset = get_dataset_mnist()
        n_output = 10
    else:
        raise NotImplementedError()

    design_matrix = trainset.get_design_matrix()
    n_input = design_matrix.shape[1]

    # build layers
    layers = []
    structure = [[n_input, 10], [10, 50], [50, 100], [100, n_output]]
    # layer 0: gaussianRBM
    layers.append(get_grbm(structure[0]))
    # layer 1: denoising AE
    layers.append(get_denoising_autoencoder(structure[1]))
    # layer 2: AE
    layers.append(get_autoencoder(structure[2]))
    # layer 3: logistic regression used in supervised training
    layers.append(get_logistic_regressor(structure[3]))

    # construct training sets for different layers
    trainset = [trainset,
                TransformerDataset(raw=trainset, transformer=layers[0]),
                TransformerDataset(raw=trainset,
                                   transformer=StackedBlocks(layers[0:2])),
                TransformerDataset(raw=trainset,
                                   transformer=StackedBlocks(layers[0:3]))]

    # construct layer trainers
    layer_trainers = []
    layer_trainers.append(get_layer_trainer_sgd_rbm(layers[0], trainset[0]))
    layer_trainers.append(get_layer_trainer_sgd_autoencoder(layers[1],
                                                            trainset[1]))
    layer_trainers.append(get_layer_trainer_sgd_autoencoder(layers[2],
                                                            trainset[2]))
    layer_trainers.append(get_layer_trainer_logistic(layers[3], trainset[3]))

    # unsupervised pretraining
    for i, layer_trainer in enumerate(layer_trainers[0:3]):
        print('-----------------------------------')
        print(' Unsupervised training layer %d, %s' % (i, layers[i].__class__))
        print('-----------------------------------')
        layer_trainer.main_loop()

    print('\n')
    print('------------------------------------------------------')
    print(' Unsupervised training done! Start supervised training...')
    print('------------------------------------------------------')
    print('\n')

    # supervised training
    layer_trainers[-1].main_loop()
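# Likely invocation for the script above. The entry-point guard is an
# assumption (it matches the optparse-based main signature, but is not part
# of the excerpt); run e.g.:
#     python <this_script>.py --data mnist
if __name__ == '__main__':
    main()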
def train_sda(params):
    input_trainset, trainset_yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__),
                     'train_sda_dataset_template.yaml'),
        params=params,
    )

    log.info('... building the model')

    # build layers
    layer_dims = [params.input_length]
    layer_dims.extend(params.hidden_layers_sizes)

    layers = []
    for i in xrange(1, len(layer_dims)):
        layer_params = {
            'name': 'da' + str(i),
            'n_inputs': layer_dims[i - 1],
            'n_outputs': layer_dims[i],
            'corruption_level': params.pretrain.corruption_levels[i - 1],
            'input_range': numpy.sqrt(6. / (layer_dims[i - 1] + layer_dims[i])),
            'random_seed': params.random_seed,
        }
        layers.append(
            load_yaml_file(
                os.path.join(os.path.dirname(__file__),
                             'train_sda_layer_template.yaml'),
                params=layer_params,
            )[0])

    # unsupervised pre-training
    log.info('... pre-training the model')
    start_time = time.clock()

    for i in xrange(len(layers)):
        # reset corruption to make sure input is not corrupted
        for layer in layers:
            layer.set_corruption_level(0)

        if i == 0:
            trainset = input_trainset
        elif i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(
                raw=input_trainset,
                transformer=StackedBlocks(layers[0:i]))

        # set corruption for layer to train
        layers[i].set_corruption_level(params.pretrain.corruption_levels[i])

        # FIXME: this is not so nice but we have to do it this way,
        # as YAML is not flexible enough
        trainer = get_layer_trainer_sgd_autoencoder(
            layers[i],
            trainset,
            learning_rate=params.pretrain.learning_rate,
            max_epochs=params.pretrain.epochs,
            batch_size=params.pretrain.batch_size,
            name='pre-train' + str(i))

        log.info('unsupervised training layer %d, %s ' %
                 (i, layers[i].__class__))
        trainer.main_loop()

        # theano.printing.pydotprint_variables(
        #     layer_trainer.algorithm.sgd_update.maker.fgraph.outputs[0],
        #     outfile='pylearn2-sgd_update.png',
        #     var_with_name_simple=True)

    end_time = time.clock()
    log.info('pre-training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    if params.untie_weights:
        # now untie the decoder weights
        log.info('untying decoder weights')
        for layer in layers:
            layer.untie_weights()

    # construct multi-layer training functions

    # unsupervised training
    log.info('... training the model')

    sdae = None
    for depth in xrange(1, len(layers) + 1):
        first_layer_i = len(layers) - depth
        log.debug('training layers {}..{}'.format(first_layer_i,
                                                  len(layers) - 1))

        group = layers[first_layer_i:len(layers)]
        # log.debug(group)

        # reset corruption
        for layer in layers:
            layer.set_corruption_level(0)

        if first_layer_i == 0:
            trainset = input_trainset
        elif first_layer_i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(
                raw=input_trainset,
                transformer=StackedBlocks(layers[0:first_layer_i]))

        # set corruption for input layer of stack to train
        # layers[first_layer_i].set_corruption_level(
        #     stage2_corruption_levels[first_layer_i])
        corruptor = LoggingCorruptor(
            BinomialCorruptor(
                corruption_level=params.pretrain_finetune.corruption_levels[
                    first_layer_i]),
            name='depth {}'.format(depth))
        sdae = StackedDenoisingAutoencoder(group, corruptor)

        trainer = get_layer_trainer_sgd_autoencoder(
            sdae,
            trainset,
            learning_rate=params.pretrain_finetune.learning_rate,
            max_epochs=params.pretrain_finetune.epochs,
            batch_size=params.pretrain_finetune.batch_size,
            name='multi-train' + str(depth))

        log.info('unsupervised multi-layer training, depth %d' % depth)
        trainer.main_loop()

    end_time = time.clock()
    log.info('full training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    # save the model
    model_file = os.path.join(params.experiment_root, 'sda', 'sda_all.pkl')
    with log_timing(log, 'saving SDA model to {}'.format(model_file)):
        serial.save(model_file, sdae)

    if params.untie_weights:
        # save individual layers for later (with untied weights)
        for i, layer in enumerate(sdae.autoencoders):
            layer_file = os.path.join(params.experiment_root, 'sda',
                                      'sda_layer{}_untied.pkl'.format(i))
            with log_timing(log, 'saving SDA layer {} model to {}'.format(
                    i, layer_file)):
                serial.save(layer_file, layer)

    # save individual layers for later (with tied weights)
    for i, layer in enumerate(sdae.autoencoders):
        if params.untie_weights:
            layer.tie_weights()
        layer_file = os.path.join(params.experiment_root, 'sda',
                                  'sda_layer{}_tied.pkl'.format(i))
        with log_timing(log, 'saving SDA layer {} model to {}'.format(
                i, layer_file)):
            serial.save(layer_file, layer)

    log.info('done')
    return sdae
def train_SdA(config, dataset):
    # load config
    hidden_layers_sizes = config.get('hidden_layers_sizes', [10, 10])
    corruption_levels = config.get('corruption_levels', [0.1, 0.2])
    stage2_corruption_levels = config.get('stage2_corruption_levels',
                                          [0.1, 0.1])
    pretrain_epochs = config.get('pretrain_epochs', 10)
    pretrain_lr = config.get('pretrain_learning_rate', 0.001)
    finetune_epochs = config.get('finetune_epochs', 10)
    finetune_lr = config.get('finetune_learning_rate', 0.01)
    batch_size = config.get('batch_size', 10)
    monitoring_batches = config.get('monitoring_batches', 5)
    output_path = config.get('output_path', './')

    input_trainset = dataset
    design_matrix = input_trainset.get_design_matrix()
    # print design_matrix.shape
    n_input = design_matrix.shape[1]
    log.info('done')
    log.debug('input dimensions : {0}'.format(n_input))
    log.debug('training examples: {0}'.format(design_matrix.shape[0]))

    # numpy random generator
    # numpy_rng = numpy.random.RandomState(89677)

    log.info('... building the model')

    # build layers
    layer_dims = [n_input]
    layer_dims.extend(hidden_layers_sizes)

    layers = []
    for i in xrange(1, len(layer_dims)):
        structure = [layer_dims[i - 1], layer_dims[i]]
        layers.append(
            create_denoising_autoencoder(structure,
                                         corruption=corruption_levels[i - 1]))

    # unsupervised pre-training
    log.info('... pre-training the model')
    start_time = time.clock()

    for i in xrange(len(layers)):
        # reset corruption to make sure input is not corrupted
        for layer in layers:
            layer.set_corruption_level(0)

        if i == 0:
            trainset = input_trainset
        elif i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(
                raw=input_trainset,
                transformer=StackedBlocks(layers[0:i]))

        # set corruption for layer to train
        layers[i].set_corruption_level(corruption_levels[i])

        trainer = get_layer_trainer_sgd_autoencoder(
            layers[i],
            trainset,
            learning_rate=pretrain_lr,
            max_epochs=pretrain_epochs,
            batch_size=batch_size,
            monitoring_batches=monitoring_batches,
            name='pre-train' + str(i))

        log.info('unsupervised training layer %d, %s ' %
                 (i, layers[i].__class__))
        trainer.main_loop()

        # theano.printing.pydotprint_variables(
        #     layer_trainer.algorithm.sgd_update.maker.fgraph.outputs[0],
        #     outfile='pylearn2-sgd_update.png',
        #     var_with_name_simple=True)

    end_time = time.clock()
    log.info('pre-training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    # now untie the decoder weights
    log.info('untying decoder weights')
    for layer in layers:
        layer.untie_weights()

    # construct multi-layer training functions

    # unsupervised training
    log.info('... training the model')

    sdae = None
    for depth in xrange(1, len(layers) + 1):
        first_layer_i = len(layers) - depth
        log.debug('training layers {}..{}'.format(first_layer_i,
                                                  len(layers) - 1))

        group = layers[first_layer_i:len(layers)]
        # log.debug(group)

        # reset corruption
        for layer in layers:
            layer.set_corruption_level(0)

        if first_layer_i == 0:
            trainset = input_trainset
        elif first_layer_i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(
                raw=input_trainset,
                transformer=StackedBlocks(layers[0:first_layer_i]))

        # set corruption for input layer of stack to train
        # layers[first_layer_i].set_corruption_level(
        #     stage2_corruption_levels[first_layer_i])
        corruptor = LoggingCorruptor(
            BinomialCorruptor(
                corruption_level=stage2_corruption_levels[first_layer_i]),
            name='depth {}'.format(depth))
        sdae = StackedDenoisingAutoencoder(group, corruptor)

        trainer = get_layer_trainer_sgd_autoencoder(
            sdae,
            trainset,
            learning_rate=finetune_lr,
            max_epochs=finetune_epochs,
            batch_size=batch_size,
            monitoring_batches=monitoring_batches,
            name='multi-train' + str(depth))

        log.info('unsupervised multi-layer training, depth %d' % depth)
        trainer.main_loop()

    end_time = time.clock()
    log.info('full training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    # save the model
    model_file = os.path.join(output_path, 'sdae-model.pkl')
    with log_timing(log, 'saving SDA model to {}'.format(model_file)):
        serial.save(model_file, sdae)

    # TODO: pylearn2.train_extensions.best_params.KeepBestParams(
    #     model, cost, monitoring_dataset, batch_size)
    # pylearn2.train_extensions.best_params.MonitorBasedSaveBest

    log.info('done')
    return sdae
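# Hedged usage sketch: reloading the model pickled by train_SdA and encoding
# new data with it. serial.load is the counterpart of the serial.save call
# above, and 'sdae-model.pkl' matches the filename used there; `new_dataset`
# is a hypothetical dataset, and the perform() call assumes the generic
# pylearn2 Block interface applies to the stacked model.
from pylearn2.utils import serial

sdae = serial.load(os.path.join('./', 'sdae-model.pkl'))
features = sdae.perform(new_dataset.get_design_matrix())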