def main():
    parser = OptionParser()
    parser.add_option("-d", "--data", dest="dataset", default="toy",
                      help="specify the dataset, either cifar10 or toy")
    (options, args) = parser.parse_args()

    global SAVE_MODEL

    if options.dataset == 'toy':
        trainset, testset = get_dataset_toy()
        SAVE_MODEL = False
    elif options.dataset == 'cifar10':
        trainset, testset = get_dataset_cifar10()
        SAVE_MODEL = True

    design_matrix = trainset.get_design_matrix()
    n_input = design_matrix.shape[1]

    # build layers
    layers = []
    structure = [[n_input, 400], [400, 50], [50, 100], [100, 2]]
    # layer 0: gaussian RBM
    layers.append(get_grbm(structure[0]))
    # layer 1: denoising AE
    layers.append(get_denoising_autoencoder(structure[1]))
    # layer 2: AE
    layers.append(get_autoencoder(structure[2]))
    # layer 3: logistic regression used in supervised training
    layers.append(get_logistic_regressor(structure[3]))

    # construct training sets for different layers
    trainset = [
        trainset,
        TransformerDataset(raw=trainset, transformer=layers[0]),
        TransformerDataset(raw=trainset,
                           transformer=StackedBlocks(layers[0:2])),
        TransformerDataset(raw=trainset,
                           transformer=StackedBlocks(layers[0:3]))
    ]

    # construct layer trainers
    layer_trainers = []
    layer_trainers.append(get_layer_trainer_sgd_rbm(layers[0], trainset[0]))
    layer_trainers.append(
        get_layer_trainer_sgd_autoencoder(layers[1], trainset[1]))
    layer_trainers.append(
        get_layer_trainer_sgd_autoencoder(layers[2], trainset[2]))
    layer_trainers.append(get_layer_trainer_logistic(layers[3], trainset[3]))

    # unsupervised pretraining
    for layer_trainer in layer_trainers[0:3]:
        layer_trainer.main_loop()

    # supervised training
    layer_trainers[-1].main_loop()
def build_stacked_RBM(nvis, nhids, batch_size, vis_type='binary',
                      input_mean_vis=None, irange=1e-3, rng=None):
    """
    Note from IG: this method doesn't seem to work correctly with Gaussian
    RBMs. In general, this is a difficult function to support, because it
    needs to pass the right arguments to the constructors of many kinds of
    RBMs. It would probably be better to just construct an instance of
    pylearn2.models.mlp.MLP with its hidden layers set to instances of
    pylearn2.models.mlp.RBM_Layer. If anyone is working on this kind of
    problem, a PR replacing this function with a helper function to make
    such an MLP would be very welcome.

    Allocate a StackedBlocks containing RBMs.

    The visible units of the input RBM can be either binary or gaussian;
    the other layers are all binary.
    """
    # TODO: not sure this is the right way of dealing with mean_vis.
    layers = []
    assert vis_type in ['binary', 'gaussian']
    if vis_type == 'binary':
        assert input_mean_vis is None
    elif vis_type == 'gaussian':
        assert input_mean_vis in (True, False)

    # The number of visible units in each layer is the initial input
    # size and the first k-1 hidden unit sizes.
    nviss = [nvis] + nhids[:-1]
    seq = izip(
        xrange(len(nhids)),
        nhids,
        nviss,
    )
    for k, nhid, nvis in seq:
        if k == 0 and vis_type == 'gaussian':
            rbm = GaussianBinaryRBM(nvis=nvis, nhid=nhid,
                                    batch_size=batch_size,
                                    irange=irange,
                                    rng=rng,
                                    mean_vis=input_mean_vis)
        else:
            rbm = RBM(nvis=nvis, nhid=nhid,
                      batch_size=batch_size,
                      irange=irange,
                      rng=rng)
        layers.append(rbm)

    # Create the stack
    return StackedBlocks(layers)
def build_stacked_ae(nvis, nhids, act_enc, act_dec,
                     tied_weights=False, irange=1e-3, rng=None,
                     corruptor=None, contracting=False):
    """Allocate a stack of autoencoders."""
    if not hasattr(rng, 'randn'):
        rng = numpy.random.RandomState(rng)
    layers = []
    final = {}
    # "Broadcast" arguments if they are singular, or accept sequences if
    # they are the same length as nhids
    for c in ['corruptor', 'contracting', 'act_enc', 'act_dec',
              'tied_weights', 'irange']:
        if type(locals()[c]) is not str and hasattr(locals()[c], '__len__'):
            assert len(nhids) == len(locals()[c])
            final[c] = locals()[c]
        else:
            final[c] = [locals()[c]] * len(nhids)

    # The number of visible units in each layer is the initial input
    # size and the first k-1 hidden unit sizes.
    nviss = [nvis] + nhids[:-1]
    seq = izip(
        nhids,
        nviss,
        final['act_enc'],
        final['act_dec'],
        final['corruptor'],
        final['contracting'],
        final['tied_weights'],
        final['irange'],
    )

    # Create each layer.
    for (nhid, nvis, act_enc, act_dec, corr, cae, tied, ir) in seq:
        args = (nvis, nhid, act_enc, act_dec, tied, ir, rng)
        if cae and corr is not None:
            raise ValueError("Can't specify denoising and contracting "
                             "objectives simultaneously")
        elif cae:
            autoenc = ContractiveAutoencoder(*args)
        elif corr is not None:
            autoenc = DenoisingAutoencoder(corr, *args)
        else:
            autoenc = Autoencoder(*args)
        layers.append(autoenc)

    # Create the stack
    return StackedBlocks(layers)
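# Hedged usage sketch (added; not part of the original code): a minimal
# example of calling build_stacked_ae above. The input size, hidden sizes
# and activations are illustrative assumptions, not values from this code.
def example_build_stacked_ae():
    ae_stack = build_stacked_ae(nvis=784, nhids=[500, 200],
                                act_enc='sigmoid', act_dec='sigmoid',
                                tied_weights=True, irange=1e-3)
    # the result is a StackedBlocks; it can be used as the `transformer`
    # of a TransformerDataset, as in the main() functions above
    return ae_stack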
def build_stacked_RBM(nvis, nhids, batch_size, vis_type='binary',
                      input_mean_vis=None, irange=1e-3, rng=None):
    """
    Allocate a StackedBlocks containing RBMs.

    The visible units of the input RBM can be either binary or gaussian;
    the other layers are all binary.
    """
    # TODO: not sure this is the right way of dealing with mean_vis.
    layers = []
    assert vis_type in ['binary', 'gaussian']
    if vis_type == 'binary':
        assert input_mean_vis is None
    elif vis_type == 'gaussian':
        assert input_mean_vis in (True, False)

    # The number of visible units in each layer is the initial input
    # size and the first k-1 hidden unit sizes.
    nviss = [nvis] + nhids[:-1]
    seq = izip(
        xrange(len(nhids)),
        nhids,
        nviss,
    )
    for k, nhid, nvis in seq:
        if k == 0 and vis_type == 'gaussian':
            rbm = GaussianBinaryRBM(nvis=nvis, nhid=nhid,
                                    batch_size=batch_size,
                                    irange=irange,
                                    rng=rng,
                                    mean_vis=input_mean_vis)
        else:
            rbm = RBM(nvis=nvis, nhid=nhid,
                      batch_size=batch_size,
                      irange=irange,
                      rng=rng)
        layers.append(rbm)

    # Create the stack
    return StackedBlocks(layers)
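# Hedged usage sketch (added; not part of the original code): a minimal
# example of calling build_stacked_RBM above with binary visible units.
# The sizes and batch_size are illustrative assumptions.
def example_build_stacked_RBM():
    rbm_stack = build_stacked_RBM(nvis=784, nhids=[500, 200],
                                  batch_size=100, vis_type='binary',
                                  irange=1e-3)
    # rbm_stack is a StackedBlocks holding one RBM per entry of nhids
    return rbm_stack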
def construct_ae(structure):
    # some settings
    irange = 0.1

    layers = []
    for vsize, hsize in zip(structure[:-1], structure[1:]):
        # DenoisingAutoencoder / ContractiveAutoencoder /
        # HigherOrderContractiveAutoencoder
        layers.append(autoencoder.DenoisingAutoencoder(
            nvis=vsize,
            nhid=hsize,
            tied_weights=True,
            act_enc='sigmoid',
            act_dec='sigmoid',
            irange=irange,
            # for DenoisingAutoencoder / HigherOrderContractiveAutoencoder:
            corruptor=BinomialCorruptor(0.5),
            # for HigherOrderContractiveAutoencoder:
            # num_corruptions=6
        ))
    return StackedBlocks(layers)
def construct_ae(structure):
    # some settings
    irange = 0.05

    layers = []
    for vsize, hsize in zip(structure[:-1], structure[1:]):
        # DenoisingAutoencoder? ContractiveAutoencoder?
        # HigherOrderContractiveAutoencoder?
        layers.append(autoencoder.ContractiveAutoencoder(
            # DenoisingAutoencoder:
            # corruptor=BinomialCorruptor(0.5),
            # HigherOrderContractiveAutoencoder:
            # corruptor=GaussianCorruptor(0.5),
            # num_corruptions=8,
            nvis=vsize,
            nhid=hsize,
            tied_weights=True,
            act_enc='sigmoid',
            act_dec='sigmoid',
            # act_enc=Rectify(),
            # act_dec=Rectify(),
            irange=irange))
    return StackedBlocks(layers)
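# Hedged usage sketch (added; not part of the original code): construct_ae
# takes a list of layer sizes and builds one autoencoder per adjacent pair
# of sizes. The sizes below are illustrative assumptions.
def example_construct_ae():
    # 784 -> 500 -> 100 gives two stacked autoencoders
    ae_stack = construct_ae([784, 500, 100])
    return ae_stack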
def main():
    parser = OptionParser()
    parser.add_option("-d", "--data", dest="dataset", default="toy",
                      help="specify the dataset, either cifar10, mnist or toy")
    (options, args) = parser.parse_args()

    if options.dataset == 'toy':
        trainset, testset = get_dataset_toy()
        n_output = 2
    elif options.dataset == 'cifar10':
        trainset, testset = get_dataset_cifar10()
        n_output = 10
    elif options.dataset == 'mnist':
        trainset, testset = get_dataset_mnist()
        n_output = 10
    else:
        raise NotImplementedError()

    design_matrix = trainset.get_design_matrix()
    n_input = design_matrix.shape[1]

    # build layers
    layers = []
    structure = [[n_input, 10], [10, 50], [50, 100], [100, n_output]]
    # layer 0: gaussian RBM
    layers.append(get_grbm(structure[0]))
    # layer 1: denoising AE
    layers.append(get_denoising_autoencoder(structure[1]))
    # layer 2: AE
    layers.append(get_autoencoder(structure[2]))
    # layer 3: logistic regression used in supervised training
    layers.append(get_logistic_regressor(structure[3]))

    # construct training sets for different layers
    trainset = [
        trainset,
        TransformerDataset(raw=trainset, transformer=layers[0]),
        TransformerDataset(raw=trainset,
                           transformer=StackedBlocks(layers[0:2])),
        TransformerDataset(raw=trainset,
                           transformer=StackedBlocks(layers[0:3]))
    ]

    # construct layer trainers
    layer_trainers = []
    layer_trainers.append(get_layer_trainer_sgd_rbm(layers[0], trainset[0]))
    layer_trainers.append(
        get_layer_trainer_sgd_autoencoder(layers[1], trainset[1]))
    layer_trainers.append(
        get_layer_trainer_sgd_autoencoder(layers[2], trainset[2]))
    layer_trainers.append(get_layer_trainer_logistic(layers[3], trainset[3]))

    # unsupervised pretraining
    for i, layer_trainer in enumerate(layer_trainers[0:3]):
        print '-----------------------------------'
        print ' Unsupervised training layer %d, %s' % (i, layers[i].__class__)
        print '-----------------------------------'
        layer_trainer.main_loop()

    print '\n'
    print '------------------------------------------------------'
    print ' Unsupervised training done! Start supervised training...'
    print '------------------------------------------------------'
    print '\n'

    # supervised training
    layer_trainers[-1].main_loop()
def main():
    trainset, validset, testset, extraset = get_dataset_icml()
    #trainset, testset = get_dataset_mnist()

    design_matrix = trainset.get_design_matrix()
    n_input = design_matrix.shape[1]
    n_output = 9  #10

    # build layers
    layers = []
    structure = [[n_input, 1000], [1000, 1000], [1000, 1000],
                 [1000, n_output]]
    # layer 0: GRBM
    layers.append(get_grbm(structure[0]))
    # layer 1: GRBM
    layers.append(get_grbm(structure[1]))
    # layer 2: GRBM
    layers.append(get_grbm(structure[2]))
    # layer 3: logistic regression used in supervised training
    #layers.append(get_logistic_regressor(structure[3]))

    # construct training sets for different layers
    traindata = [
        extraset,
        TransformerDataset(raw=extraset, transformer=layers[0]),
        TransformerDataset(raw=extraset,
                           transformer=StackedBlocks(layers[0:2])),
        TransformerDataset(raw=extraset,
                           transformer=StackedBlocks(layers[0:3]))
    ]
    #valid = TransformerDataset(raw=validset,
    #                           transformer=StackedBlocks(layers[0:2]))
    #valid = trainset

    # construct layer trainers
    layer_trainers = []
    layer_trainers.append(
        get_layer_trainer_sgd_rbm(layers[0], traindata[0], 'db1.pkl'))
    layer_trainers.append(
        get_layer_trainer_sgd_rbm(layers[1], traindata[1], 'db2.pkl'))
    layer_trainers.append(
        get_layer_trainer_sgd_rbm(layers[2], traindata[2], 'db3.pkl'))
    #layer_trainers.append(get_layer_trainer_logistic(layers[2],
    #                                                 trainset[2], valid))

    # unsupervised pretraining
    for i, layer_trainer in enumerate(layer_trainers[0:3]):
        print '-----------------------------------'
        print ' Unsupervised training (pretraining) layer %d, %s' % (i, layers[i].__class__)
        print '-----------------------------------'
        layer_trainer.main_loop()

    print '\n'
    print '------------------------------------------------------'
    print ' Unsupervised training done! Start supervised training (fine-tuning)...'
    print '------------------------------------------------------'
    print '\n'

    # wrap the pretrained RBMs as MLP layers for supervised fine-tuning
    mlp_layers = []
    mlp_layers.append(PretrainedLayer(layer_name='h0',
                                      layer_content=serial.load('db1.pkl')))
    mlp_layers.append(PretrainedLayer(layer_name='h1',
                                      layer_content=serial.load('db2.pkl')))
    mlp_layers.append(PretrainedLayer(layer_name='h2',
                                      layer_content=serial.load('db3.pkl')))

    # supervised training
    #layer_trainers[-1].main_loop()
    mlp_model = get_layer_MLP(mlp_layers, trainset, validset)
    mlp_model.main_loop()
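# Hedged sketch (added; not part of the original code): get_layer_MLP is
# called above but not defined in this snippet. The function below is one
# plausible reconstruction using the standard pylearn2 MLP/SGD/Train API;
# every hyperparameter (learning rate, batch size, epoch count, softmax
# irange) is an assumption, not a value from the original experiment.
from pylearn2.models.mlp import MLP, Softmax
from pylearn2.train import Train
from pylearn2.training_algorithms.sgd import SGD
from pylearn2.termination_criteria import EpochCounter


def get_layer_MLP(mlp_layers, trainset, validset):
    n_input = trainset.get_design_matrix().shape[1]
    # stack the pretrained layers and add a softmax output for the 9 classes
    layers = mlp_layers + [Softmax(n_classes=9, layer_name='y', irange=0.05)]
    model = MLP(layers=layers, nvis=n_input, batch_size=100)
    # plain SGD fine-tuning of the whole stack, monitored on the validation set
    algorithm = SGD(learning_rate=0.05,
                    batch_size=100,
                    monitoring_dataset=validset,
                    termination_criterion=EpochCounter(max_epochs=50))
    return Train(dataset=trainset, model=model, algorithm=algorithm)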
# First layer: train or load a PCA
pca1 = create_pca(conf, layer1, data, model=layer1['name'])
data = [utils.sharedX(pca1.function()(set.get_value(borrow=True)),
                      borrow=True)
        for set in data]

# Second layer: train or load a DAE or CAE
ae = create_ae(conf, layer2, data, model=layer2['name'])
data = [utils.sharedX(ae.function()(set.get_value(borrow=True)),
                      borrow=True)
        for set in data]

# Third layer: train or load a PCA
pca2 = create_pca(conf, layer3, data, model=layer3['name'])
data = [utils.sharedX(pca2.function()(set.get_value(borrow=True)),
                      borrow=True)
        for set in data]

# Compute the ALC on the examples that have labels
if conf['transfer']:
    data_train, label_train = utils.filter_labels(data[0], label)
    alc = embed.score(data_train, label_train)
    print '... resulting ALC on train is', alc
    conf['train_alc'] = alc

# Stack all three layers and create the submission file
block = StackedBlocks([pca1, ae, pca2])
utils.create_submission(conf, block.function())