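# Missing-data imputation on MNIST with a 6-layer convolutional variational
# auto-encoder (Theano / Python 2). A model pretrained elsewhere (predir) is
# loaded, each perturbed test image is repeatedly encoded and decoded, and the
# observed pixels are clamped back after every pass so that only the missing
# region is resampled.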
import os
import time

import numpy as np
import theano
import theano.tensor as T

# Project-local modules from this repository; the exact import paths may
# differ in your checkout (e.g. they may live under util/ and layer/ packages).
import datapy
import color
import paramgraphics
import nonlinearity
import optimizer
import Pegasos
import ConvMaxPool
import FullyConnected
import GaussianHidden
import UnpoolConvNon
import NoParamsBernoulliVisiable


def c_6layer_mnist_imputation(seed=0,
                              ctype='cva',
                              pertub_type=3,
                              pertub_prob=6,
                              pertub_prob1=14,
                              visualization_times=20,
                              denoise_times=200,
                              predir=None,
                              n_batch=144,
                              dataset='mnist.pkl.gz',
                              batch_size=500):
    """Missing data imputation on MNIST with a pretrained 6-layer conv VAE."""
    # cp->cd->cpd->cd->c
    nkerns = [32, 32, 64, 64, 64]
    drops = [0, 0, 0, 0, 0, 1]
    #skerns=[5, 3, 3, 3, 3]
    #pools=[2, 1, 1, 2, 1]
    #modes=['same']*5
    n_hidden = [500, 50]
    drop_inverses = [1, ]
    # 28->12->12->5->5 / 5*5*64->500->50->500->5*5*64 / 5->5->12->12->28
    if dataset == 'mnist.pkl.gz':
        dim_input = (28, 28)
        colorImg = False

    logdir = ('results/imputation/' + ctype + '/mnist/' + ctype +
              '_6layer_mnist_' + str(pertub_type) + '_' + str(pertub_prob) +
              '_' + str(pertub_prob1) + '_' + str(denoise_times) + '_')
    logdir += str(int(time.time())) + '/'
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    print predir
    with open(logdir + 'hook.txt', 'a') as f:
        print >> f, predir

    train_set_x, test_set_x, test_set_x_pertub, pertub_label, pertub_number = \
        datapy.load_pertub_data(dirs='data_imputation/',
                                pertub_type=pertub_type,
                                pertub_prob=pertub_prob,
                                pertub_prob1=pertub_prob1)

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)
    _, _, _ = datasets[0]
    valid_set_x, _, _ = datasets[1]
    _, _, _ = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    x_pertub = T.matrix('x_pertub')  # the data is presented as rasterized images
    p_label = T.matrix('p_label')
    random_z = T.matrix('random_z')
    drop = T.iscalar('drop')
    drop_inverse = T.iscalar('drop_inverse')

    activation = nonlinearity.relu
    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

    input_x = x_pertub.reshape((batch_size, 1, 28, 28))

    recg_layer = []
    cnn_output = []

    # 1: 28x28 -> 12x12
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2),
        border_mode='valid',
        activation=activation
    ))
    if drops[0] == 1:
        cnn_output.append(recg_layer[-1].drop_output(input=input_x,
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(input=input_x))

    # 2: 12x12 -> 12x12
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 3, 3),
        poolsize=(1, 1),
        border_mode='same',
        activation=activation
    ))
    if drops[1] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    # 3: 12x12 -> 5x5
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[1], 12, 12),
        filter_shape=(nkerns[2], nkerns[1], 3, 3),
        poolsize=(2, 2),
        border_mode='valid',
        activation=activation
    ))
    if drops[2] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    # 4: 5x5 -> 5x5
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[2], 5, 5),
        filter_shape=(nkerns[3], nkerns[2], 3, 3),
        poolsize=(1, 1),
        border_mode='same',
        activation=activation
    ))
    if drops[3] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    # 5: 5x5 -> 5x5
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[3], 5, 5),
        filter_shape=(nkerns[4], nkerns[3], 3, 3),
        poolsize=(1, 1),
        border_mode='same',
        activation=activation
    ))
    if drops[4] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    mlp_input_x = cnn_output[-1].flatten(2)

    activations = []

    # 1
    recg_layer.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=5 * 5 * nkerns[-1],
        n_out=n_hidden[0],
        activation=activation
    ))
    if drops[-1] == 1:
        activations.append(recg_layer[-1].drop_output(input=mlp_input_x,
                                                      drop=drop,
                                                      rng=rng_share))
    else:
        activations.append(recg_layer[-1].output(input=mlp_input_x))

    # stochastic layer
    recg_layer.append(GaussianHidden.GaussianHidden(
        rng=rng,
        input=activations[-1],
        n_in=n_hidden[0],
        n_out=n_hidden[1],
        activation=None
    ))
    z = recg_layer[-1].sample_z(rng_share)

    gene_layer = []
    z_output = []
    random_z_output = []

    # 1
    gene_layer.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=n_hidden[1],
        n_out=n_hidden[0],
        activation=activation
    ))
    z_output.append(gene_layer[-1].output(input=z))
    random_z_output.append(gene_layer[-1].output(input=random_z))

    # 2
    gene_layer.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=n_hidden[0],
        n_out=5 * 5 * nkerns[-1],
        activation=activation
    ))
    if drop_inverses[0] == 1:
        z_output.append(gene_layer[-1].drop_output(input=z_output[-1],
                                                   drop=drop_inverse,
                                                   rng=rng_share))
        random_z_output.append(gene_layer[-1].drop_output(
            input=random_z_output[-1], drop=drop_inverse, rng=rng_share))
    else:
        z_output.append(gene_layer[-1].output(input=z_output[-1]))
        random_z_output.append(gene_layer[-1].output(input=random_z_output[-1]))

    input_z = z_output[-1].reshape((batch_size, nkerns[-1], 5, 5))
    input_random_z = random_z_output[-1].reshape((n_batch, nkerns[-1], 5, 5))

    # 1: 5x5 -> 5x5
    gene_layer.append(UnpoolConvNon.UnpoolConvNon(
        rng,
        image_shape=(batch_size, nkerns[-1], 5, 5),
        filter_shape=(nkerns[-2], nkerns[-1], 3, 3),
        poolsize=(1, 1),
        border_mode='same',
        activation=activation
    ))
    z_output.append(gene_layer[-1].output(input=input_z))
    random_z_output.append(gene_layer[-1].output_random_generation(
        input=input_random_z, n_batch=n_batch))

    # 2: 5x5 -> 12x12
    gene_layer.append(UnpoolConvNon.UnpoolConvNon(
        rng,
        image_shape=(batch_size, nkerns[-2], 5, 5),
        filter_shape=(nkerns[-3], nkerns[-2], 3, 3),
        poolsize=(2, 2),
        border_mode='full',
        activation=activation
    ))
    z_output.append(gene_layer[-1].output(input=z_output[-1]))
    random_z_output.append(gene_layer[-1].output_random_generation(
        input=random_z_output[-1], n_batch=n_batch))

    # 3: 12x12 -> 12x12
    gene_layer.append(UnpoolConvNon.UnpoolConvNon(
        rng,
        image_shape=(batch_size, nkerns[-3], 12, 12),
        filter_shape=(nkerns[-4], nkerns[-3], 3, 3),
        poolsize=(1, 1),
        border_mode='same',
        activation=activation
    ))
    z_output.append(gene_layer[-1].output(input=z_output[-1]))
    random_z_output.append(gene_layer[-1].output_random_generation(
        input=random_z_output[-1], n_batch=n_batch))

    # 4: 12x12 -> 12x12
    gene_layer.append(UnpoolConvNon.UnpoolConvNon(
        rng,
        image_shape=(batch_size, nkerns[-4], 12, 12),
        filter_shape=(nkerns[-5], nkerns[-4], 3, 3),
        poolsize=(1, 1),
        border_mode='same',
        activation=activation
    ))
    z_output.append(gene_layer[-1].output(input=z_output[-1]))
    random_z_output.append(gene_layer[-1].output_random_generation(
        input=random_z_output[-1], n_batch=n_batch))

    # 5: stochastic layer, 12x12 -> 28x28
    # for the last layer, the nonlinearity should be sigmoid so the output is
    # the mean of a Bernoulli
    gene_layer.append(UnpoolConvNon.UnpoolConvNon(
        rng,
        image_shape=(batch_size, nkerns[-5], 12, 12),
        filter_shape=(1, nkerns[-5], 5, 5),
        poolsize=(2, 2),
        border_mode='full',
        activation=nonlinearity.sigmoid
    ))
    z_output.append(gene_layer[-1].output(input=z_output[-1]))
    random_z_output.append(gene_layer[-1].output_random_generation(
        input=random_z_output[-1], n_batch=n_batch))

    gene_layer.append(NoParamsBernoulliVisiable.NoParamsBernoulliVisiable(
        #rng=rng,
        #mean=z_output[-1],
        #data=input_x,
    ))
    logpx = gene_layer[-1].logpx(mean=z_output[-1], data=input_x)

    # 4-D tensor of random generation
    random_x_mean = random_z_output[-1]
    random_x = gene_layer[-1].sample_x(rng_share, random_x_mean)

    # clamp the observed pixels (p_label == 1) and keep the reconstruction
    # only where pixels are missing; since observed pixels are clamped, only
    # the missing pixels contribute to the MSE, which is normalized by the
    # number of perturbed pixels
    x_denoised = z_output[-1].flatten(2)
    x_denoised = p_label * x + (1 - p_label) * x_denoised
    mse = ((x - x_denoised) ** 2).sum() / pertub_number

    params = []
    for g in gene_layer:
        params += g.params
    for r in recg_layer:
        params += r.params

    train_activations = theano.function(
        inputs=[index],
        outputs=T.concatenate(activations, axis=1),
        givens={
            x_pertub: train_set_x[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    valid_activations = theano.function(
        inputs=[index],
        outputs=T.concatenate(activations, axis=1),
        givens={
            x_pertub: valid_set_x[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    test_activations = theano.function(
        inputs=[x_pertub],
        outputs=T.concatenate(activations, axis=1),
        givens={
            drop: np.cast['int32'](0)
        }
    )

    imputation_model = theano.function(
        inputs=[index, x_pertub],
        outputs=[x_denoised, mse],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            p_label: pertub_label[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0),
            drop_inverse: np.cast['int32'](0)
        }
    )

    ##################
    # Pretrain MODEL #
    ##################
    model_epoch = 600
    if 'model_epoch' in os.environ:
        model_epoch = int(os.environ['model_epoch'])

    if predir is not None:
        color.printBlue('... setting parameters')
        color.printBlue(predir)
        if model_epoch == -1:
            pre_train = np.load(predir + 'best-model.npz')
        else:
            pre_train = np.load(predir + 'model-' + str(model_epoch) + '.npz')
        pre_train = pre_train['model']
        if ctype == 'cva':
            for (para, pre) in zip(params, pre_train):
                para.set_value(pre)
        elif ctype == 'cmmva':
            # cmmva checkpoints end with classifier parameters, skipped here
            for (para, pre) in zip(params, pre_train[:-2]):
                para.set_value(pre)
        else:
            exit()
    else:
        exit()

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    epoch = 0
    n_visualization = 100
    # columns: original, perturbed, then one snapshot per visualized epoch
    output = np.ones((n_visualization, visualization_times + 2, 784))
    output[:, 0, :] = test_set_x.get_value()[:n_visualization, :]
    output[:, 1, :] = test_set_x_pertub.get_value()[:n_visualization, :]
    image = paramgraphics.mat_to_img(output[:, 0, :].T, dim_input,
                                     colorImg=colorImg)
    image.save(logdir + 'data.png', 'PNG')
    image = paramgraphics.mat_to_img(output[:, 1, :].T, dim_input,
                                     colorImg=colorImg)
    image.save(logdir + 'data_pertub.png', 'PNG')

    tmp = test_set_x_pertub.get_value()

    while epoch < denoise_times:
        epoch = epoch + 1
        this_mse = 0
        for i in xrange(n_test_batches):
            d, m = imputation_model(i, tmp[i * batch_size: (i + 1) * batch_size])
            tmp[i * batch_size: (i + 1) * batch_size] = np.asarray(d)
            this_mse += m
        if epoch <= visualization_times:
            output[:, epoch + 1, :] = tmp[:n_visualization, :]
        print epoch, this_mse
        with open(logdir + 'hook.txt', 'a') as f:
            print >> f, epoch, this_mse
        image = paramgraphics.mat_to_img(tmp[:n_visualization, :].T, dim_input,
                                         colorImg=colorImg)
        image.save(logdir + 'procedure-' + str(epoch) + '.png', 'PNG')
        np.savez(logdir + 'procedure-' + str(epoch), tmp=tmp)

    # tile_shape: 22 = visualization_times + 2 for the default setting
    image = paramgraphics.mat_to_img((output.reshape(-1, 784)).T, dim_input,
                                     colorImg=colorImg,
                                     tile_shape=(n_visualization, 22))
    image.save(logdir + 'output.png', 'PNG')
    np.savez(logdir + 'output', output=output)

    # save original train features and denoised test features
    for i in xrange(n_train_batches):
        if i == 0:
            train_features = np.asarray(train_activations(i))
        else:
            train_features = np.vstack(
                (train_features, np.asarray(train_activations(i))))

    for i in xrange(n_valid_batches):
        if i == 0:
            valid_features = np.asarray(valid_activations(i))
        else:
            valid_features = np.vstack(
                (valid_features, np.asarray(valid_activations(i))))

    for i in xrange(n_test_batches):
        if i == 0:
            test_features = np.asarray(
                test_activations(tmp[i * batch_size: (i + 1) * batch_size]))
        else:
            test_features = np.vstack(
                (test_features,
                 np.asarray(test_activations(
                     tmp[i * batch_size: (i + 1) * batch_size]))))

    np.save(logdir + 'train_features', train_features)
    np.save(logdir + 'valid_features', valid_features)
    np.save(logdir + 'test_features', test_features)
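
# The denoising loop above repeats one update: run the current guess through
# the encoder/decoder, then clamp the observed pixels back to their true
# values with the binary mask (pertub_label: 1 = observed, 0 = missing).
# A minimal NumPy sketch of that update follows; `reconstruct` is a
# hypothetical stand-in for one encode/decode pass of the trained model and
# is not part of this repository.
def _imputation_update_sketch(x_true, x_pertub, mask, reconstruct,
                              n_iter=200, pertub_number=None):
    if pertub_number is None:
        pertub_number = (1 - mask).sum()
    x_hat = x_pertub.copy()
    for _ in xrange(n_iter):
        # keep observed pixels, resample missing ones from the reconstruction
        x_hat = mask * x_true + (1 - mask) * reconstruct(x_hat)
    # observed pixels are clamped, so only missing pixels contribute;
    # normalizing by pertub_number matches the Theano graph above
    mse = ((x_true - x_hat) ** 2).sum() / pertub_number
    return x_hat, mse
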
def c_6layer_mnist_imputation(seed=0,
                              pertub_type=3,
                              pertub_prob=6,
                              pertub_prob1=14,
                              predir=None,
                              n_batch=144,
                              dataset='mnist.pkl.gz',
                              batch_size=500):
    """Classification error on clean and perturbed MNIST test data.

    Despite its name, this variant does no imputation: it loads a pretrained
    recognition network plus Pegasos classifier and reports the validation
    error and the test error on both the clean and the perturbed test set.

    NOTE: this definition shadows the imputation function of the same name
    above if both are kept in one module.
    """
    # cp->cd->cpd->cd->c
    nkerns = [32, 32, 64, 64, 64]
    drops = [0, 0, 0, 0, 0, 1]
    #skerns=[5, 3, 3, 3, 3]
    #pools=[2, 1, 1, 2, 1]
    #modes=['same']*5
    n_hidden = [500, 50]
    drop_inverses = [1, ]
    # 28->12->12->5->5 / 5*5*64->500->50->500->5*5*64 / 5->5->12->12->28
    if dataset == 'mnist.pkl.gz':
        dim_input = (28, 28)
        colorImg = False

    train_set_x, test_set_x, test_set_x_pertub, pertub_label, pertub_number = \
        datapy.load_pertub_data(dirs='data_imputation/',
                                pertub_type=pertub_type,
                                pertub_prob=pertub_prob,
                                pertub_prob1=pertub_prob1)

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)
    _, train_set_y, train_y_matrix = datasets[0]
    valid_set_x, valid_set_y, valid_y_matrix = datasets[1]
    _, test_set_y, test_y_matrix = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    #x_pertub = T.matrix('x_pertub')  # the data is presented as rasterized images
    #p_label = T.matrix('p_label')
    y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels
    y_matrix = T.imatrix('y_matrix')
    drop = T.iscalar('drop')
    drop_inverse = T.iscalar('drop_inverse')

    activation = nonlinearity.relu
    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

    input_x = x.reshape((batch_size, 1, 28, 28))

    recg_layer = []
    cnn_output = []

    # 1: 28x28 -> 12x12
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2),
        border_mode='valid',
        activation=activation
    ))
    if drops[0] == 1:
        cnn_output.append(recg_layer[-1].drop_output(input=input_x,
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(input=input_x))

    # 2: 12x12 -> 12x12
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 3, 3),
        poolsize=(1, 1),
        border_mode='same',
        activation=activation
    ))
    if drops[1] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    # 3: 12x12 -> 5x5
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[1], 12, 12),
        filter_shape=(nkerns[2], nkerns[1], 3, 3),
        poolsize=(2, 2),
        border_mode='valid',
        activation=activation
    ))
    if drops[2] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    # 4: 5x5 -> 5x5
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[2], 5, 5),
        filter_shape=(nkerns[3], nkerns[2], 3, 3),
        poolsize=(1, 1),
        border_mode='same',
        activation=activation
    ))
    if drops[3] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    # 5: 5x5 -> 5x5
    recg_layer.append(ConvMaxPool.ConvMaxPool(
        rng,
        image_shape=(batch_size, nkerns[3], 5, 5),
        filter_shape=(nkerns[4], nkerns[3], 3, 3),
        poolsize=(1, 1),
        border_mode='same',
        activation=activation
    ))
    if drops[4] == 1:
        cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1],
                                                     drop=drop,
                                                     rng=rng_share))
    else:
        cnn_output.append(recg_layer[-1].output(cnn_output[-1]))

    mlp_input = cnn_output[-1].flatten(2)

    recg_layer.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=nkerns[4] * 5 * 5,
        n_out=500,
        activation=activation
    ))
    feature = recg_layer[-1].drop_output(mlp_input, drop=drop, rng=rng_share)

    # classify the values of the last fully-connected layer
    classifier = Pegasos.Pegasos(input=feature, rng=rng, n_in=500, n_out=10,
                                 weight_decay=0, loss=1)

    # the training cost is the multiclass hinge loss (kept for completeness;
    # this function only evaluates a pretrained model)
    cost = classifier.hinge_loss(10, y, y_matrix) * batch_size
    weight_decay = 1.0 / n_train_batches

    # create a list of all model parameters to be fit by gradient descent
    params = []
    for r in recg_layer:
        params += r.params
    params += classifier.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    learning_rate = 3e-4
    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r,
                                                     decay1=0.1,
                                                     decay2=0.001,
                                                     weight_decay=weight_decay)
    updates = get_optimizer(params, grads)

    # save parameters and activations
    parameters = theano.function(
        inputs=[],
        outputs=params,
    )

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    test_pertub_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x_pertub[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    ##################
    # Pretrain MODEL #
    ##################
    model_epoch = 250
    if 'model_epoch' in os.environ:
        model_epoch = int(os.environ['model_epoch'])

    if predir is not None:
        color.printBlue('... setting parameters')
        color.printBlue(predir)
        if model_epoch == -1:
            pre_train = np.load(predir + 'best-model.npz')
        else:
            pre_train = np.load(predir + 'model-' + str(model_epoch) + '.npz')
        pre_train = pre_train['model']
        for (para, pre) in zip(params, pre_train):
            para.set_value(pre)
    else:
        exit()

    ###############
    # TRAIN MODEL #
    ###############
    # no training happens here; the pretrained model is only evaluated
    valid_losses = [validate_model(i) for i in xrange(n_valid_batches)]
    valid_score = np.mean(valid_losses)
    test_losses = [test_model(i) for i in xrange(n_test_batches)]
    test_score = np.mean(test_losses)
    test_losses_pertub = [test_pertub_model(i) for i in xrange(n_test_batches)]
    test_score_pertub = np.mean(test_losses_pertub)

    print valid_score, test_score, test_score_pertub
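
# classifier.errors(y) above is expected to reduce to a mean
# misclassification rate; a NumPy equivalent, assuming `scores` holds the
# per-class outputs for a batch (a sketch, not the Pegasos layer's code):
def _error_rate_sketch(scores, y):
    # fraction of rows whose argmax disagrees with the integer label
    return np.mean(np.argmax(scores, axis=1) != y)
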
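
# Hypothetical usage of the evaluation variant defined last (see the
# shadowing NOTE in its docstring). The pretrained-model directory is a
# placeholder; it must contain model-<epoch>.npz or best-model.npz files
# saved by the training scripts:
#
#   c_6layer_mnist_imputation(seed=0, pertub_type=3, pertub_prob=6,
#                             pertub_prob1=14,
#                             predir='PATH/TO/PRETRAINED/MODEL/')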