def perform_lcn(saveDir, strl, x, y):
    """Run local contrast normalisation over a batch of flattened images.

    Prints range statistics and saves a preview image of the first 100 rows
    both before and after the normalisation step.

    Args:
        saveDir: string prefix the preview PNG file names are appended to
            (plain concatenation, so a directory needs its trailing separator).
        strl: tag printed to the console and embedded in the PNG file names.
        x: array that can be reshaped to (-1, 3, 32, 32); cast to float32.
        y: labels; only ``y[:10]`` and ``y[40:50]`` are printed.

    Returns:
        The float32 array reshaped back to (-1, 32 * 32 * 3).
    """
    n_channels = (0, 1, 2)
    dim_input = (32, 32)
    colorImg = True

    def _print_range(batch):
        # Global extrema followed by the extrema of the per-row means.
        print(batch.max())
        print(batch.min())
        row_means = np.mean(batch, axis=1)
        print(np.max(row_means))
        print(np.min(row_means))

    def _save_preview(batch, file_name):
        # Render the first 100 rows as one tiled image.
        preview = paramgraphics.mat_to_img(batch[:100, :].T, dim_input,
                                           colorImg=colorImg, scale=True)
        preview.save(file_name, 'PNG')

    x = x.astype(np.float32)
    print(x.shape)
    _print_range(x)
    print(strl)
    print(y[:10])
    print(y[40:50])
    _save_preview(x, saveDir + 'svhn_before_lcn_gcn_norm_' + strl + '.png')

    # flatten -> 'b,c,0,1' -> 'b,0,1,c'
    x = np.transpose(x.reshape(-1, 3, 32, 32), (0, 2, 3, 1))
    # NOTE(review): the return value is discarded, so this presumably
    # normalises x in place -- confirm against lcn.transform.
    lcn.transform(x=x, channels=n_channels, img_shape=dim_input)
    print(x.shape)

    # 'b,0,1,c' -> 'b,c,0,1' -> flatten
    x = np.transpose(x, (0, 3, 1, 2)).reshape((-1, 32 * 32 * 3))
    _print_range(x)
    _save_preview(x, saveDir + 'svhn_after_lcn_gcn_norm_' + strl + '.png')
    return x
def perform_lcn(saveDir, strl, x, y):
    """Locally contrast-normalise flattened 3x32x32 images.

    Range statistics are printed and a preview PNG of the first 100 rows is
    written before and after the normalisation.

    Args:
        saveDir: prefix string for the two preview PNG paths (concatenated
            as-is with the file name).
        strl: tag that is printed and embedded in the PNG file names.
        x: array reshapeable to (-1, 3, 32, 32); converted to float32.
        y: labels, used only for the two diagnostic slices that are printed.

    Returns:
        The float32 data reshaped to (-1, 32 * 32 * 3).
    """
    channel_ids = (0, 1, 2)
    image_size = (32, 32)
    is_colour = True

    x = x.astype(np.float32)
    print(x.shape)
    means_before = np.mean(x, axis=1)
    for value in (x.max(), x.min(), np.max(means_before), np.min(means_before)):
        print(value)
    for value in (strl, y[:10], y[40:50]):
        print(value)

    before_path = saveDir + 'svhn_before_lcn_gcn_norm_' + strl + '.png'
    image = paramgraphics.mat_to_img(x[:100, :].T, image_size,
                                     colorImg=is_colour, scale=True)
    image.save(before_path, 'PNG')

    # flatten -> 'b,c,0,1' -> 'b,0,1,c' (channels last)
    x = x.reshape(-1, 3, 32, 32)
    x = np.transpose(x, (0, 2, 3, 1))
    # NOTE(review): result is not assigned; presumably x is modified in
    # place by lcn.transform -- confirm.
    lcn.transform(x=x, channels=channel_ids, img_shape=image_size)
    print(x.shape)

    # 'b,0,1,c' -> 'b,c,0,1' -> flatten
    x = np.transpose(x, (0, 3, 1, 2))
    x = x.reshape((-1, 32 * 32 * 3))
    means_after = np.mean(x, axis=1)
    for value in (x.max(), x.min(), np.max(means_after), np.min(means_after)):
        print(value)

    after_path = saveDir + 'svhn_after_lcn_gcn_norm_' + strl + '.png'
    image = paramgraphics.mat_to_img(x[:100, :].T, image_size,
                                     colorImg=is_colour, scale=True)
    image.save(after_path, 'PNG')
    return x
# Read the original test data, its perturbed copy and the perturbation mask
# out of `zz`; every array is transposed right after it is read.
data = zz['z_test_original'].T
print data.shape
data_perturbed = zz['z_test'].T
print data_perturbed.shape
pertub_label = zz['pertub_label'].astype(np.float32).T
print pertub_label.shape
# Sum of (1 - mask); used below to normalise the reported error.
# NOTE(review): presumably the mask is 0 on perturbed entries, which would make
# this the number of perturbed entries -- confirm against the producer of `zz`.
pertub_number = float(np.sum(1-pertub_label))
print pertub_number
denoise_epochs = 100  # number of test_denoise passes
visualization_epochs = 20  # only the first passes are stacked into `images`
num_vis = 100  # rows stacked into `images` per pass
num_vis1 = 64  # rows rendered into the two preview PNGs
images = data[:num_vis,:]
# Preview of the original data and of the perturbed data before any denoising.
image = paramgraphics.mat_to_img(data[:num_vis1,:].T, dim_input, colorImg=colorImg, scale=True)
image.save(os.path.join(res_out, 'data.png'), 'PNG')
image = paramgraphics.mat_to_img(data_perturbed[:num_vis1,:].T, dim_input, colorImg=colorImg, scale=True)
image.save(os.path.join(res_out, 'before_denoise.png'), 'PNG')
# Repeatedly feed the current estimate back through test_denoise and log the
# error it returns divided by pertub_number (console and `logfile`).
# NOTE(review): SOURCE is whitespace-collapsed; the nesting below (only the
# vstack guarded by the `if`) is the most plausible reading -- confirm.
for i in xrange(denoise_epochs):
    data_perturbed = data_perturbed.astype(np.float32)
    if i < visualization_epochs:
        images = np.vstack((images, data_perturbed[:num_vis,:]))
    data_perturbed, mse = test_denoise(data_perturbed, pertub_label, 1000)
    print mse / pertub_number
    with open(logfile,'a') as f:
        f.write(str(mse / pertub_number) + "\n")
#tile_shape = (visualization_epochs+1, num_vis)
# Zero out the column band [start, end) of every image row: a per-row mask is
# tiled `height` times and multiplied into the existing perturbation mask.
start = int(pertub_prob)
end = int(pertub_prob1)
data_perturbed = np.zeros(data.shape)
tmp_a = np.ones(width)
tmp_a[start:end] = 0
tmp_b = np.tile(tmp_a, height)
print(tmp_b.shape)
print(pertub_label.shape)
pertub_label = (pertub_label.T*tmp_b).T
# Entries with mask 1 keep the data; the rest take the (all-zero) fill value.
data_perturbed = pertub_label*data+(1-pertub_label)*data_perturbed

# Pick the output file names for the perturbation type; other types save nothing.
if pertub_type == 4:
    mat_params = str(int(pertub_prob*100))
    png_path = 'data_imputation/test_noise_4_'+str(pertub_prob)+'.png'
elif pertub_type == 3:
    mat_params = str(pertub_prob)
    png_path = 'data_imputation/test_noise_3_'+str(pertub_prob)+'.png'
elif pertub_type == 5:
    mat_params = str(start)+'_'+str(end)
    png_path = 'data_imputation/test_noise_5_'+str(start)+'_'+str(end)+'.png'
else:
    mat_params = None
    png_path = None

if mat_params is not None:
    mat_path = 'data_imputation/type_'+str(pertub_type)+'_params_'+mat_params+'_noise_rawdata.mat'
    sio.savemat(mat_path, {'z_train' : x_train.T,
                           'z_test_original' : data,
                           'z_test' : data_perturbed,
                           'pertub_label' : pertub_label})
    # Preview of the first 25 columns of the perturbed data as 28x28 tiles.
    image = paramgraphics.mat_to_img(data_perturbed[:,:25], (28,28), colorImg=False, scale=True)
    image.save(png_path, 'PNG')
# Copy the labels in `y` one by one into the third batch's label list.
batch3_labels = []
for i in np.arange(len(y)):
    batch3_labels.append(y[i])
# Convert the third batch to arrays; the data is transposed after conversion.
# NOTE(review): `batch3_data` is filled outside the visible excerpt -- the
# 'n x f' print below suggests the result is rows=samples; confirm.
batch3_data = np.asarray(batch3_data).T
batch3_labels = np.asarray(batch3_labels)
# Console check of the shapes of all three batches.
print 'Check n x f'
print batch1_data.shape
print batch1_labels.shape
print batch2_data.shape
print batch2_labels.shape
print batch3_data.shape
print batch3_labels.shape
# Save a preview of the first 100 rows of each batch; the file names label
# batch1/batch2/batch3 as train/valid/test.
image = paramgraphics.mat_to_img(batch1_data[:100, :].T, dim_input, colorImg=colorImg, scale=True)
image.save(saveDir + 'svhn_train.png', 'PNG')
image = paramgraphics.mat_to_img(batch2_data[:100, :].T, dim_input, colorImg=colorImg, scale=True)
image.save(saveDir + 'svhn_valid.png', 'PNG')
image = paramgraphics.mat_to_img(batch3_data[:100, :].T, dim_input, colorImg=colorImg, scale=True)
image.save(saveDir + 'svhn_test.png', 'PNG')
# NOTE(review): the call below is cut off at the end of the visible excerpt;
# its remaining arguments are not shown here.
if preprocessing == 'gcn_var':
    batch1_data = pypp.global_contrast_normalize(batch1_data,
def cmmva_6layer_svhn(learning_rate=0.01, n_epochs=600, dataset='svhngcn_var', batch_size=500, dropout_flag=1, seed=0, predir=None, activation=None, n_batch=625, weight_decay=1e-4, super_predir=None, super_preepoch=None): """ Implementation of convolutional MMVA """ ''' svhn ''' n_channels = 3 colorImg = True dim_w = 32 dim_h = 32 dim_input=(dim_h, dim_w) n_classes = 10 D = 1.0 C = 1.0 if os.environ.has_key('C'): C = np.cast['float32'](float((os.environ['C']))) if os.environ.has_key('D'): D = np.cast['float32'](float((os.environ['D']))) color.printRed('D '+str(D)+' C '+str(C)) first_drop=0.5 if os.environ.has_key('first_drop'): first_drop = float(os.environ['first_drop']) last_drop=1 if os.environ.has_key('last_drop'): last_drop = float(os.environ['last_drop']) nkerns_1=96 if os.environ.has_key('nkerns_1'): nkerns_1 = int(os.environ['nkerns_1']) nkerns_2=96 if os.environ.has_key('nkerns_2'): nkerns_2 = int(os.environ['nkerns_2']) n_z=512 if os.environ.has_key('n_z'): n_z = int(os.environ['n_z']) opt_med='adam' if os.environ.has_key('opt_med'): opt_med = os.environ['opt_med'] train_logvar=True if os.environ.has_key('train_logvar'): train_logvar = bool(int(os.environ['train_logvar'])) std = 2e-2 if os.environ.has_key('std'): std = os.environ['std'] Loss_L = 1 if os.environ.has_key('Loss_L'): Loss_L = int(os.environ['Loss_L']) pattern = 'hinge' if os.environ.has_key('pattern'): pattern = os.environ['pattern'] #cp->cd->cpd->cd->c nkerns=[nkerns_1, nkerns_1, nkerns_1, nkerns_2, nkerns_2] drops=[0, 1, 1, 1, 0, 1] drop_p=[1, first_drop, first_drop, first_drop, 1, last_drop] n_hidden=[n_z] logdir = 'results/supervised/cmmva/svhn/cmmva_6layer_'+dataset+pattern+'_D_'+str(D)+'_C_'+str(C)+'_'#+str(nkerns)+str(n_hidden)+'_'+str(weight_decay)+'_'+str(learning_rate)+'_' #if predir is not None: # logdir +='pre_' #if dropout_flag == 1: # logdir += ('dropout_'+str(drops)+'_') # logdir += ('drop_p_'+str(drop_p)+'_') #logdir += ('trainvar_'+str(train_logvar)+'_') #logdir += 
(opt_med+'_') #logdir += (str(Loss_L)+'_') #if super_predir is not None: # logdir += (str(super_preepoch)+'_') logdir += str(int(time.time()))+'/' if not os.path.exists(logdir): os.makedirs(logdir) print 'logdir:', logdir, 'predir', predir print 'cmmva_6layer_svhn_fix', nkerns, n_hidden, seed, dropout_flag, drops, drop_p with open(logdir+'hook.txt', 'a') as f: print >>f, 'logdir:', logdir, 'predir', predir print >>f, 'cmmva_6layer_svhn_fix', nkerns, n_hidden, seed, dropout_flag, drops, drop_p color.printRed('dataset '+dataset) datasets = datapy.load_data_svhn(dataset, have_matrix=True) train_set_x, train_set_y, train_y_matrix = datasets[0] test_set_x, test_set_y, test_y_matrix = datasets[1] valid_set_x, valid_set_y, valid_y_matrix = datasets[2] #datasets = datapy.load_data_svhn(dataset, have_matrix=False) #train_set_x, train_set_y = datasets[0] #test_set_x, test_set_y = datasets[1] #valid_set_x, valid_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... 
building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels random_z = T.matrix('random_z') y_matrix = T.imatrix('y_matrix') drop = T.iscalar('drop') activation = nonlinearity.relu rng = np.random.RandomState(seed) rng_share = theano.tensor.shared_randomstreams.RandomStreams(0) input_x = x.reshape((batch_size, n_channels, dim_h, dim_w)) recg_layer = [] cnn_output = [] l = [] d = [] #1 recg_layer.append(ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, n_channels, dim_h, dim_w), filter_shape=(nkerns[0], n_channels, 5, 5), poolsize=(2, 2), border_mode='same', activation=activation, std=std )) if drops[0]==1: cnn_output.append(recg_layer[-1].drop_output(input=input_x, drop=drop, rng=rng_share, p=drop_p[0])) else: cnn_output.append(recg_layer[-1].output(input=input_x)) l+=[1, 2] d+=[1, 0] #2 recg_layer.append(ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[0], 16, 16), filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation, std=std )) if drops[1]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share, p=drop_p[1])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l+=[1, 2] d+=[1, 0] #3 recg_layer.append(ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[1], 16, 16), filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation, std=std )) if drops[2]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share, p=drop_p[2])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l+=[1, 2] d+=[1, 0] #4 recg_layer.append(ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[2], 8, 8), 
filter_shape=(nkerns[3], nkerns[2], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation, std=std )) if drops[3]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share, p=drop_p[3])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l+=[1, 2] d+=[1, 0] #5 recg_layer.append(ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[3], 8, 8), filter_shape=(nkerns[4], nkerns[3], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation, std=std )) if drops[4]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share, p=drop_p[4])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l+=[1, 2] d+=[1, 0] mlp_input_x = cnn_output[-1].flatten(2) activations = [] activations.append(mlp_input_x) classifier = Pegasos.Pegasos( input= activations[-1], rng=rng, n_in=nkerns[-1]*4*4, n_out=n_classes, weight_decay=0, loss=Loss_L, pattern=pattern ) l+=[1, 2] d+=[1, 0] #stochastic layer recg_layer.append(GaussianHidden.GaussianHidden( rng=rng, input=mlp_input_x, n_in=4*4*nkerns[-1], n_out=n_hidden[0], activation=None )) l+=[1, 2] d+=[1, 0] l+=[1, 2] d+=[1, 0] z = recg_layer[-1].sample_z(rng_share) gene_layer = [] z_output = [] random_z_output = [] #1 gene_layer.append(FullyConnected.FullyConnected( rng=rng, n_in=n_hidden[-1], n_out=4*4*nkerns[-1], activation=activation )) z_output.append(gene_layer[-1].output(input=z)) random_z_output.append(gene_layer[-1].output(input=random_z)) l+=[1, 2] d+=[1, 0] input_z = z_output[-1].reshape((batch_size, nkerns[-1], 4, 4)) input_random_z = random_z_output[-1].reshape((n_batch, nkerns[-1], 4, 4)) #1 gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-1], 4, 4), filter_shape=(nkerns[-2], nkerns[-1], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation )) l+=[1, 2] d+=[1, 0] z_output.append(gene_layer[-1].output(input=input_z)) 
random_z_output.append(gene_layer[-1].output_random_generation(input=input_random_z, n_batch=n_batch)) #2 gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-2], 8, 8), filter_shape=(nkerns[-3], nkerns[-2], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) l+=[1, 2] d+=[1, 0] z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #3 gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-3], 8, 8), filter_shape=(nkerns[-4], nkerns[-3], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation )) l+=[1, 2] d+=[1, 0] z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #4 gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-4], 16, 16), filter_shape=(nkerns[-5], nkerns[-4], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) l+=[1, 2] d+=[1, 0] z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #5-1 stochastic layer # for this layer, the activation is None to get a Guassian mean gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-5], 16, 16), filter_shape=(n_channels, nkerns[-5], 5, 5), poolsize=(2, 2), border_mode='same', activation=None )) l+=[1, 2] d+=[1, 0] x_mean=gene_layer[-1].output(input=z_output[-1]) random_x_mean=gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch) #5-2 stochastic layer # for this layer, the activation is None to get logvar if train_logvar: gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( 
rng, image_shape=(batch_size, nkerns[-5], 16, 16), filter_shape=(n_channels, nkerns[-5], 5, 5), poolsize=(2, 2), border_mode='same', activation=None )) l+=[1, 2] d+=[1, 0] x_logvar=gene_layer[-1].output(input=z_output[-1]) random_x_logvar=gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch) else: x_logvar = theano.shared(np.ones((batch_size, n_channels, dim_h, dim_w), dtype='float32')) random_x_logvar = theano.shared(np.ones((n_batch, n_channels, dim_h, dim_w), dtype='float32')) gene_layer.append(NoParamsGaussianVisiable.NoParamsGaussianVisiable( #rng=rng, #mean=z_output[-1], #data=input_x, )) logpx = gene_layer[-1].logpx(mean=x_mean, logvar=x_logvar, data=input_x) random_x = gene_layer[-1].sample_x(rng_share=rng_share, mean=random_x_mean, logvar=random_x_logvar) #L = (logpx + logpz - logqz).sum() lowerbound = ( (logpx + recg_layer[-1].logpz - recg_layer[-1].logqz).mean() ) hinge_loss = classifier.hinge_loss(10, y, y_matrix) cost = D * lowerbound - C * hinge_loss px = (logpx.mean()) pz = (recg_layer[-1].logpz.mean()) qz = (- recg_layer[-1].logqz.mean()) super_params=[] for r in recg_layer[:-1]: super_params+=r.params super_params+=classifier.params params=[] for g in gene_layer: params+=g.params for r in recg_layer: params+=r.params params+=classifier.params grads = [T.grad(cost, param) for param in params] l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32)) #get_optimizer = optimizer.get_adam_optimizer(learning_rate=learning_rate) if opt_med=='adam': get_optimizer = optimizer_separated.get_adam_optimizer_max(learning_rate=l_r, decay1 = 0.1, decay2 = 0.001, weight_decay=weight_decay) elif opt_med=='mom': get_optimizer = optimizer_separated.get_momentum_optimizer_max(learning_rate=l_r, weight_decay=weight_decay) updates = get_optimizer(w=params,g=grads, l=l, d=d) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], 
outputs=[classifier.errors(y), lowerbound, hinge_loss, cost], #outputs=layer[-1].errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) } ) valid_model = theano.function( inputs=[index], outputs=[classifier.errors(y), lowerbound, hinge_loss, cost], #outputs=layer[-1].errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size], y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) } ) valid_error = theano.function( inputs=[index], outputs=classifier.errors(y), #outputs=layer[-1].errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size], #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) } ) ''' Save parameters and activations ''' pog = [] for (p,g) in zip(params, grads): pog.append(p.max()) pog.append((p**2).mean()) pog.append((g**2).mean()) pog.append((T.sqrt(pog[-2] / pog[-1]))/ 1e3) paramovergrad = theano.function( inputs=[index], outputs=pog, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](dropout_flag) } ) parameters = theano.function( inputs=[], outputs=params, ) generation_check = theano.function( inputs=[index], outputs=[x, x_mean.flatten(2), x_logvar.flatten(2)], givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], #y: train_set_y[index * batch_size: (index + 1) * batch_size], #y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) } ) train_activations = theano.function( inputs=[index], 
outputs=T.concatenate(activations, axis=1), givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), #y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) valid_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), #y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) test_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), #y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` debug_model = theano.function( inputs=[index], outputs=[classifier.errors(y), lowerbound, px, pz, qz, hinge_loss, cost], #updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](dropout_flag) } ) random_generation = theano.function( inputs=[random_z], outputs=[random_x_mean.flatten(2), random_x.flatten(2)], givens={ #drop: np.cast['int32'](0) } ) train_bound_without_dropout = theano.function( inputs=[index], outputs=[classifier.errors(y), lowerbound, hinge_loss, cost], givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) } ) train_model = theano.function( inputs=[index], outputs=[classifier.errors(y), lowerbound, hinge_loss, cost, px, pz, qz, z], updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * 
batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](dropout_flag), } ) ################## # Pretrain MODEL # ################## if predir is not None: color.printBlue('... setting parameters') color.printBlue(predir) pre_train = np.load(predir+'model.npz') pre_train = pre_train['model'] for (para, pre) in zip(params, pre_train): para.set_value(pre) tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) print '------------------', tmp if super_predir is not None: color.printBlue('... setting parameters') color.printBlue(super_predir) pre_train = np.load(super_predir+'svhn_model-'+str(super_preepoch)+'.npz') pre_train = pre_train['model'] for (para, pre) in zip(super_params, pre_train): para.set_value(pre) this_test_losses = [test_model(i) for i in xrange(n_test_batches)] this_test_score = np.mean(this_test_losses, axis=0) #print predir print 'preepoch', super_preepoch, 'pre_test_score', this_test_score with open(logdir+'hook.txt', 'a') as f: print >>f, predir print >>f, 'preepoch', super_preepoch, 'pre_test_score', this_test_score ############### # TRAIN MODEL # ############### print '... training' validation_frequency = n_train_batches predy_valid_stats = [1, 1, 0] start_time = time.clock() NaN_count = 0 epoch = 0 threshold = 0 generatition_frequency = 1 if predir is not None: threshold = 0 color.printRed('threshold, '+str(threshold) + ' generatition_frequency, '+str(generatition_frequency) +' validation_frequency, '+str(validation_frequency)) done_looping = False n_epochs = 80 decay_epochs = 40 record = 0 ''' print 'test initialization...' pre_model = parameters() for i in xrange(len(pre_model)): pre_model[i] = np.asarray(pre_model[i]) print pre_model[i].shape, np.mean(pre_model[i]), np.var(pre_model[i]) print 'end test...' 
''' while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 minibatch_avg_cost = 0 train_error = 0 train_lowerbound = 0 train_hinge_loss = 0 _____z = 0 pxx = 0 pzz = 0 qzz = 0 preW = None currentW = None tmp_start1 = time.clock() if epoch == 30: validation_frequency = n_train_batches/5 if epoch == 50: validation_frequency = n_train_batches/10 if epoch == 30 or epoch == 50 or epoch == 70 or epoch == 90: record = epoch l_r.set_value(np.cast['float32'](l_r.get_value()/3.0)) print '---------', epoch, l_r.get_value() with open(logdir+'hook.txt', 'a') as f: print >>f,'---------', epoch, l_r.get_value() ''' test_epoch = epoch - decay_epochs if test_epoch > 0 and test_epoch % 5 == 0: l_r.set_value(np.cast['float32'](l_r.get_value()/3.0)) print '---------------', l_r.get_value() with open(logdir+'hook.txt', 'a') as f: print >>f, '---------------', l_r.get_value() ''' for minibatch_index in xrange(n_train_batches): e, l, h, ttt, tpx, tpz, tqz, _z = train_model(minibatch_index) pxx+=tpx pzz+=tpz qzz+=tqz #_____z += (np.asarray(_z)**2).sum() / (n_hidden[-1] * batch_size) train_error += e train_lowerbound += l train_hinge_loss += h minibatch_avg_cost += ttt ''' llll = debug_model(minibatch_index) with open(logdir+'hook.txt', 'a') as f: print >>f,'[]', llll ''' if math.isnan(ttt): color.printRed('--------'+str(epoch)+'--------'+str(minibatch_index)) exit() # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index ''' if (minibatch_index <11): preW = currentW currentW = parameters() for i in xrange(len(currentW)): currentW[i] = np.asarray(currentW[i]).astype(np.float32) if preW is not None: for (c,p) in zip(currentW, preW): #print minibatch_index, (c**2).mean(), ((c-p)**2).mean(), np.sqrt((c**2).mean()/((c-p)**2).mean()) with open(logdir+'delta_w.txt', 'a') as f: print >>f,minibatch_index, (c**2).mean(), ((c-p)**2).mean(), np.sqrt((c**2).mean()/((c-p)**2).mean()) ''' # check valid error only, to speed up ''' if (iter + 1) % validation_frequency != 0 and 
(iter + 1) %(validation_frequency/10) == 0: vt = [valid_error(i) for i in xrange(n_valid_batches)] vt = np.mean(vt) print 'quick valid error', vt with open(logdir+'hook.txt', 'a') as f: print >>f, 'quick valid error', vt print 'So far best model', predy_valid_stats with open(logdir+'hook.txt', 'a') as f: print >>f, 'So far best model', predy_valid_stats ''' if (iter + 1) % validation_frequency == 0: print minibatch_index, 'stochastic training error', train_error/float(minibatch_index), train_lowerbound/float(minibatch_index), train_hinge_loss/float(minibatch_index), minibatch_avg_cost /float(minibatch_index), pxx/float(minibatch_index), pzz/float(minibatch_index), qzz/float(minibatch_index)#, 'z_norm', _____z/float(minibatch_index) with open(logdir+'hook.txt', 'a') as f: print >>f, minibatch_index, 'stochastic training error', train_error/float(minibatch_index), train_lowerbound/float(minibatch_index), train_hinge_loss/float(minibatch_index), minibatch_avg_cost /float(minibatch_index), pxx/float(minibatch_index), pzz/float(minibatch_index), qzz/float(minibatch_index)#, 'z_norm', _____z/float(minibatch_index) valid_stats = [valid_model(i) for i in xrange(n_valid_batches)] this_valid_stats = np.mean(valid_stats, axis=0) print epoch, minibatch_index, 'validation stats', this_valid_stats #print tmp with open(logdir+'hook.txt', 'a') as f: print >>f, epoch, minibatch_index, 'validation stats', this_valid_stats print 'So far best model', predy_valid_stats with open(logdir+'hook.txt', 'a') as f: print >>f, 'So far best model', predy_valid_stats if this_valid_stats[0] < predy_valid_stats[0]: test_stats = [test_model(i) for i in xrange(n_test_batches)] this_test_stats = np.mean(test_stats, axis=0) predy_valid_stats[0] = this_valid_stats[0] predy_valid_stats[1] = this_test_stats[0] predy_valid_stats[2] = epoch record = epoch print 'Update best model', this_test_stats with open(logdir+'hook.txt', 'a') as f: print >>f,'Update best model', this_test_stats model = parameters() 
for i in xrange(len(model)): model[i] = np.asarray(model[i]).astype(np.float32) #print model[i].shape, np.mean(model[i]), np.var(model[i]) np.savez(logdir+'best-model', model=model) genezero = generation_check(0) with open(logdir+'gene_check.txt', 'a') as f: print >>f, 'epoch-----------------------', epoch print >>f, 'x', 'x_mean', 'x_logvar' ''' for i in xrange(len(genezero)): genezero[i] = np.asarray(genezero[i]) with open(logdir+'gene_check.txt', 'a') as f: print >>f, genezero[i].max(), genezero[i].min(), genezero[i].mean() with open(logdir+'gene_check.txt', 'a') as f: print >>f, 'norm', np.sqrt(((genezero[0]- genezero[1])**2).sum()) ''' if epoch==1: xxx = genezero[0] image = paramgraphics.mat_to_img(xxx.T, dim_input, colorImg=colorImg, scale=True) image.save(logdir+'data.png', 'PNG') if epoch%1==0: tail='-'+str(epoch)+'.png' xxx_now = genezero[1] image = paramgraphics.mat_to_img(xxx_now.T, dim_input, colorImg=colorImg, scale=True) image.save(logdir+'data_re'+tail, 'PNG') if math.isnan(minibatch_avg_cost): NaN_count+=1 color.printRed("NaN detected. 
Reverting to saved best parameters") print '---------------NaN_count:', NaN_count with open(logdir+'hook.txt', 'a') as f: print >>f, '---------------NaN_count:', NaN_count tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) print '------------------NaN check:', tmp with open(logdir+'hook.txt', 'a') as f: print >>f, '------------------NaN check:', tmp model = parameters() for i in xrange(len(model)): model[i] = np.asarray(model[i]).astype(np.float32) print model[i].shape, np.mean(model[i]), np.var(model[i]) print np.max(model[i]), np.min(model[i]) print np.all(np.isfinite(model[i])), np.any(np.isnan(model[i])) with open(logdir+'hook.txt', 'a') as f: print >>f, model[i].shape, np.mean(model[i]), np.var(model[i]) print >>f, np.max(model[i]), np.min(model[i]) print >>f, np.all(np.isfinite(model[i])), np.any(np.isnan(model[i])) best_before = np.load(logdir+'model.npz') best_before = best_before['model'] for (para, pre) in zip(params, best_before): para.set_value(pre) tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) print '------------------', tmp return if epoch%1==0: model = parameters() for i in xrange(len(model)): model[i] = np.asarray(model[i]).astype(np.float32) np.savez(logdir+'model-'+str(epoch), model=model) tmp_start4=time.clock() if epoch % generatition_frequency == 0: tail='-'+str(epoch)+'.png' random_z = np.random.standard_normal((n_batch, n_hidden[-1])).astype(np.float32) _x_mean, _x = random_generation(random_z) #print _x.shape #print _x_mean.shape image = paramgraphics.mat_to_img(_x.T, dim_input, colorImg=colorImg, scale=True) image.save(logdir+'samples'+tail, 'PNG') image = paramgraphics.mat_to_img(_x_mean.T, dim_input, colorImg=colorImg, scale=True) image.save(logdir+'mean_samples'+tail, 'PNG') #print 'generation_time', time.clock() - tmp_start4 #print 'one epoch time', time.clock() - tmp_start1 end_time = time.clock() print >> sys.stderr, ('The code for file ' + 
os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) if NaN_count > 0: print '---------------NaN_count:', NaN_count with open(logdir+'hook.txt', 'a') as f: print >>f, '---------------NaN_count:', NaN_count
def cva_6layer_dropout_mnist_60000(seed=0, dropout_flag=1, drop_inverses_flag=0, learning_rate=3e-4, predir=None, n_batch=144, dataset='mnist.pkl.gz', batch_size=500, nkerns=[20, 50], n_hidden=[500, 50]): """ Implementation of convolutional VA """ #cp->cd->cpd->cd->c nkerns = [32, 32, 64, 64, 64] drops = [1, 0, 1, 0, 0] #skerns=[5, 3, 3, 3, 3] #pools=[2, 1, 1, 2, 1] #modes=['same']*5 n_hidden = [500, 50] drop_inverses = [ 1, ] # 28->12->12->5->5/5*5*64->500->50->500->5*5*64/5->5->12->12->28 if dataset == 'mnist.pkl.gz': dim_input = (28, 28) colorImg = False logdir = 'results/supervised/cva/mnist/cva_6layer_mnist_60000' + str( nkerns) + str(n_hidden) + '_' + str(learning_rate) + '_' if predir is not None: logdir += 'pre_' if dropout_flag == 1: logdir += ('dropout_' + str(drops) + '_') if drop_inverses_flag == 1: logdir += ('inversedropout_' + str(drop_inverses) + '_') logdir += str(int(time.time())) + '/' if not os.path.exists(logdir): os.makedirs(logdir) print 'logdir:', logdir, 'predir', predir print 'cva_6layer_mnist_60000', nkerns, n_hidden, seed, drops, drop_inverses, dropout_flag, drop_inverses_flag with open(logdir + 'hook.txt', 'a') as f: print >> f, 'logdir:', logdir, 'predir', predir print >> f, 'cva_6layer_mnist_60000', nkerns, n_hidden, seed, drops, drop_inverses, dropout_flag, drop_inverses_flag datasets = datapy.load_data_gpu_60000(dataset, have_matrix=True) train_set_x, train_set_y, train_y_matrix = datasets[0] valid_set_x, valid_set_y, valid_y_matrix = datasets[1] test_set_x, test_set_y, test_y_matrix = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... 
building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels random_z = T.matrix('random_z') drop = T.iscalar('drop') drop_inverse = T.iscalar('drop_inverse') activation = nonlinearity.relu rng = np.random.RandomState(seed) rng_share = theano.tensor.shared_randomstreams.RandomStreams(0) input_x = x.reshape((batch_size, 1, 28, 28)) recg_layer = [] cnn_output = [] #1 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2), border_mode='valid', activation=activation)) if drops[0] == 1: cnn_output.append(recg_layer[-1].drop_output(input=input_x, drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(input=input_x)) #2 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) if drops[1] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #3 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[1], 12, 12), filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2), border_mode='valid', activation=activation)) if drops[2] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #4 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[2], 5, 5), filter_shape=(nkerns[3], nkerns[2], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) if drops[3] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: 
cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #5 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[3], 5, 5), filter_shape=(nkerns[4], nkerns[3], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) if drops[4] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) mlp_input_x = cnn_output[-1].flatten(2) activations = [] #1 recg_layer.append( FullyConnected.FullyConnected(rng=rng, n_in=5 * 5 * nkerns[-1], n_out=n_hidden[0], activation=activation)) if drops[-1] == 1: activations.append(recg_layer[-1].drop_output(input=mlp_input_x, drop=drop, rng=rng_share)) else: activations.append(recg_layer[-1].output(input=mlp_input_x)) #stochastic layer recg_layer.append( GaussianHidden.GaussianHidden(rng=rng, input=activations[-1], n_in=n_hidden[0], n_out=n_hidden[1], activation=None)) z = recg_layer[-1].sample_z(rng_share) gene_layer = [] z_output = [] random_z_output = [] #1 gene_layer.append( FullyConnected.FullyConnected(rng=rng, n_in=n_hidden[1], n_out=n_hidden[0], activation=activation)) z_output.append(gene_layer[-1].output(input=z)) random_z_output.append(gene_layer[-1].output(input=random_z)) #2 gene_layer.append( FullyConnected.FullyConnected(rng=rng, n_in=n_hidden[0], n_out=5 * 5 * nkerns[-1], activation=activation)) if drop_inverses[0] == 1: z_output.append(gene_layer[-1].drop_output(input=z_output[-1], drop=drop_inverse, rng=rng_share)) random_z_output.append(gene_layer[-1].drop_output( input=random_z_output[-1], drop=drop_inverse, rng=rng_share)) else: z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append( gene_layer[-1].output(input=random_z_output[-1])) input_z = z_output[-1].reshape((batch_size, nkerns[-1], 5, 5)) input_random_z = random_z_output[-1].reshape((n_batch, nkerns[-1], 5, 5)) #1 gene_layer.append( UnpoolConvNon.UnpoolConvNon(rng, image_shape=(batch_size, nkerns[-1], 
5, 5), filter_shape=(nkerns[-2], nkerns[-1], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) z_output.append(gene_layer[-1].output(input=input_z)) random_z_output.append(gene_layer[-1].output_random_generation( input=input_random_z, n_batch=n_batch)) #2 gene_layer.append( UnpoolConvNon.UnpoolConvNon(rng, image_shape=(batch_size, nkerns[-2], 5, 5), filter_shape=(nkerns[-3], nkerns[-2], 3, 3), poolsize=(2, 2), border_mode='full', activation=activation)) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch)) #3 gene_layer.append( UnpoolConvNon.UnpoolConvNon(rng, image_shape=(batch_size, nkerns[-3], 12, 12), filter_shape=(nkerns[-4], nkerns[-3], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch)) #4 gene_layer.append( UnpoolConvNon.UnpoolConvNon(rng, image_shape=(batch_size, nkerns[-4], 12, 12), filter_shape=(nkerns[-5], nkerns[-4], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch)) #5 stochastic layer # for the last layer, the nonliearity should be sigmoid to achieve mean of Bernoulli gene_layer.append( UnpoolConvNon.UnpoolConvNon(rng, image_shape=(batch_size, nkerns[-5], 12, 12), filter_shape=(1, nkerns[-5], 5, 5), poolsize=(2, 2), border_mode='full', activation=nonlinearity.sigmoid)) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch)) gene_layer.append( NoParamsBernoulliVisiable.NoParamsBernoulliVisiable( #rng=rng, #mean=z_output[-1], #data=input_x, )) logpx = 
gene_layer[-1].logpx(mean=z_output[-1], data=input_x) # 4-D tensor of random generation random_x_mean = random_z_output[-1] random_x = gene_layer[-1].sample_x(rng_share, random_x_mean) #L = (logpx + logpz - logqz).sum() cost = ((logpx + recg_layer[-1].logpz - recg_layer[-1].logqz).sum()) px = (logpx.sum()) pz = (recg_layer[-1].logpz.sum()) qz = (-recg_layer[-1].logqz.sum()) params = [] for g in gene_layer: params += g.params for r in recg_layer: params += r.params gparams = [T.grad(cost, param) for param in params] weight_decay = 1.0 / n_train_batches l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32)) #get_optimizer = optimizer.get_adam_optimizer(learning_rate=learning_rate) get_optimizer = optimizer.get_adam_optimizer_max(learning_rate=l_r, decay1=0.1, decay2=0.001, weight_decay=weight_decay, epsilon=1e-8) with open(logdir + 'hook.txt', 'a') as f: print >> f, 'AdaM', learning_rate, weight_decay updates = get_optimizer(params, gparams) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=cost, #outputs=layer[-1].errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], #y: test_set_y[index * batch_size:(index + 1) * batch_size], #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) }) validate_model = theano.function( inputs=[index], outputs=cost, #outputs=layer[-1].errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], #y: valid_set_y[index * batch_size:(index + 1) * batch_size], #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) }) ''' Save parameters and activations ''' parameters = theano.function( inputs=[], outputs=params, ) train_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x: 
train_set_x[index * batch_size:(index + 1) * batch_size], drop: np.cast['int32'](0), #drop_inverse: np.cast['int32'](0) #y: train_set_y[index * batch_size: (index + 1) * batch_size] }) valid_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], drop: np.cast['int32'](0), #drop_inverse: np.cast['int32'](0) #y: valid_set_y[index * batch_size: (index + 1) * batch_size] }) test_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], drop: np.cast['int32'](0), #drop_inverse: np.cast['int32'](0) #y: test_set_y[index * batch_size: (index + 1) * batch_size] }) # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` debug_model = theano.function( inputs=[index], outputs=[cost, px, pz, qz], #updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], #y: train_set_y[index * batch_size: (index + 1) * batch_size], #y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](dropout_flag), drop_inverse: np.cast['int32'](drop_inverses_flag) }) random_generation = theano.function( inputs=[random_z], outputs=[random_x_mean.flatten(2), random_x.flatten(2)], givens={ #drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) }) train_bound_without_dropout = theano.function( inputs=[index], outputs=cost, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], #y: train_set_y[index * batch_size: (index + 1) * batch_size], #y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) }) train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * 
batch_size], #y: train_set_y[index * batch_size: (index + 1) * batch_size], #y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](dropout_flag), drop_inverse: np.cast['int32'](drop_inverses_flag) }) ################## # Pretrain MODEL # ################## if predir is not None: color.printBlue('... setting parameters') color.printBlue(predir) pre_train = np.load(predir + 'model.npz') pre_train = pre_train['model'] for (para, pre) in zip(params, pre_train): para.set_value(pre) tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) / float(batch_size) print '------------------', tmp ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_bound = -1000000.0 best_iter = 0 test_score = 0. start_time = time.clock() NaN_count = 0 epoch = 0 threshold = 0 validation_frequency = 1 generatition_frequency = 10 if predir is not None: threshold = 0 color.printRed('threshold, ' + str(threshold) + ' generatition_frequency, ' + str(generatition_frequency) + ' validation_frequency, ' + str(validation_frequency)) done_looping = False n_epochs = 600 decay_epochs = 500 ''' print 'test initialization...' pre_model = parameters() for i in xrange(len(pre_model)): pre_model[i] = np.asarray(pre_model[i]) print pre_model[i].shape, np.mean(pre_model[i]), np.var(pre_model[i]) print 'end test...' 
''' while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 minibatch_avg_cost = 0 tmp_start1 = time.clock() test_epoch = epoch - decay_epochs if test_epoch > 0 and test_epoch % 10 == 0: print l_r.get_value() with open(logdir + 'hook.txt', 'a') as f: print >> f, l_r.get_value() l_r.set_value(np.cast['float32'](l_r.get_value() / 3.0)) for minibatch_index in xrange(n_train_batches): #print minibatch_index ''' color.printRed('lalala') xxx = dims(minibatch_index) print xxx.shape ''' #print n_train_batches minibatch_avg_cost += train_model(minibatch_index) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if math.isnan(minibatch_avg_cost): NaN_count += 1 color.printRed("NaN detected. Reverting to saved best parameters") print '---------------NaN_count:', NaN_count with open(logdir + 'hook.txt', 'a') as f: print >> f, '---------------NaN_count:', NaN_count tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) / float(batch_size) print '------------------NaN check:', tmp with open(logdir + 'hook.txt', 'a') as f: print >> f, '------------------NaN check:', tmp model = parameters() for i in xrange(len(model)): model[i] = np.asarray(model[i]).astype(np.float32) print model[i].shape, np.mean(model[i]), np.var(model[i]) print np.max(model[i]), np.min(model[i]) print np.all(np.isfinite(model[i])), np.any(np.isnan(model[i])) with open(logdir + 'hook.txt', 'a') as f: print >> f, model[i].shape, np.mean(model[i]), np.var( model[i]) print >> f, np.max(model[i]), np.min(model[i]) print >> f, np.all(np.isfinite(model[i])), np.any( np.isnan(model[i])) best_before = np.load(logdir + 'model.npz') best_before = best_before['model'] for (para, pre) in zip(params, best_before): para.set_value(pre) tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) / float(batch_size) print '------------------', tmp return #print 'optimization_time', time.clock() - tmp_start1 print epoch, 
'stochastic training error', minibatch_avg_cost / float( n_train_batches * batch_size) with open(logdir + 'hook.txt', 'a') as f: print >> f, epoch, 'stochastic training error', minibatch_avg_cost / float( n_train_batches * batch_size) if epoch % validation_frequency == 0: tmp_start2 = time.clock() test_losses = [test_model(i) for i in xrange(n_test_batches)] this_test_bound = np.mean(test_losses) / float(batch_size) #tmp = [debug_model(i) for i # in xrange(n_train_batches)] #tmp = (np.asarray(tmp)).mean(axis=0) / float(batch_size) print epoch, 'test bound', this_test_bound #print tmp with open(logdir + 'hook.txt', 'a') as f: print >> f, epoch, 'test bound', this_test_bound if epoch % 100 == 0: model = parameters() for i in xrange(len(model)): model[i] = np.asarray(model[i]).astype(np.float32) np.savez(logdir + 'model-' + str(epoch), model=model) for i in xrange(n_train_batches): if i == 0: train_features = np.asarray(train_activations(i)) else: train_features = np.vstack( (train_features, np.asarray(train_activations(i)))) for i in xrange(n_valid_batches): if i == 0: valid_features = np.asarray(valid_activations(i)) else: valid_features = np.vstack( (valid_features, np.asarray(valid_activations(i)))) for i in xrange(n_test_batches): if i == 0: test_features = np.asarray(test_activations(i)) else: test_features = np.vstack( (test_features, np.asarray(test_activations(i)))) np.save(logdir + 'train_features', train_features) np.save(logdir + 'valid_features', valid_features) np.save(logdir + 'test_features', test_features) tmp_start4 = time.clock() if epoch % generatition_frequency == 0: tail = '-' + str(epoch) + '.png' random_z = np.random.standard_normal( (n_batch, n_hidden[-1])).astype(np.float32) _x_mean, _x = random_generation(random_z) #print _x.shape #print _x_mean.shape image = paramgraphics.mat_to_img(_x.T, dim_input, colorImg=colorImg) image.save(logdir + 'samples' + tail, 'PNG') image = paramgraphics.mat_to_img(_x_mean.T, dim_input, colorImg=colorImg) 
image.save(logdir + 'mean_samples' + tail, 'PNG') #print 'generation_time', time.clock() - tmp_start4 end_time = time.clock() print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) if NaN_count > 0: print '---------------NaN_count:', NaN_count with open(logdir + 'hook.txt', 'a') as f: print >> f, '---------------NaN_count:', NaN_count
batch3_data = x batch3_labels = [] for i in np.arange(len(y)): batch3_labels.append(y[i]) batch3_data = np.asarray(batch3_data).T batch3_labels = np.asarray(batch3_labels) print 'Check n x f' print batch1_data.shape print batch1_labels.shape print batch2_data.shape print batch2_labels.shape print batch3_data.shape print batch3_labels.shape image = paramgraphics.mat_to_img(batch1_data[:100,:].T, dim_input, colorImg=colorImg, scale=True) image.save(saveDir+'svhn_train.png', 'PNG') image = paramgraphics.mat_to_img(batch2_data[:100,:].T, dim_input, colorImg=colorImg, scale=True) image.save(saveDir+'svhn_valid.png', 'PNG') image = paramgraphics.mat_to_img(batch3_data[:100,:].T, dim_input, colorImg=colorImg, scale=True) image.save(saveDir+'svhn_test.png', 'PNG') if preprocessing == 'gcn_var': batch1_data = pypp.global_contrast_normalize(batch1_data, subtract_mean=True, use_std=True) batch2_data = pypp.global_contrast_normalize(batch2_data, subtract_mean=True, use_std=True) batch3_data = pypp.global_contrast_normalize(batch3_data, subtract_mean=True, use_std=True) elif preprocessing == 'gcn_norm': batch1_data = pypp.global_contrast_normalize(batch1_data, subtract_mean=True) batch2_data = pypp.global_contrast_normalize(batch2_data, subtract_mean=True) batch3_data = pypp.global_contrast_normalize(batch3_data, subtract_mean=True) print batch1_data.shape
print line with open(logfile, 'a') as f: f.write(line + "\n") # random generation for visualization import util.paramgraphics as paramgraphics import scipy.io as sio tail = '-' + str(epoch) + '.png' _x_mean, _x = generate_model(num_generation) _x_mean = _x_mean.reshape((num_generation, -1)) _x = _x.reshape((num_generation, -1)) sio.savemat( os.path.join(res_out, 'array_images-' + str(epoch) + '.mat'), {'data': _x_mean}) image = paramgraphics.mat_to_img(_x.T, dim_input, colorImg=colorImg, scale=generation_scale) image.save(os.path.join(res_out, 'samples' + tail), 'PNG') image = paramgraphics.mat_to_img(_x_mean.T, dim_input, colorImg=colorImg, scale=generation_scale) image.save(os.path.join(res_out, 'mean_samples' + tail), 'PNG') ''' if dataset in ['norb_48', 'norb_96']: image = paramgraphics.mat_to_img(_x_mean.T, dim_input, colorImg=colorImg, scale=True) image.save(os.path.join(res_out, 'mean_samples_scale'+tail), 'PNG') import nn_search if epoch % 250 == 0: nn = nn_search.nn_search(_x_mean, train_x) image = paramgraphics.mat_to_img(nn.T, dim_input, colorImg=colorImg, scale=True)
def c_6layer_svhn_imputation(seed=0, ctype='cva', pertub_type=5, pertub_prob=0, pertub_prob1=16, visualization_times=20, denoise_times=200, predir=None, n_batch=900, batch_size=500): """ Missing data imputation """ ''' svhn ''' n_channels = 3 colorImg = True dim_w = 32 dim_h = 32 dim_input=(dim_h, dim_w) n_classes = 10 first_drop=0.6 if os.environ.has_key('first_drop'): first_drop = float(os.environ['first_drop']) last_drop=1 if os.environ.has_key('last_drop'): last_drop = float(os.environ['last_drop']) nkerns_1=96 if os.environ.has_key('nkerns_1'): nkerns_1 = int(os.environ['nkerns_1']) nkerns_2=96 if os.environ.has_key('nkerns_2'): nkerns_2 = int(os.environ['nkerns_2']) opt_med='mom' if os.environ.has_key('opt_med'): opt_med = os.environ['opt_med'] train_logvar=True if os.environ.has_key('train_logvar'): train_logvar = bool(int(os.environ['train_logvar'])) dataset='svhnlcn' if os.environ.has_key('dataset'): dataset = os.environ['dataset'] n_z=256 if os.environ.has_key('n_z'): n_z = int(os.environ['n_z']) #cp->cd->cpd->cd->c nkerns=[nkerns_1, nkerns_1, nkerns_1, nkerns_2, nkerns_2] drops=[0, 1, 1, 1, 0, 1] drop_p=[1, first_drop, first_drop, first_drop, 1, last_drop] n_hidden=[n_z] logdir = 'results/imputation/'+ctype+'/svhn/'+ctype+'_6layer_'+dataset+'_' logdir += str(int(time.time()))+'/' if not os.path.exists(logdir): os.makedirs(logdir) print predir with open(logdir+'hook.txt', 'a') as f: print >>f, predir color.printRed('dataset '+dataset) test_set_x, test_set_x_pertub, pertub_label, pertub_number = datapy.load_pertub_data_svhn(dirs='data_imputation/', dataset=dataset, pertub_type=pertub_type, pertub_prob=pertub_prob, pertub_prob1=pertub_prob1) pixel_max, pixel_min = datapy.load_max_min(dirs='data_imputation/', dataset=dataset, pertub_prob=pertub_prob) # compute number of minibatches for training, validation and testing #n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size #n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / 
batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels random_z = T.matrix('random_z') x_pertub = T.matrix('x_pertub') # the data is presented as rasterized images p_label = T.matrix('p_label') drop = T.iscalar('drop') activation = nonlinearity.relu rng = np.random.RandomState(seed) rng_share = theano.tensor.shared_randomstreams.RandomStreams(0) input_x = x_pertub.reshape((batch_size, n_channels, dim_h, dim_w)) recg_layer = [] cnn_output = [] l = [] d = [] #1 recg_layer.append(ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, n_channels, dim_h, dim_w), filter_shape=(nkerns[0], n_channels, 5, 5), poolsize=(2, 2), border_mode='same', activation=activation )) if drops[0]==1: cnn_output.append(recg_layer[-1].drop_output(input=input_x, drop=drop, rng=rng_share, p=drop_p[0])) else: cnn_output.append(recg_layer[-1].output(input=input_x)) l+=[1, 2] d+=[1, 1] #2 recg_layer.append(ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[0], 16, 16), filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) if drops[1]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share, p=drop_p[1])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l+=[1, 2] d+=[1, 1] #3 recg_layer.append(ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[1], 16, 16), filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation )) if drops[2]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, 
rng=rng_share, p=drop_p[2])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l+=[1, 2] d+=[1, 1] #4 recg_layer.append(ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[2], 8, 8), filter_shape=(nkerns[3], nkerns[2], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) if drops[3]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share, p=drop_p[3])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l+=[1, 2] d+=[1, 1] #5 ''' --------------------- (2,2) or (4,4) ''' recg_layer.append(ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[3], 8, 8), filter_shape=(nkerns[4], nkerns[3], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation )) if drops[4]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share, p=drop_p[4])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l+=[1, 2] d+=[1, 1] mlp_input_x = cnn_output[-1].flatten(2) activations = [] activations.append(mlp_input_x) #1 ''' ---------------------No MLP ''' ''' recg_layer.append(FullyConnected.FullyConnected( rng=rng, n_in= 4 * 4 * nkerns[-1], n_out=n_hidden[0], activation=activation )) if drops[-1]==1: activations.append(recg_layer[-1].drop_output(input=mlp_input_x, drop=drop, rng=rng_share, p=drop_p[-1])) else: activations.append(recg_layer[-1].output(input=mlp_input_x)) ''' #stochastic layer recg_layer.append(GaussianHidden.GaussianHidden( rng=rng, input=activations[-1], n_in=4 * 4 * nkerns[-1], n_out=n_hidden[0], activation=None )) l+=[1, 2] d+=[1, 1] l+=[1, 2] d+=[1, 1] z = recg_layer[-1].sample_z(rng_share) gene_layer = [] z_output = [] random_z_output = [] #1 gene_layer.append(FullyConnected.FullyConnected( rng=rng, n_in=n_hidden[0], n_out = 4*4*nkerns[-1], activation=activation )) z_output.append(gene_layer[-1].output(input=z)) random_z_output.append(gene_layer[-1].output(input=random_z)) l+=[1, 2] 
d+=[1, 1] #2 ''' gene_layer.append(FullyConnected.FullyConnected( rng=rng, n_in=n_hidden[0], n_out = 4*4*nkerns[-1], activation=activation )) if drop_inverses[0]==1: z_output.append(gene_layer[-1].drop_output(input=z_output[-1], drop=drop_inverse, rng=rng_share)) random_z_output.append(gene_layer[-1].drop_output(input=random_z_output[-1], drop=drop_inverse, rng=rng_share)) else: z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output(input=random_z_output[-1])) ''' input_z = z_output[-1].reshape((batch_size, nkerns[-1], 4, 4)) input_random_z = random_z_output[-1].reshape((n_batch, nkerns[-1], 4, 4)) #1 gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-1], 4, 4), filter_shape=(nkerns[-2], nkerns[-1], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation )) l+=[1, 2] d+=[1, 1] z_output.append(gene_layer[-1].output(input=input_z)) random_z_output.append(gene_layer[-1].output_random_generation(input=input_random_z, n_batch=n_batch)) #2 gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-2], 8, 8), filter_shape=(nkerns[-3], nkerns[-2], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) l+=[1, 2] d+=[1, 1] z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #3 gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-3], 8, 8), filter_shape=(nkerns[-4], nkerns[-3], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation )) l+=[1, 2] d+=[1, 1] z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #4 gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, 
nkerns[-4], 16, 16), filter_shape=(nkerns[-5], nkerns[-4], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) l+=[1, 2] d+=[1, 1] z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #5-1 stochastic layer # for this layer, the activation is None to get a Guassian mean gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-5], 16, 16), filter_shape=(n_channels, nkerns[-5], 5, 5), poolsize=(2, 2), border_mode='same', activation=None )) l+=[1, 2] d+=[1, 1] x_mean=gene_layer[-1].output(input=z_output[-1]) random_x_mean=gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch) #5-2 stochastic layer # for this layer, the activation is None to get logvar if train_logvar: gene_layer.append(UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-5], 16, 16), filter_shape=(n_channels, nkerns[-5], 5, 5), poolsize=(2, 2), border_mode='same', activation=None )) l+=[1, 2] d+=[1, 1] x_logvar=gene_layer[-1].output(input=z_output[-1]) random_x_logvar=gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch) else: x_logvar = theano.shared(np.ones((batch_size, n_channels, dim_h, dim_w), dtype='float32')) random_x_logvar = theano.shared(np.ones((n_batch, n_channels, dim_h, dim_w), dtype='float32')) gene_layer.append(NoParamsGaussianVisiable.NoParamsGaussianVisiable( #rng=rng, #mean=z_output[-1], #data=input_x, )) logpx = gene_layer[-1].logpx(mean=x_mean, logvar=x_logvar, data=input_x) random_x = gene_layer[-1].sample_x(rng_share=rng_share, mean=random_x_mean, logvar=random_x_logvar) x_denoised = p_label*x+(1-p_label)*x_mean.flatten(2) mse = ((x - x_denoised)**2).sum() / pertub_number params=[] for g in gene_layer: params+=g.params for r in recg_layer: params+=r.params ''' train_activations = theano.function( 
inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x_pertub: train_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) } ) ''' ''' valid_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x_pertub: valid_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) } ) ''' test_activations = theano.function( inputs=[x_pertub], outputs=T.concatenate(activations, axis=1), givens={ drop: np.cast['int32'](0) } ) imputation_model = theano.function( inputs=[index, x_pertub], outputs=[x_denoised, mse], givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], p_label:pertub_label[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), #drop_inverse: np.cast['int32'](0) } ) ################## # Pretrain MODEL # ################## model_epoch = 100 if os.environ.has_key('model_epoch'): model_epoch = int(os.environ['model_epoch']) if predir is not None: color.printBlue('... setting parameters') color.printBlue(predir) if model_epoch == -1: pre_train = np.load(predir+'best-model.npz') else: pre_train = np.load(predir+'model-'+str(model_epoch)+'.npz') pre_train = pre_train['model'] if ctype == 'cva': for (para, pre) in zip(params, pre_train): para.set_value(pre) elif ctype == 'cmmva': for (para, pre) in zip(params, pre_train[:-2]): para.set_value(pre) else: exit() else: exit() ############### # TRAIN MODEL # ############### print '... 
training' scale = False epoch = 0 n_visualization = 900 pixel_max = pixel_max[:n_visualization] pixel_min = pixel_min[:n_visualization] output = np.ones((n_visualization, visualization_times+2, n_channels*dim_input[0]*dim_input[1])) output[:,0,:] = test_set_x.get_value()[:n_visualization,:] output[:,1,:] = test_set_x_pertub.get_value()[:n_visualization,:] image = paramgraphics.mat_to_img(paramgraphics.scale_max_min(output[:,0,:].T,pixel_max,pixel_min), dim_input, colorImg=colorImg, scale=scale) image.save(logdir+'data.png', 'PNG') image = paramgraphics.mat_to_img(paramgraphics.scale_max_min(output[:,1,:].T,pixel_max,pixel_min), dim_input, colorImg=colorImg, scale=scale) image.save(logdir+'data_pertub.png', 'PNG') tmp = test_set_x_pertub.get_value() while epoch < denoise_times: epoch = epoch + 1 for i in xrange(n_test_batches): d, m = imputation_model(i, tmp[i * batch_size: (i + 1) * batch_size]) tmp[i * batch_size: (i + 1) * batch_size] = np.asarray(d) if epoch<=visualization_times: output[:,epoch+1,:] = tmp[:n_visualization,:] image = paramgraphics.mat_to_img(paramgraphics.scale_max_min(tmp[:n_visualization,:].T,pixel_max,pixel_min), dim_input, colorImg=colorImg, scale=scale) image.save(logdir+'procedure-'+str(epoch)+'.png', 'PNG') np.savez(logdir+'procedure-'+str(epoch), tmp=tmp) ''' image = paramgraphics.mat_to_img((output.reshape(-1,32*32*3)).T, dim_input, colorImg=colorImg, tile_shape=(n_visualization,22), scale=scale) image.save(logdir+'output.png', 'PNG') np.savez(logdir+'output', output=output) ''' '''
tmp_a[start:end] = 0 #print tmp_a.shape #print tmp_a tmp_b = np.tile(tmp_a, height*3) print tmp_b.shape print pertub_label.shape pertub_label = (pertub_label.T*tmp_b).T data_perturbed = pertub_label*data+(1-pertub_label)*data_perturbed h1,b1= np.histogram(data, bins=10) h2,b2= np.histogram(data_perturbed, bins=10) print h1,b1 print h2,b2 if pertub_type == 4: sio.savemat('data_imputation/'+dataset+'_type_'+str(pertub_type)+'_params_'+str(int(pertub_prob*100))+'_noise_rawdata.mat', {'z_test_original' : data, 'z_test' : data_perturbed, 'pertub_label' : pertub_label}) elif pertub_type == 3: sio.savemat('data_imputation/'+dataset+'_type_'+str(pertub_type)+'_params_'+str(pertub_prob)+'_noise_rawdata.mat', {'z_test_original' : data, 'z_test' : data_perturbed, 'pertub_label' : pertub_label}) elif pertub_type == 5: sio.savemat('data_imputation/'+dataset+'_type_'+str(pertub_type)+'_params_'+str(start)+'_'+str(end)+'_noise_rawdata.mat', {'z_test_original' : data, 'z_test' : data_perturbed, 'pertub_label' : pertub_label}) sio.savemat('data_imputation/'+dataset+'_params_'+str(int(pertub_prob))+'_max_min_pixel.mat', {'pixel_max':pixel_max, 'pixel_min':pixel_min}) print data_perturbed[:,:25].shape scale = False image = paramgraphics.mat_to_img(paramgraphics.scale_max_min(data_perturbed[:,:25],pixel_max,pixel_min), (32,32), colorImg=True, scale=scale) image.save('data_imputation/'+dataset+'_'+'test_noise_type_'+str(pertub_type)+'_params_'+str(pertub_prob)+'.png', 'PNG') print data[:,:25].shape image = paramgraphics.mat_to_img(paramgraphics.scale_max_min(data[:,:25],pixel_max,pixel_min), (32,32), colorImg=True, scale=scale) image.save('data_imputation/'+dataset+'_'+'test_original_type_'+str(pertub_type)+'_params_'+str(pertub_prob)+'.png', 'PNG')
def cmmva_6layer_dropout_mnist_60000(seed=0, start_layer=0, end_layer=1, dropout_flag=1, drop_inverses_flag=0, learning_rate=3e-5, predir=None, n_batch=144, dataset='mnist.pkl.gz', batch_size=500, nkerns=[20, 50], n_hidden=[500, 50]): """ Implementation of convolutional MMVA """ #cp->cd->cpd->cd->c nkerns=[32, 32, 64, 64, 64] drops=[1, 0, 1, 0, 0, 1] #skerns=[5, 3, 3, 3, 3] #pools=[2, 1, 1, 2, 1] #modes=['same']*5 n_hidden=[500, 50] drop_inverses=[1,] # 28->12->12->5->5/5*5*64->500->50->500->5*5*64/5->5->12->12->28 if dataset=='mnist.pkl.gz': dim_input=(28, 28) colorImg=False D = 1.0 C = 1.0 if os.environ.has_key('C'): C = np.cast['float32'](float((os.environ['C']))) if os.environ.has_key('D'): D = np.cast['float32'](float((os.environ['D']))) color.printRed('D '+str(D)+' C '+str(C)) logdir = 'results/supervised/cmmva/mnist/cmmva_6layer_60000_'+str(nkerns)+str(n_hidden)+'_D_'+str(D)+'_C_'+str(C)+'_'+str(learning_rate)+'_' if predir is not None: logdir +='pre_' if dropout_flag == 1: logdir += ('dropout_'+str(drops)+'_') if drop_inverses_flag==1: logdir += ('inversedropout_'+str(drop_inverses)+'_') logdir += str(int(time.time()))+'/' if not os.path.exists(logdir): os.makedirs(logdir) print 'logdir:', logdir, 'predir', predir print 'cmmva_6layer_mnist_60000', nkerns, n_hidden, seed, drops, drop_inverses, dropout_flag, drop_inverses_flag with open(logdir+'hook.txt', 'a') as f: print >>f, 'logdir:', logdir, 'predir', predir print >>f, 'cmmva_6layer_mnist_60000', nkerns, n_hidden, seed, drops, drop_inverses, dropout_flag, drop_inverses_flag datasets = datapy.load_data_gpu_60000(dataset, have_matrix=True) train_set_x, train_set_y, train_y_matrix = datasets[0] valid_set_x, valid_set_y, valid_y_matrix = datasets[1] test_set_x, test_set_y, test_y_matrix = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / 
batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels y_matrix = T.imatrix('y_matrix') random_z = T.matrix('random_z') drop = T.iscalar('drop') drop_inverse = T.iscalar('drop_inverse') activation = nonlinearity.relu rng = np.random.RandomState(seed) rng_share = theano.tensor.shared_randomstreams.RandomStreams(0) input_x = x.reshape((batch_size, 1, 28, 28)) recg_layer = [] cnn_output = [] #1 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2), border_mode='valid', activation=activation )) if drops[0]==1: cnn_output.append(recg_layer[-1].drop_output(input=input_x, drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(input=input_x)) #2 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) if drops[1]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #3 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[1], 12, 12), filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2), border_mode='valid', activation=activation )) if drops[2]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #4 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[2], 5, 5), filter_shape=(nkerns[3], nkerns[2], 3, 3), poolsize=(1, 1), 
border_mode='same', activation=activation )) if drops[3]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #5 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[3], 5, 5), filter_shape=(nkerns[4], nkerns[3], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) if drops[4]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) mlp_input_x = cnn_output[-1].flatten(2) activations = [] #1 recg_layer.append(FullyConnected.FullyConnected( rng=rng, n_in= 5 * 5 * nkerns[-1], n_out=n_hidden[0], activation=activation )) if drops[-1]==1: activations.append(recg_layer[-1].drop_output(input=mlp_input_x, drop=drop, rng=rng_share)) else: activations.append(recg_layer[-1].output(input=mlp_input_x)) features = T.concatenate(activations[start_layer:end_layer], axis=1) color.printRed('feature dimension: '+str(np.sum(n_hidden[start_layer:end_layer]))) classifier = Pegasos.Pegasos( input= features, rng=rng, n_in=np.sum(n_hidden[start_layer:end_layer]), n_out=10, weight_decay=0, loss=1, std=1e-2 ) recg_layer.append(GaussianHidden.GaussianHidden( rng=rng, input=activations[-1], n_in=n_hidden[0], n_out = n_hidden[1], activation=None )) z = recg_layer[-1].sample_z(rng_share) gene_layer = [] z_output = [] random_z_output = [] #1 gene_layer.append(FullyConnected.FullyConnected( rng=rng, n_in=n_hidden[1], n_out = n_hidden[0], activation=activation )) z_output.append(gene_layer[-1].output(input=z)) random_z_output.append(gene_layer[-1].output(input=random_z)) #2 gene_layer.append(FullyConnected.FullyConnected( rng=rng, n_in=n_hidden[0], n_out = 5*5*nkerns[-1], activation=activation )) if drop_inverses[0]==1: z_output.append(gene_layer[-1].drop_output(input=z_output[-1], drop=drop_inverse, rng=rng_share)) 
random_z_output.append(gene_layer[-1].drop_output(input=random_z_output[-1], drop=drop_inverse, rng=rng_share)) else: z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output(input=random_z_output[-1])) input_z = z_output[-1].reshape((batch_size, nkerns[-1], 5, 5)) input_random_z = random_z_output[-1].reshape((n_batch, nkerns[-1], 5, 5)) #1 gene_layer.append(UnpoolConvNon.UnpoolConvNon( rng, image_shape=(batch_size, nkerns[-1], 5, 5), filter_shape=(nkerns[-2], nkerns[-1], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) z_output.append(gene_layer[-1].output(input=input_z)) random_z_output.append(gene_layer[-1].output_random_generation(input=input_random_z, n_batch=n_batch)) #2 gene_layer.append(UnpoolConvNon.UnpoolConvNon( rng, image_shape=(batch_size, nkerns[-2], 5, 5), filter_shape=(nkerns[-3], nkerns[-2], 3, 3), poolsize=(2, 2), border_mode='full', activation=activation )) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #3 gene_layer.append(UnpoolConvNon.UnpoolConvNon( rng, image_shape=(batch_size, nkerns[-3], 12, 12), filter_shape=(nkerns[-4], nkerns[-3], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #4 gene_layer.append(UnpoolConvNon.UnpoolConvNon( rng, image_shape=(batch_size, nkerns[-4], 12, 12), filter_shape=(nkerns[-5], nkerns[-4], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #5 stochastic layer # for the last layer, the nonliearity should be sigmoid to achieve mean of Bernoulli 
gene_layer.append(UnpoolConvNon.UnpoolConvNon( rng, image_shape=(batch_size, nkerns[-5], 12, 12), filter_shape=(1, nkerns[-5], 5, 5), poolsize=(2, 2), border_mode='full', activation=nonlinearity.sigmoid )) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) gene_layer.append(NoParamsBernoulliVisiable.NoParamsBernoulliVisiable( #rng=rng, #mean=z_output[-1], #data=input_x, )) logpx = gene_layer[-1].logpx(mean=z_output[-1], data=input_x) # 4-D tensor of random generation random_x_mean = random_z_output[-1] random_x = gene_layer[-1].sample_x(rng_share, random_x_mean) #L = (logpx + logpz - logqz).sum() lowerbound = ( (logpx + recg_layer[-1].logpz - recg_layer[-1].logqz).sum() ) hinge_loss = classifier.hinge_loss(10, y, y_matrix) * batch_size # # D is redundent, you could just set D = 1 and tune C and weight decay parameters # beacuse AdaM is scale-invariant # cost = D * lowerbound - C * hinge_loss #- classifier.L2_reg px = (logpx.sum()) pz = (recg_layer[-1].logpz.sum()) qz = (- recg_layer[-1].logqz.sum()) params=[] for g in gene_layer: params+=g.params for r in recg_layer: params+=r.params params+=classifier.params gparams = [T.grad(cost, param) for param in params] weight_decay=1.0/n_train_batches epsilon=1e-8 #get_optimizer = optimizer.get_adam_optimizer(learning_rate=learning_rate) l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32)) get_optimizer = optimizer.get_adam_optimizer_max(learning_rate=l_r, decay1=0.1, decay2=0.001, weight_decay=weight_decay, epsilon=epsilon) with open(logdir+'hook.txt', 'a') as f: print >>f, 'AdaM', learning_rate, weight_decay, epsilon updates = get_optimizer(params,gparams) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=[classifier.errors(y), lowerbound, hinge_loss, cost], #outputs=layer[-1].errors(y), 
givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) } ) validate_model = theano.function( inputs=[index], outputs=[classifier.errors(y), lowerbound, hinge_loss, cost], #outputs=layer[-1].errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size], y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) } ) ''' Save parameters and activations ''' parameters = theano.function( inputs=[], outputs=params, ) train_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), #drop_inverse: np.cast['int32'](0) #y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) valid_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), #drop_inverse: np.cast['int32'](0) #y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) test_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), #drop_inverse: np.cast['int32'](0) #y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` debug_model = theano.function( inputs=[index], outputs=[classifier.errors(y), lowerbound, px, pz, qz, hinge_loss, cost], #updates=updates, givens={ x: train_set_x[index * 
batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](dropout_flag), drop_inverse: np.cast['int32'](drop_inverses_flag) } ) random_generation = theano.function( inputs=[random_z], outputs=[random_x_mean.flatten(2), random_x.flatten(2)], givens={ #drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) } ) train_bound_without_dropout = theano.function( inputs=[index], outputs=[classifier.errors(y), lowerbound, hinge_loss, cost], givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) } ) train_model = theano.function( inputs=[index], outputs=[classifier.errors(y), lowerbound, hinge_loss, cost], updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](dropout_flag), drop_inverse: np.cast['int32'](drop_inverses_flag) } ) # end-snippet-5 ################## # Pretrain MODEL # ################## if predir is not None: color.printBlue('... setting parameters') color.printBlue(predir) pre_train = np.load(predir+'model.npz') pre_train = pre_train['model'] # params include w and b, exclude it for (para, pre) in zip(params[:-2], pre_train): #print pre.shape para.set_value(pre) tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) / float(batch_size) print '------------------', tmp[1:5] # valid_error test_error epochs predy_test_stats = [1, 1, 0] predy_valid_stats = [1, 1, 0] best_validation_bound = -1000000.0 best_iter = 0 test_score = 0. 
start_time = time.clock() NaN_count = 0 epoch = 0 threshold = 0 validation_frequency = 1 generatition_frequency = 10 if predir is not None: threshold = 0 color.printRed('threshold, '+str(threshold) + ' generatition_frequency, '+str(generatition_frequency) +' validation_frequency, '+str(validation_frequency)) done_looping = False decay_epochs=500 n_epochs=600 ''' print 'test initialization...' pre_model = parameters() for i in xrange(len(pre_model)): pre_model[i] = np.asarray(pre_model[i]) print pre_model[i].shape, np.mean(pre_model[i]), np.var(pre_model[i]) print 'end test...' ''' while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 train_error = 0 train_lowerbound = 0 train_hinge_loss = 0 train_obj = 0 test_epoch = epoch - decay_epochs if test_epoch > 0 and test_epoch % 10 == 0: print l_r.get_value() with open(logdir+'hook.txt', 'a') as f: print >>f,l_r.get_value() l_r.set_value(np.cast['float32'](l_r.get_value()/3.0)) tmp_start1 = time.clock() for minibatch_index in xrange(n_train_batches): #print n_train_batches e, l, h, o = train_model(minibatch_index) train_error += e train_lowerbound += l train_hinge_loss += h train_obj += o # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if math.isnan(train_lowerbound): NaN_count+=1 color.printRed("NaN detected. 
Reverting to saved best parameters") print '---------------NaN_count:', NaN_count with open(logdir+'hook.txt', 'a') as f: print >>f, '---------------NaN_count:', NaN_count tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) / float(batch_size) tmp[0]*=batch_size print '------------------NaN check:', tmp with open(logdir+'hook.txt', 'a') as f: print >>f, '------------------NaN check:', tmp model = parameters() for i in xrange(len(model)): model[i] = np.asarray(model[i]).astype(np.float32) print model[i].shape, np.mean(model[i]), np.var(model[i]) print np.max(model[i]), np.min(model[i]) print np.all(np.isfinite(model[i])), np.any(np.isnan(model[i])) with open(logdir+'hook.txt', 'a') as f: print >>f, model[i].shape, np.mean(model[i]), np.var(model[i]) print >>f, np.max(model[i]), np.min(model[i]) print >>f, np.all(np.isfinite(model[i])), np.any(np.isnan(model[i])) best_before = np.load(logdir+'model.npz') best_before = best_before['model'] for (para, pre) in zip(params, best_before): para.set_value(pre) tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) / float(batch_size) tmp[0]*=batch_size print '------------------', tmp continue n_train=n_train_batches*batch_size #print 'optimization_time', time.clock() - tmp_start1 print epoch, 'stochastic training error', train_error / float(batch_size), train_lowerbound / float(n_train), train_hinge_loss / float(n_train), train_obj / float(n_train) with open(logdir+'hook.txt', 'a') as f: print >>f, epoch, 'stochastic training error', train_error / float(batch_size), train_lowerbound / float(n_train), train_hinge_loss / float(n_train), train_obj / float(n_train) if epoch % validation_frequency == 0: tmp_start2 = time.clock() # compute zero-one loss on validation set #train_stats = [train_bound_without_dropout(i) for i # in xrange(n_train_batches)] #this_train_stats = np.mean(train_stats, axis=0) #this_train_stats[1:] = this_train_stats[1:]/ 
float(batch_size) test_stats = [test_model(i) for i in xrange(n_test_batches)] this_test_stats = np.mean(test_stats, axis=0) this_test_stats[1:] = this_test_stats[1:]/ float(batch_size) print epoch, 'test error', this_test_stats with open(logdir+'hook.txt', 'a') as f: print >>f, epoch, 'test error', this_test_stats if epoch%100==0: model = parameters() for i in xrange(len(model)): model[i] = np.asarray(model[i]).astype(np.float32) #print model[i].shape, np.mean(model[i]), np.var(model[i]) np.savez(logdir+'model-'+str(epoch), model=model) tmp_start4=time.clock() if epoch % generatition_frequency == 0: tail='-'+str(epoch)+'.png' random_z = np.random.standard_normal((n_batch, n_hidden[-1])).astype(np.float32) _x_mean, _x = random_generation(random_z) #print _x.shape #print _x_mean.shape image = paramgraphics.mat_to_img(_x.T, dim_input, colorImg=colorImg) image.save(logdir+'samples'+tail, 'PNG') image = paramgraphics.mat_to_img(_x_mean.T, dim_input, colorImg=colorImg) image.save(logdir+'mean_samples'+tail, 'PNG') #print 'generation_time', time.clock() - tmp_start4 end_time = time.clock() print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) if NaN_count > 0: print '---------------NaN_count:', NaN_count with open(logdir+'hook.txt', 'a') as f: print >>f, '---------------NaN_count:', NaN_count
" TRAIN:\tCost=%.5f\tlogq(z|x)=%.5f\tlogp(z)=%.5f\tlogp(x|z)=%.5f\n" %(costs_train[-1], log_qz_given_x_train[-1], log_pz_train[-1], log_px_given_z_train[-1]) + \ " EVAL-L1:\tCost=%.5f\tlogq(z|x)=%.5f\tlogp(z)=%.5f\tlogp(x|z)=%.5f" %(LL_test1[-1], log_qz_given_x_test1[-1], log_pz_test1[-1], log_px_given_z_test1[-1]) print line with open(logfile,'a') as f: f.write(line + "\n") # random generation for visualization import util.paramgraphics as paramgraphics import scipy.io as sio tail='-'+str(epoch)+'.png' _x_mean, _x = generate_model(num_generation) _x_mean = _x_mean.reshape((num_generation,-1)) _x = _x.reshape((num_generation,-1)) sio.savemat(os.path.join(res_out,'array_images-'+str(epoch)+'.mat'), {'data':_x_mean}) image = paramgraphics.mat_to_img(_x.T, dim_input, colorImg=colorImg, scale=generation_scale) image.save(os.path.join(res_out, 'samples'+tail), 'PNG') image = paramgraphics.mat_to_img(_x_mean.T, dim_input, colorImg=colorImg, scale=generation_scale) image.save(os.path.join(res_out, 'mean_samples'+tail), 'PNG') ''' if dataset in ['norb_48', 'norb_96']: image = paramgraphics.mat_to_img(_x_mean.T, dim_input, colorImg=colorImg, scale=True) image.save(os.path.join(res_out, 'mean_samples_scale'+tail), 'PNG') import nn_search if epoch % 250 == 0: nn = nn_search.nn_search(_x_mean, train_x) image = paramgraphics.mat_to_img(nn.T, dim_input, colorImg=colorImg, scale=True) image.save(os.path.join(res_out, 'mean_samples_nn'+tail), 'PNG') ''' #save model every 100'th epochs
def cva_6layer_dropout_mnist_60000(seed=0, dropout_flag=1, drop_inverses_flag=0, learning_rate=3e-4, predir=None, n_batch=144, dataset='mnist.pkl.gz', batch_size=500, nkerns=[20, 50], n_hidden=[500, 50]): """ Implementation of convolutional VA """ #cp->cd->cpd->cd->c nkerns=[32, 32, 64, 64, 64] drops=[1, 0, 1, 0, 0] #skerns=[5, 3, 3, 3, 3] #pools=[2, 1, 1, 2, 1] #modes=['same']*5 n_hidden=[500, 50] drop_inverses=[1,] # 28->12->12->5->5/5*5*64->500->50->500->5*5*64/5->5->12->12->28 if dataset=='mnist.pkl.gz': dim_input=(28, 28) colorImg=False logdir = 'results/supervised/cva/mnist/cva_6layer_mnist_60000'+str(nkerns)+str(n_hidden)+'_'+str(learning_rate)+'_' if predir is not None: logdir +='pre_' if dropout_flag == 1: logdir += ('dropout_'+str(drops)+'_') if drop_inverses_flag==1: logdir += ('inversedropout_'+str(drop_inverses)+'_') logdir += str(int(time.time()))+'/' if not os.path.exists(logdir): os.makedirs(logdir) print 'logdir:', logdir, 'predir', predir print 'cva_6layer_mnist_60000', nkerns, n_hidden, seed, drops, drop_inverses, dropout_flag, drop_inverses_flag with open(logdir+'hook.txt', 'a') as f: print >>f, 'logdir:', logdir, 'predir', predir print >>f, 'cva_6layer_mnist_60000', nkerns, n_hidden, seed, drops, drop_inverses, dropout_flag, drop_inverses_flag datasets = datapy.load_data_gpu_60000(dataset, have_matrix=True) train_set_x, train_set_y, train_y_matrix = datasets[0] valid_set_x, valid_set_y, valid_y_matrix = datasets[1] test_set_x, test_set_y, test_y_matrix = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... 
building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels random_z = T.matrix('random_z') drop = T.iscalar('drop') drop_inverse = T.iscalar('drop_inverse') activation = nonlinearity.relu rng = np.random.RandomState(seed) rng_share = theano.tensor.shared_randomstreams.RandomStreams(0) input_x = x.reshape((batch_size, 1, 28, 28)) recg_layer = [] cnn_output = [] #1 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2), border_mode='valid', activation=activation )) if drops[0]==1: cnn_output.append(recg_layer[-1].drop_output(input=input_x, drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(input=input_x)) #2 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) if drops[1]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #3 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[1], 12, 12), filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2), border_mode='valid', activation=activation )) if drops[2]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #4 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[2], 5, 5), filter_shape=(nkerns[3], nkerns[2], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) if drops[3]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: 
cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #5 recg_layer.append(ConvMaxPool.ConvMaxPool( rng, image_shape=(batch_size, nkerns[3], 5, 5), filter_shape=(nkerns[4], nkerns[3], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) if drops[4]==1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) mlp_input_x = cnn_output[-1].flatten(2) activations = [] #1 recg_layer.append(FullyConnected.FullyConnected( rng=rng, n_in= 5 * 5 * nkerns[-1], n_out=n_hidden[0], activation=activation )) if drops[-1]==1: activations.append(recg_layer[-1].drop_output(input=mlp_input_x, drop=drop, rng=rng_share)) else: activations.append(recg_layer[-1].output(input=mlp_input_x)) #stochastic layer recg_layer.append(GaussianHidden.GaussianHidden( rng=rng, input=activations[-1], n_in=n_hidden[0], n_out = n_hidden[1], activation=None )) z = recg_layer[-1].sample_z(rng_share) gene_layer = [] z_output = [] random_z_output = [] #1 gene_layer.append(FullyConnected.FullyConnected( rng=rng, n_in=n_hidden[1], n_out = n_hidden[0], activation=activation )) z_output.append(gene_layer[-1].output(input=z)) random_z_output.append(gene_layer[-1].output(input=random_z)) #2 gene_layer.append(FullyConnected.FullyConnected( rng=rng, n_in=n_hidden[0], n_out = 5*5*nkerns[-1], activation=activation )) if drop_inverses[0]==1: z_output.append(gene_layer[-1].drop_output(input=z_output[-1], drop=drop_inverse, rng=rng_share)) random_z_output.append(gene_layer[-1].drop_output(input=random_z_output[-1], drop=drop_inverse, rng=rng_share)) else: z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output(input=random_z_output[-1])) input_z = z_output[-1].reshape((batch_size, nkerns[-1], 5, 5)) input_random_z = random_z_output[-1].reshape((n_batch, nkerns[-1], 5, 5)) #1 gene_layer.append(UnpoolConvNon.UnpoolConvNon( rng, image_shape=(batch_size, nkerns[-1], 
5, 5), filter_shape=(nkerns[-2], nkerns[-1], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) z_output.append(gene_layer[-1].output(input=input_z)) random_z_output.append(gene_layer[-1].output_random_generation(input=input_random_z, n_batch=n_batch)) #2 gene_layer.append(UnpoolConvNon.UnpoolConvNon( rng, image_shape=(batch_size, nkerns[-2], 5, 5), filter_shape=(nkerns[-3], nkerns[-2], 3, 3), poolsize=(2, 2), border_mode='full', activation=activation )) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #3 gene_layer.append(UnpoolConvNon.UnpoolConvNon( rng, image_shape=(batch_size, nkerns[-3], 12, 12), filter_shape=(nkerns[-4], nkerns[-3], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #4 gene_layer.append(UnpoolConvNon.UnpoolConvNon( rng, image_shape=(batch_size, nkerns[-4], 12, 12), filter_shape=(nkerns[-5], nkerns[-4], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation )) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) #5 stochastic layer # for the last layer, the nonliearity should be sigmoid to achieve mean of Bernoulli gene_layer.append(UnpoolConvNon.UnpoolConvNon( rng, image_shape=(batch_size, nkerns[-5], 12, 12), filter_shape=(1, nkerns[-5], 5, 5), poolsize=(2, 2), border_mode='full', activation=nonlinearity.sigmoid )) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation(input=random_z_output[-1], n_batch=n_batch)) gene_layer.append(NoParamsBernoulliVisiable.NoParamsBernoulliVisiable( #rng=rng, #mean=z_output[-1], #data=input_x, )) logpx = 
gene_layer[-1].logpx(mean=z_output[-1], data=input_x) # 4-D tensor of random generation random_x_mean = random_z_output[-1] random_x = gene_layer[-1].sample_x(rng_share, random_x_mean) #L = (logpx + logpz - logqz).sum() cost = ( (logpx + recg_layer[-1].logpz - recg_layer[-1].logqz).sum() ) px = (logpx.sum()) pz = (recg_layer[-1].logpz.sum()) qz = (- recg_layer[-1].logqz.sum()) params=[] for g in gene_layer: params+=g.params for r in recg_layer: params+=r.params gparams = [T.grad(cost, param) for param in params] weight_decay=1.0/n_train_batches l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32)) #get_optimizer = optimizer.get_adam_optimizer(learning_rate=learning_rate) get_optimizer = optimizer.get_adam_optimizer_max(learning_rate=l_r, decay1=0.1, decay2=0.001, weight_decay=weight_decay, epsilon=1e-8) with open(logdir+'hook.txt', 'a') as f: print >>f, 'AdaM', learning_rate, weight_decay updates = get_optimizer(params,gparams) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=cost, #outputs=layer[-1].errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], #y: test_set_y[index * batch_size:(index + 1) * batch_size], #y_matrix: test_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) } ) validate_model = theano.function( inputs=[index], outputs=cost, #outputs=layer[-1].errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], #y: valid_set_y[index * batch_size:(index + 1) * batch_size], #y_matrix: valid_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) } ) ''' Save parameters and activations ''' parameters = theano.function( inputs=[], outputs=params, ) train_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x: 
train_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), #drop_inverse: np.cast['int32'](0) #y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) valid_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), #drop_inverse: np.cast['int32'](0) #y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) test_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), #drop_inverse: np.cast['int32'](0) #y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` debug_model = theano.function( inputs=[index], outputs=[cost, px, pz, qz], #updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], #y: train_set_y[index * batch_size: (index + 1) * batch_size], #y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](dropout_flag), drop_inverse: np.cast['int32'](drop_inverses_flag) } ) random_generation = theano.function( inputs=[random_z], outputs=[random_x_mean.flatten(2), random_x.flatten(2)], givens={ #drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) } ) train_bound_without_dropout = theano.function( inputs=[index], outputs=cost, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], #y: train_set_y[index * batch_size: (index + 1) * batch_size], #y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) } ) train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size: 
(index + 1) * batch_size], #y: train_set_y[index * batch_size: (index + 1) * batch_size], #y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](dropout_flag), drop_inverse: np.cast['int32'](drop_inverses_flag) } ) ################## # Pretrain MODEL # ################## if predir is not None: color.printBlue('... setting parameters') color.printBlue(predir) pre_train = np.load(predir+'model.npz') pre_train = pre_train['model'] for (para, pre) in zip(params, pre_train): para.set_value(pre) tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) / float(batch_size) print '------------------', tmp ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_bound = -1000000.0 best_iter = 0 test_score = 0. start_time = time.clock() NaN_count = 0 epoch = 0 threshold = 0 validation_frequency = 1 generatition_frequency = 10 if predir is not None: threshold = 0 color.printRed('threshold, '+str(threshold) + ' generatition_frequency, '+str(generatition_frequency) +' validation_frequency, '+str(validation_frequency)) done_looping = False n_epochs = 600 decay_epochs = 500 ''' print 'test initialization...' pre_model = parameters() for i in xrange(len(pre_model)): pre_model[i] = np.asarray(pre_model[i]) print pre_model[i].shape, np.mean(pre_model[i]), np.var(pre_model[i]) print 'end test...' 
''' while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 minibatch_avg_cost = 0 tmp_start1 = time.clock() test_epoch = epoch - decay_epochs if test_epoch > 0 and test_epoch % 10 == 0: print l_r.get_value() with open(logdir+'hook.txt', 'a') as f: print >>f,l_r.get_value() l_r.set_value(np.cast['float32'](l_r.get_value()/3.0)) for minibatch_index in xrange(n_train_batches): #print minibatch_index ''' color.printRed('lalala') xxx = dims(minibatch_index) print xxx.shape ''' #print n_train_batches minibatch_avg_cost += train_model(minibatch_index) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if math.isnan(minibatch_avg_cost): NaN_count+=1 color.printRed("NaN detected. Reverting to saved best parameters") print '---------------NaN_count:', NaN_count with open(logdir+'hook.txt', 'a') as f: print >>f, '---------------NaN_count:', NaN_count tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) / float(batch_size) print '------------------NaN check:', tmp with open(logdir+'hook.txt', 'a') as f: print >>f, '------------------NaN check:', tmp model = parameters() for i in xrange(len(model)): model[i] = np.asarray(model[i]).astype(np.float32) print model[i].shape, np.mean(model[i]), np.var(model[i]) print np.max(model[i]), np.min(model[i]) print np.all(np.isfinite(model[i])), np.any(np.isnan(model[i])) with open(logdir+'hook.txt', 'a') as f: print >>f, model[i].shape, np.mean(model[i]), np.var(model[i]) print >>f, np.max(model[i]), np.min(model[i]) print >>f, np.all(np.isfinite(model[i])), np.any(np.isnan(model[i])) best_before = np.load(logdir+'model.npz') best_before = best_before['model'] for (para, pre) in zip(params, best_before): para.set_value(pre) tmp = [debug_model(i) for i in xrange(n_train_batches)] tmp = (np.asarray(tmp)).mean(axis=0) / float(batch_size) print '------------------', tmp return #print 'optimization_time', time.clock() - tmp_start1 print epoch, 'stochastic training 
error', minibatch_avg_cost / float(n_train_batches*batch_size) with open(logdir+'hook.txt', 'a') as f: print >>f, epoch, 'stochastic training error', minibatch_avg_cost / float(n_train_batches*batch_size) if epoch % validation_frequency == 0: tmp_start2 = time.clock() test_losses = [test_model(i) for i in xrange(n_test_batches)] this_test_bound = np.mean(test_losses)/float(batch_size) #tmp = [debug_model(i) for i # in xrange(n_train_batches)] #tmp = (np.asarray(tmp)).mean(axis=0) / float(batch_size) print epoch, 'test bound', this_test_bound #print tmp with open(logdir+'hook.txt', 'a') as f: print >>f, epoch, 'test bound', this_test_bound if epoch%100==0: model = parameters() for i in xrange(len(model)): model[i] = np.asarray(model[i]).astype(np.float32) np.savez(logdir+'model-'+str(epoch), model=model) for i in xrange(n_train_batches): if i == 0: train_features = np.asarray(train_activations(i)) else: train_features = np.vstack((train_features, np.asarray(train_activations(i)))) for i in xrange(n_valid_batches): if i == 0: valid_features = np.asarray(valid_activations(i)) else: valid_features = np.vstack((valid_features, np.asarray(valid_activations(i)))) for i in xrange(n_test_batches): if i == 0: test_features = np.asarray(test_activations(i)) else: test_features = np.vstack((test_features, np.asarray(test_activations(i)))) np.save(logdir+'train_features', train_features) np.save(logdir+'valid_features', valid_features) np.save(logdir+'test_features', test_features) tmp_start4=time.clock() if epoch % generatition_frequency == 0: tail='-'+str(epoch)+'.png' random_z = np.random.standard_normal((n_batch, n_hidden[-1])).astype(np.float32) _x_mean, _x = random_generation(random_z) #print _x.shape #print _x_mean.shape image = paramgraphics.mat_to_img(_x.T, dim_input, colorImg=colorImg) image.save(logdir+'samples'+tail, 'PNG') image = paramgraphics.mat_to_img(_x_mean.T, dim_input, colorImg=colorImg) image.save(logdir+'mean_samples'+tail, 'PNG') #print 
'generation_time', time.clock() - tmp_start4 end_time = time.clock() print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) if NaN_count > 0: print '---------------NaN_count:', NaN_count with open(logdir+'hook.txt', 'a') as f: print >>f, '---------------NaN_count:', NaN_count
def c_6layer_mnist_imputation(seed=0, ctype='cva', pertub_type=3, pertub_prob=6, pertub_prob1=14, visualization_times=20, denoise_times=200, predir=None, n_batch=144, dataset='mnist.pkl.gz', batch_size=500): """ Missing data imputation """ #cp->cd->cpd->cd->c nkerns = [32, 32, 64, 64, 64] drops = [0, 0, 0, 0, 0, 1] #skerns=[5, 3, 3, 3, 3] #pools=[2, 1, 1, 2, 1] #modes=['same']*5 n_hidden = [500, 50] drop_inverses = [ 1, ] # 28->12->12->5->5/5*5*64->500->50->500->5*5*64/5->5->12->12->28 if dataset == 'mnist.pkl.gz': dim_input = (28, 28) colorImg = False logdir = 'results/imputation/' + ctype + '/mnist/' + ctype + '_6layer_mnist_' + str( pertub_type) + '_' + str(pertub_prob) + '_' + str( pertub_prob1) + '_' + str(denoise_times) + '_' logdir += str(int(time.time())) + '/' if not os.path.exists(logdir): os.makedirs(logdir) print predir with open(logdir + 'hook.txt', 'a') as f: print >> f, predir train_set_x, test_set_x, test_set_x_pertub, pertub_label, pertub_number = datapy.load_pertub_data( dirs='data_imputation/', pertub_type=pertub_type, pertub_prob=pertub_prob, pertub_prob1=pertub_prob1) datasets = datapy.load_data_gpu(dataset, have_matrix=True) _, _, _ = datasets[0] valid_set_x, _, _ = datasets[1] _, _, _ = datasets[2] # compute number of minibatches for training, validation and testing n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... 
building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') x_pertub = T.matrix( 'x_pertub') # the data is presented as rasterized images p_label = T.matrix('p_label') random_z = T.matrix('random_z') drop = T.iscalar('drop') drop_inverse = T.iscalar('drop_inverse') activation = nonlinearity.relu rng = np.random.RandomState(seed) rng_share = theano.tensor.shared_randomstreams.RandomStreams(0) input_x = x_pertub.reshape((batch_size, 1, 28, 28)) recg_layer = [] cnn_output = [] #1 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2), border_mode='valid', activation=activation)) if drops[0] == 1: cnn_output.append(recg_layer[-1].drop_output(input=input_x, drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(input=input_x)) #2 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) if drops[1] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #3 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[1], 12, 12), filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2), border_mode='valid', activation=activation)) if drops[2] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #4 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[2], 5, 5), filter_shape=(nkerns[3], nkerns[2], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) if drops[3] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: 
cnn_output.append(recg_layer[-1].output(cnn_output[-1])) #5 recg_layer.append( ConvMaxPool.ConvMaxPool(rng, image_shape=(batch_size, nkerns[3], 5, 5), filter_shape=(nkerns[4], nkerns[3], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) if drops[4] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share)) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) mlp_input_x = cnn_output[-1].flatten(2) activations = [] #1 recg_layer.append( FullyConnected.FullyConnected(rng=rng, n_in=5 * 5 * nkerns[-1], n_out=n_hidden[0], activation=activation)) if drops[-1] == 1: activations.append(recg_layer[-1].drop_output(input=mlp_input_x, drop=drop, rng=rng_share)) else: activations.append(recg_layer[-1].output(input=mlp_input_x)) #stochastic layer recg_layer.append( GaussianHidden.GaussianHidden(rng=rng, input=activations[-1], n_in=n_hidden[0], n_out=n_hidden[1], activation=None)) z = recg_layer[-1].sample_z(rng_share) gene_layer = [] z_output = [] random_z_output = [] #1 gene_layer.append( FullyConnected.FullyConnected(rng=rng, n_in=n_hidden[1], n_out=n_hidden[0], activation=activation)) z_output.append(gene_layer[-1].output(input=z)) random_z_output.append(gene_layer[-1].output(input=random_z)) #2 gene_layer.append( FullyConnected.FullyConnected(rng=rng, n_in=n_hidden[0], n_out=5 * 5 * nkerns[-1], activation=activation)) if drop_inverses[0] == 1: z_output.append(gene_layer[-1].drop_output(input=z_output[-1], drop=drop_inverse, rng=rng_share)) random_z_output.append(gene_layer[-1].drop_output( input=random_z_output[-1], drop=drop_inverse, rng=rng_share)) else: z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append( gene_layer[-1].output(input=random_z_output[-1])) input_z = z_output[-1].reshape((batch_size, nkerns[-1], 5, 5)) input_random_z = random_z_output[-1].reshape((n_batch, nkerns[-1], 5, 5)) #1 gene_layer.append( UnpoolConvNon.UnpoolConvNon(rng, image_shape=(batch_size, nkerns[-1], 
5, 5), filter_shape=(nkerns[-2], nkerns[-1], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) z_output.append(gene_layer[-1].output(input=input_z)) random_z_output.append(gene_layer[-1].output_random_generation( input=input_random_z, n_batch=n_batch)) #2 gene_layer.append( UnpoolConvNon.UnpoolConvNon(rng, image_shape=(batch_size, nkerns[-2], 5, 5), filter_shape=(nkerns[-3], nkerns[-2], 3, 3), poolsize=(2, 2), border_mode='full', activation=activation)) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch)) #3 gene_layer.append( UnpoolConvNon.UnpoolConvNon(rng, image_shape=(batch_size, nkerns[-3], 12, 12), filter_shape=(nkerns[-4], nkerns[-3], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch)) #4 gene_layer.append( UnpoolConvNon.UnpoolConvNon(rng, image_shape=(batch_size, nkerns[-4], 12, 12), filter_shape=(nkerns[-5], nkerns[-4], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch)) #5 stochastic layer # for the last layer, the nonliearity should be sigmoid to achieve mean of Bernoulli gene_layer.append( UnpoolConvNon.UnpoolConvNon(rng, image_shape=(batch_size, nkerns[-5], 12, 12), filter_shape=(1, nkerns[-5], 5, 5), poolsize=(2, 2), border_mode='full', activation=nonlinearity.sigmoid)) z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch)) gene_layer.append( NoParamsBernoulliVisiable.NoParamsBernoulliVisiable( #rng=rng, #mean=z_output[-1], #data=input_x, )) logpx = 
gene_layer[-1].logpx(mean=z_output[-1], data=input_x) # 4-D tensor of random generation random_x_mean = random_z_output[-1] random_x = gene_layer[-1].sample_x(rng_share, random_x_mean) x_denoised = z_output[-1].flatten(2) x_denoised = p_label * x + (1 - p_label) * x_denoised mse = ((x - x_denoised)**2).sum() / pertub_number params = [] for g in gene_layer: params += g.params for r in recg_layer: params += r.params train_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x_pertub: train_set_x[index * batch_size:(index + 1) * batch_size], drop: np.cast['int32'](0) }) valid_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x_pertub: valid_set_x[index * batch_size:(index + 1) * batch_size], drop: np.cast['int32'](0) }) test_activations = theano.function(inputs=[x_pertub], outputs=T.concatenate(activations, axis=1), givens={drop: np.cast['int32'](0)}) imputation_model = theano.function( inputs=[index, x_pertub], outputs=[x_denoised, mse], givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], p_label: pertub_label[index * batch_size:(index + 1) * batch_size], drop: np.cast['int32'](0), drop_inverse: np.cast['int32'](0) }) ################## # Pretrain MODEL # ################## model_epoch = 600 if os.environ.has_key('model_epoch'): model_epoch = int(os.environ['model_epoch']) if predir is not None: color.printBlue('... setting parameters') color.printBlue(predir) if model_epoch == -1: pre_train = np.load(predir + 'best-model.npz') else: pre_train = np.load(predir + 'model-' + str(model_epoch) + '.npz') pre_train = pre_train['model'] if ctype == 'cva': for (para, pre) in zip(params, pre_train): para.set_value(pre) elif ctype == 'cmmva': for (para, pre) in zip(params, pre_train[:-2]): para.set_value(pre) else: exit() else: exit() ############### # TRAIN MODEL # ############### print '... 
training' epoch = 0 n_visualization = 100 output = np.ones((n_visualization, visualization_times + 2, 784)) output[:, 0, :] = test_set_x.get_value()[:n_visualization, :] output[:, 1, :] = test_set_x_pertub.get_value()[:n_visualization, :] image = paramgraphics.mat_to_img(output[:, 0, :].T, dim_input, colorImg=colorImg) image.save(logdir + 'data.png', 'PNG') image = paramgraphics.mat_to_img(output[:, 1, :].T, dim_input, colorImg=colorImg) image.save(logdir + 'data_pertub.png', 'PNG') tmp = test_set_x_pertub.get_value() while epoch < denoise_times: epoch = epoch + 1 this_mse = 0 for i in xrange(n_test_batches): d, m = imputation_model(i, tmp[i * batch_size:(i + 1) * batch_size]) tmp[i * batch_size:(i + 1) * batch_size] = np.asarray(d) this_mse += m if epoch <= visualization_times: output[:, epoch + 1, :] = tmp[:n_visualization, :] print epoch, this_mse with open(logdir + 'hook.txt', 'a') as f: print >> f, epoch, this_mse image = paramgraphics.mat_to_img(tmp[:n_visualization, :].T, dim_input, colorImg=colorImg) image.save(logdir + 'procedure-' + str(epoch) + '.png', 'PNG') np.savez(logdir + 'procedure-' + str(epoch), tmp=tmp) image = paramgraphics.mat_to_img((output.reshape(-1, 784)).T, dim_input, colorImg=colorImg, tile_shape=(n_visualization, 22)) image.save(logdir + 'output.png', 'PNG') np.savez(logdir + 'output', output=output) # save original train features and denoise test features for i in xrange(n_train_batches): if i == 0: train_features = np.asarray(train_activations(i)) else: train_features = np.vstack( (train_features, np.asarray(train_activations(i)))) for i in xrange(n_valid_batches): if i == 0: valid_features = np.asarray(valid_activations(i)) else: valid_features = np.vstack( (valid_features, np.asarray(valid_activations(i)))) for i in xrange(n_test_batches): if i == 0: test_features = np.asarray( test_activations(tmp[i * batch_size:(i + 1) * batch_size])) else: test_features = np.vstack( (test_features, np.asarray( test_activations(tmp[i * 
batch_size:(i + 1) * batch_size])))) np.save(logdir + 'train_features', train_features) np.save(logdir + 'valid_features', valid_features) np.save(logdir + 'test_features', test_features)
def cmmd(dataset='mnist.pkl.gz', batch_size=100, layer_num=3, hidden_dim=5, seed=0, layer_size=[64, 256, 256, 512]): validation_frequency = 1 test_frequency = 1 pre_train = 1 dim_input = (28, 28) colorImg = False print "Loading data ......." #datasets = datapy.load_data_gpu_60000_with_noise(dataset, have_matrix = True) datasets = datapy.load_data_gpu_60000(dataset, have_matrix=True) train_set_x, train_set_y, train_y_matrix = datasets[0] valid_set_x, valid_set_y, valid_y_matrix = datasets[1] test_set_x, test_set_y, test_y_matrix = datasets[2] rng = np.random.RandomState(seed) rng_share = theano.tensor.shared_randomstreams.RandomStreams(0) n_train_batches = train_set_x.get_value().shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size aImage = paramgraphics.mat_to_img(train_set_x.get_value()[0:169].T, dim_input, colorImg=colorImg) aImage.save('mnist_sample', 'PNG') ################################ ## build model ## ################################ print "Building model ......." 
index = T.lscalar() x = T.matrix('x') ##### batch_size * 28^2 y = T.vector('y') y_matrix = T.matrix('y_matrix') random_z = T.matrix('random_z') ### batch_size * hidden_dim Inv_K_d = T.matrix('Inv_K_d') layers = [] layer_output = [] activation = nonlinearity.relu #activation = Tnn.sigmoid #### first layer layers.append( FullyConnected.FullyConnected( rng=rng, n_in=10 + hidden_dim, #n_in = 10, n_out=layer_size[0], activation=activation)) layer_output.append(layers[-1].output_mix(input=[y_matrix, random_z])) #layer_output.append(layers[-1].output_mix2(input=[y_matrix,random_z])) #layer_output.append(layers[-1].output(input=x)) #layer_output.append(layers[-1].output(input=random_z)) #### middle layer for i in range(layer_num): layers.append( FullyConnected.FullyConnected(rng=rng, n_in=layer_size[i], n_out=layer_size[i + 1], activation=activation)) layer_output.append(layers[-1].output(input=layer_output[-1])) #### last layer activation = Tnn.sigmoid #activation = nonlinearity.relu layers.append( FullyConnected.FullyConnected(rng=rng, n_in=layer_size[-1], n_out=28 * 28, activation=activation)) x_gen = layers[-1].output(input=layer_output[-1]) lambda1_ = 100 lambda_ = theano.shared(np.asarray(lambda1_, dtype=np.float32)) K_d = kernel_gram_for_y(y_matrix, y_matrix, batch_size, 10) K_s = K_d K_sd = K_d Invv_1 = T.sum(y_matrix, axis=0) / batch_size Invv = NL.alloc_diag(1 / Invv_1) Inv_K_d = Invv #Inv_K_d = NL.matrix_inverse(K_d +lambda_ * T.identity_like(K_d)) Inv_K_s = Inv_K_d L_d = kernel_gram_for_x(x, x, batch_size, 28 * 28) L_s = kernel_gram_for_x(x_gen, x_gen, batch_size, 28 * 28) L_ds = kernel_gram_for_x(x, x_gen, batch_size, 28 * 28) ''' cost = -(NL.trace(T.dot(T.dot(T.dot(K_d, Inv_K_d), L_d), Inv_K_d)) +\ NL.trace(T.dot(T.dot(T.dot(K_s, Inv_K_s), L_s),Inv_K_s))- \ 2 * NL.trace(T.dot(T.dot(T.dot(K_sd, Inv_K_d) ,L_ds ), Inv_K_s))) ''' ''' cost = -(NL.trace(T.dot(L_d, T.ones_like(L_d) )) +\ NL.trace(T.dot(L_s,T.ones_like(L_s)))- \ 2 * 
NL.trace(T.dot(L_ds,T.ones_like(L_ds) ))) cost2 = 2 * T.sum(L_ds) - T.sum(L_s) + NL.trace(T.dot(L_s, T.ones_like(L_s)))\ - 2 * NL.trace( T.dot(L_ds , T.ones_like(L_ds))) cost2 = T.dot(T.dot(Inv_K_d, K_d),Inv_K_d) ''' cost2 = K_d #cost2 = T.dot(T.dot(Inv_K_d,K_d),Inv_K_d) #cost = - T.sum(L_d) +2 * T.sum(L_ds) - T.sum(L_s) cost2 = K_d cost2 = T.dot(T.dot(T.dot(y_matrix, Inv_K_d), Inv_K_d), y_matrix.T) cost = -(NL.trace(T.dot(T.dot(T.dot(T.dot(L_d, y_matrix),Inv_K_d), Inv_K_d),y_matrix.T)) +\ NL.trace(T.dot(T.dot(T.dot(T.dot(L_s, y_matrix),Inv_K_s), Inv_K_s),y_matrix.T))- \ 2 * NL.trace(T.dot(T.dot(T.dot(T.dot(L_ds, y_matrix),Inv_K_d), Inv_K_s),y_matrix.T))) ''' cost = - T.sum(L_d) +2 * T.sum(L_ds) - T.sum(L_s) cost = - NL.trace(K_s * Inv_K_s * L_s * Inv_K_s)+ \ 2 * NL.trace(K_sd * Inv_K_d * L_ds * Inv_K_s) ''' ################################ ## updates ## ################################ params = [] for aLayer in layers: params += aLayer.params gparams = [T.grad(cost, param) for param in params] learning_rate = 3e-4 weight_decay = 1.0 / n_train_batches epsilon = 1e-8 l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32)) get_optimizer = optimizer.get_adam_optimizer_max(learning_rate=l_r, decay1=0.1, decay2=0.001, weight_decay=weight_decay, epsilon=epsilon) updates = get_optimizer(params, gparams) ################################ ## pretrain model ## ################################ parameters = theano.function( inputs=[], outputs=params, ) gen_fig = theano.function( inputs=[y_matrix, random_z], outputs=x_gen, on_unused_input='warn', ) if pre_train == 1: print "pre-training model....." 
pre_train = np.load('./result/MMD-100-5-64-256-256-512.npz')['model'] for (para, pre) in zip(params, pre_train): para.set_value(pre) s = 8 for jj in range(10): a = np.zeros((s, 10), dtype=np.float32) for ii in range(s): kk = random.randint(0, 9) a[ii, kk] = 1 x_gen = gen_fig(a, gen_random_z(s, hidden_dim)) ttt = train_set_x.get_value() for ll in range(s): minn = 1000000 ss = 0 for kk in range(ttt.shape[0]): tt = np.linalg.norm(x_gen[ll] - ttt[kk]) if tt < minn: minn = tt ss = kk #np.concatenate(x_gen,ttt[ss]) x_gen = np.vstack((x_gen, ttt[ss])) aImage = paramgraphics.mat_to_img(x_gen.T, dim_input, colorImg=colorImg) aImage.save('samples_' + str(jj) + '_similar', 'PNG') ################################ ## prepare data ## ################################ #### compute matrix inverse #print "Preparing data ...." #Invv = NL.matrix_inverse(K_d +lambda_ * T.identity_like(K_d)) ''' Invv_1 = T.sum(y_matrix,axis=0)/batch_size Invv = NL.alloc_diag(1/Invv_1) Inv_K_d = Invv prepare_data = theano.function( inputs = [index], outputs = [Invv,K_d], givens = { #x:train_set_x[index * batch_size:(index + 1) * batch_size], y_matrix:train_y_matrix[index * batch_size:(index + 1) * batch_size], } ) Inv_K_d_l, K_d_l = prepare_data(0) print Inv_K_d_l for minibatch_index in range(1, n_train_batches): if minibatch_index % 10 == 0: print 'minibatch_index:', minibatch_index Inv_pre_mini, K_d_pre_mini = prepare_data(minibatch_index) Inv_K_d_l = np.vstack((Inv_K_d_l,Inv_pre_mini)) K_d_l = np.vstack((K_d_l,K_d_pre_mini)) Inv_K_d_g = theano.shared(Inv_K_d_l,borrow=True) K_d_g = theano.shared(K_d_l, borrow=True) ''' ################################ ## train model ## ################################ train_model = theano.function( inputs=[index, random_z], outputs=[cost, x_gen, cost2], updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], y_matrix: train_y_matrix[index * batch_size:(index + 1) * batch_size], 
#K_d:K_d_g[index * batch_size:(index + 1) * batch_size], #Inv_K_d:Inv_K_d_g[index * batch_size:(index + 1) * batch_size], }, on_unused_input='warn') n_epochs = 500 cur_epoch = 0 print "Training model ......" while (cur_epoch < n_epochs): cur_epoch = cur_epoch + 1 cor = 0 for minibatch_index in xrange(n_train_batches): print minibatch_index, print " : ", cost, x_gen, cost2 = train_model( minibatch_index, gen_random_z(batch_size, hidden_dim)) print 'cost: ', cost print 'cost2: ', cost2 if minibatch_index % 30 == 0: aImage = paramgraphics.mat_to_img(x_gen[0:1].T, dim_input, colorImg=colorImg) aImage.save( 'samples_epoch_' + str(cur_epoch) + '_mini_' + str(minibatch_index), 'PNG') if cur_epoch % 1 == 0: model = parameters() for i in range(len(model)): model[i] = np.asarray(model[i]).astype(np.float32) np.savez('model-' + str(cur_epoch), model=model)
def c_6layer_mnist_imputation(seed=0, ctype='cva', pertub_type=3, pertub_prob=6, pertub_prob1=14,
                              visualization_times=20, denoise_times=200, predir=None, n_batch=144,
                              dataset='mnist.pkl.gz', batch_size=500):
    """
    Missing data imputation on MNIST with a pretrained 6-layer convolutional model.

    The function builds a recognition network (five ConvMaxPool layers, one
    FullyConnected layer and a GaussianHidden layer) and a generative network
    (two FullyConnected layers, five UnpoolConvNon layers and a Bernoulli
    visible layer), loads saved parameters from ``predir`` and then repeatedly
    imputes the perturbed test set: after every pass the observed pixels
    (``pertub_label == 1``) are reset to the original data and all other pixels
    are replaced by the reconstruction mean.

    Everything is written below a fresh, time-stamped directory
    ``results/imputation/<ctype>/mnist/...``: ``hook.txt`` (log),
    ``data.png`` / ``data_pertub.png``, one ``procedure-<epoch>.png`` /
    ``.npz`` per pass, ``output.png`` / ``output.npz`` and the
    ``train_features`` / ``valid_features`` / ``test_features`` arrays.

    Args:
        seed: seed of the ``np.random.RandomState`` handed to the layers.
        ctype: ``'cva'`` or ``'cmmva'``; for ``'cmmva'`` the last two saved
            parameter arrays are ignored. Any other value calls ``exit()``.
        pertub_type, pertub_prob, pertub_prob1: forwarded to
            ``datapy.load_pertub_data`` and used in the log directory name.
        visualization_times: number of leading passes stored in ``output``.
        denoise_times: number of imputation passes.
        predir: path prefix of the saved model; it is concatenated directly
            with the file name. ``None`` calls ``exit()``.
        n_batch: batch size of the random-generation branch of the decoder.
        dataset: passed to ``datapy.load_data_gpu``; only ``'mnist.pkl.gz'``
            defines ``dim_input`` and ``colorImg``.
        batch_size: minibatch size; samples beyond the last full batch are
            neither imputed nor included in the saved features.

    Returns:
        None. The environment variable ``model_epoch`` (default 600, ``-1``
        meaning ``best-model.npz``) selects which saved model is loaded.
    """
    #cp->cd->cpd->cd->c
    nkerns = [32, 32, 64, 64, 64]
    drops = [0, 0, 0, 0, 0, 1]
    #skerns=[5, 3, 3, 3, 3]
    #pools=[2, 1, 1, 2, 1]
    #modes=['same']*5
    n_hidden = [500, 50]
    drop_inverses = [1, ]
    # 28->12->12->5->5/5*5*64->500->50->500->5*5*64/5->5->12->12->28

    # NOTE(review): any other dataset leaves dim_input/colorImg undefined and
    # fails with a NameError once the first image is rendered.
    if dataset == 'mnist.pkl.gz':
        dim_input = (28, 28)
        colorImg = False

    logdir = ('results/imputation/' + ctype + '/mnist/' + ctype + '_6layer_mnist_'
              + str(pertub_type) + '_' + str(pertub_prob) + '_' + str(pertub_prob1)
              + '_' + str(denoise_times) + '_')
    logdir += str(int(time.time())) + '/'

    if not os.path.exists(logdir):
        os.makedirs(logdir)
    print(predir)
    with open(logdir + 'hook.txt', 'a') as f:
        f.write('%s\n' % (predir,))

    train_set_x, test_set_x, test_set_x_pertub, pertub_label, pertub_number = datapy.load_pertub_data(
        dirs='data_imputation/', pertub_type=pertub_type,
        pertub_prob=pertub_prob, pertub_prob1=pertub_prob1)

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)
    valid_set_x, _, _ = datasets[1]

    # compute number of minibatches for training, validation and testing
    # (floor division: a trailing partial batch is dropped)
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    x_pertub = T.matrix('x_pertub')  # the data is presented as rasterized images
    p_label = T.matrix('p_label')

    random_z = T.matrix('random_z')

    drop = T.iscalar('drop')
    drop_inverse = T.iscalar('drop_inverse')

    activation = nonlinearity.relu

    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)
    input_x = x_pertub.reshape((batch_size, 1, 28, 28))

    # ---- recognition (encoder) network ---------------------------------
    recg_layer = []
    cnn_output = []

    # (image_shape, filter_shape, poolsize, border_mode) of conv layers 1-5
    recg_conv_specs = [
        ((batch_size, 1, 28, 28), (nkerns[0], 1, 5, 5), (2, 2), 'valid'),
        ((batch_size, nkerns[0], 12, 12), (nkerns[1], nkerns[0], 3, 3), (1, 1), 'same'),
        ((batch_size, nkerns[1], 12, 12), (nkerns[2], nkerns[1], 3, 3), (2, 2), 'valid'),
        ((batch_size, nkerns[2], 5, 5), (nkerns[3], nkerns[2], 3, 3), (1, 1), 'same'),
        ((batch_size, nkerns[3], 5, 5), (nkerns[4], nkerns[3], 3, 3), (1, 1), 'same'),
    ]
    layer_input = input_x
    for k, (image_shape, filter_shape, poolsize, border_mode) in enumerate(recg_conv_specs):
        recg_layer.append(ConvMaxPool.ConvMaxPool(
            rng,
            image_shape=image_shape,
            filter_shape=filter_shape,
            poolsize=poolsize,
            border_mode=border_mode,
            activation=activation
        ))
        if drops[k] == 1:
            cnn_output.append(recg_layer[-1].drop_output(layer_input, drop=drop, rng=rng_share))
        else:
            cnn_output.append(recg_layer[-1].output(layer_input))
        layer_input = cnn_output[-1]

    mlp_input_x = cnn_output[-1].flatten(2)

    activations = []

    #1
    recg_layer.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=5 * 5 * nkerns[-1],
        n_out=n_hidden[0],
        activation=activation
    ))
    if drops[-1] == 1:
        activations.append(recg_layer[-1].drop_output(input=mlp_input_x, drop=drop, rng=rng_share))
    else:
        activations.append(recg_layer[-1].output(input=mlp_input_x))

    #stochastic layer
    recg_layer.append(GaussianHidden.GaussianHidden(
        rng=rng,
        input=activations[-1],
        n_in=n_hidden[0],
        n_out=n_hidden[1],
        activation=None
    ))

    z = recg_layer[-1].sample_z(rng_share)

    # ---- generative (decoder) network ----------------------------------
    gene_layer = []
    z_output = []
    random_z_output = []

    #1
    gene_layer.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=n_hidden[1],
        n_out=n_hidden[0],
        activation=activation
    ))
    z_output.append(gene_layer[-1].output(input=z))
    random_z_output.append(gene_layer[-1].output(input=random_z))

    #2
    gene_layer.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=n_hidden[0],
        n_out=5 * 5 * nkerns[-1],
        activation=activation
    ))
    if drop_inverses[0] == 1:
        z_output.append(gene_layer[-1].drop_output(
            input=z_output[-1], drop=drop_inverse, rng=rng_share))
        random_z_output.append(gene_layer[-1].drop_output(
            input=random_z_output[-1], drop=drop_inverse, rng=rng_share))
    else:
        z_output.append(gene_layer[-1].output(input=z_output[-1]))
        random_z_output.append(gene_layer[-1].output(input=random_z_output[-1]))

    input_z = z_output[-1].reshape((batch_size, nkerns[-1], 5, 5))
    input_random_z = random_z_output[-1].reshape((n_batch, nkerns[-1], 5, 5))

    # (image_shape, filter_shape, poolsize, border_mode, activation) of
    # unpool-conv layers 1-5; for the last layer, the nonlinearity should be
    # sigmoid to achieve mean of Bernoulli
    gene_conv_specs = [
        ((batch_size, nkerns[-1], 5, 5), (nkerns[-2], nkerns[-1], 3, 3), (1, 1), 'same', activation),
        ((batch_size, nkerns[-2], 5, 5), (nkerns[-3], nkerns[-2], 3, 3), (2, 2), 'full', activation),
        ((batch_size, nkerns[-3], 12, 12), (nkerns[-4], nkerns[-3], 3, 3), (1, 1), 'same', activation),
        ((batch_size, nkerns[-4], 12, 12), (nkerns[-5], nkerns[-4], 3, 3), (1, 1), 'same', activation),
        ((batch_size, nkerns[-5], 12, 12), (1, nkerns[-5], 5, 5), (2, 2), 'full', nonlinearity.sigmoid),
    ]
    hidden, hidden_random = input_z, input_random_z
    for image_shape, filter_shape, poolsize, border_mode, layer_activation in gene_conv_specs:
        gene_layer.append(UnpoolConvNon.UnpoolConvNon(
            rng,
            image_shape=image_shape,
            filter_shape=filter_shape,
            poolsize=poolsize,
            border_mode=border_mode,
            activation=layer_activation
        ))
        hidden = gene_layer[-1].output(input=hidden)
        hidden_random = gene_layer[-1].output_random_generation(input=hidden_random, n_batch=n_batch)
        z_output.append(hidden)
        random_z_output.append(hidden_random)

    gene_layer.append(NoParamsBernoulliVisiable.NoParamsBernoulliVisiable(
        #rng=rng,
        #mean=z_output[-1],
        #data=input_x,
    ))
    logpx = gene_layer[-1].logpx(mean=z_output[-1], data=input_x)

    # 4-D tensor of random generation
    random_x_mean = random_z_output[-1]
    random_x = gene_layer[-1].sample_x(rng_share, random_x_mean)

    # keep observed pixels (p_label == 1), take the reconstruction elsewhere
    x_denoised = z_output[-1].flatten(2)
    x_denoised = p_label * x + (1 - p_label) * x_denoised
    mse = ((x - x_denoised) ** 2).sum() / pertub_number

    # generative parameters first, then recognition parameters; the saved
    # model is zipped against this order below
    params = []
    for g in gene_layer:
        params += g.params
    for r in recg_layer:
        params += r.params

    train_activations = theano.function(
        inputs=[index],
        outputs=T.concatenate(activations, axis=1),
        givens={
            x_pertub: train_set_x[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    valid_activations = theano.function(
        inputs=[index],
        outputs=T.concatenate(activations, axis=1),
        givens={
            x_pertub: valid_set_x[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    test_activations = theano.function(
        inputs=[x_pertub],
        outputs=T.concatenate(activations, axis=1),
        givens={
            drop: np.cast['int32'](0)
        }
    )

    imputation_model = theano.function(
        inputs=[index, x_pertub],
        outputs=[x_denoised, mse],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            p_label: pertub_label[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0),
            drop_inverse: np.cast['int32'](0)
        }
    )

    ##################
    # Pretrain MODEL #
    ##################
    model_epoch = 600
    if 'model_epoch' in os.environ:
        model_epoch = int(os.environ['model_epoch'])

    if predir is not None:
        color.printBlue('... setting parameters')
        color.printBlue(predir)
        if model_epoch == -1:
            pre_train = np.load(predir + 'best-model.npz')
        else:
            pre_train = np.load(predir + 'model-' + str(model_epoch) + '.npz')
        pre_train = pre_train['model']
        if ctype == 'cva':
            for (para, pre) in zip(params, pre_train):
                para.set_value(pre)
        elif ctype == 'cmmva':
            # the last two saved arrays are not part of this model
            for (para, pre) in zip(params, pre_train[:-2]):
                para.set_value(pre)
        else:
            exit()
    else:
        # imputation needs a pretrained model
        exit()

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    n_visualization = 100
    n_steps = visualization_times + 2  # original + perturbed + recorded passes
    output = np.ones((n_visualization, n_steps, 784))
    output[:, 0, :] = test_set_x.get_value()[:n_visualization, :]
    output[:, 1, :] = test_set_x_pertub.get_value()[:n_visualization, :]

    image = paramgraphics.mat_to_img(output[:, 0, :].T, dim_input, colorImg=colorImg)
    image.save(logdir + 'data.png', 'PNG')
    image = paramgraphics.mat_to_img(output[:, 1, :].T, dim_input, colorImg=colorImg)
    image.save(logdir + 'data_pertub.png', 'PNG')

    # current estimate of the test set, refined in place pass after pass
    tmp = test_set_x_pertub.get_value()

    # NOTE(review): the loop nesting is reconstructed from a whitespace-mangled
    # source; logging and snapshots are assumed to run once per pass - confirm.
    for epoch in range(1, denoise_times + 1):
        this_mse = 0
        for i in range(n_test_batches):
            batch = slice(i * batch_size, (i + 1) * batch_size)
            d, m = imputation_model(i, tmp[batch])
            tmp[batch] = np.asarray(d)
            this_mse += m
        if epoch <= visualization_times:
            output[:, epoch + 1, :] = tmp[:n_visualization, :]

        log_line = '%s %s' % (epoch, this_mse)
        print(log_line)
        with open(logdir + 'hook.txt', 'a') as f:
            f.write(log_line + '\n')
        image = paramgraphics.mat_to_img(tmp[:n_visualization, :].T, dim_input, colorImg=colorImg)
        image.save(logdir + 'procedure-' + str(epoch) + '.png', 'PNG')
        np.savez(logdir + 'procedure-' + str(epoch), tmp=tmp)

    # one row per sample, one column per recorded step; the column count has to
    # follow visualization_times instead of being fixed to 22
    image = paramgraphics.mat_to_img((output.reshape(-1, 784)).T, dim_input, colorImg=colorImg,
                                     tile_shape=(n_visualization, n_steps))
    image.save(logdir + 'output.png', 'PNG')
    np.savez(logdir + 'output', output=output)

    # save original train features and denoise test features
    # (stack once instead of re-stacking the growing array for every batch)
    train_features = np.vstack(
        [np.asarray(train_activations(i)) for i in range(n_train_batches)])
    valid_features = np.vstack(
        [np.asarray(valid_activations(i)) for i in range(n_valid_batches)])
    test_features = np.vstack(
        [np.asarray(test_activations(tmp[i * batch_size: (i + 1) * batch_size]))
         for i in range(n_test_batches)])

    np.save(logdir + 'train_features', train_features)
    np.save(logdir + 'valid_features', valid_features)
    np.save(logdir + 'test_features', test_features)
print pertub_label.shape pertub_label = (pertub_label.T * tmp_b).T data_perturbed = pertub_label * data + (1 - pertub_label) * data_perturbed if pertub_type == 4: sio.savemat( 'data_imputation/type_' + str(pertub_type) + '_params_' + str(int(pertub_prob * 100)) + '_noise_rawdata.mat', { 'z_train': x_train.T, 'z_test_original': data, 'z_test': data_perturbed, 'pertub_label': pertub_label }) #print data_perturbed[:,:25].shape image = paramgraphics.mat_to_img(data_perturbed[:, :25], (28, 28), colorImg=False, scale=True) image.save('data_imputation/test_noise_4_' + str(pertub_prob) + '.png', 'PNG') elif pertub_type == 3: sio.savemat( 'data_imputation/type_' + str(pertub_type) + '_params_' + str(pertub_prob) + '_noise_rawdata.mat', { 'z_train': x_train.T, 'z_test_original': data, 'z_test': data_perturbed, 'pertub_label': pertub_label }) #print data_perturbed[:,:25].shape image = paramgraphics.mat_to_img(data_perturbed[:, :25], (28, 28), colorImg=False,
def c_6layer_svhn_imputation(seed=0, ctype='cva', pertub_type=5, pertub_prob=0, pertub_prob1=16, visualization_times=20, denoise_times=200, predir=None, n_batch=900, batch_size=500): """ Missing data imputation """ ''' svhn ''' n_channels = 3 colorImg = True dim_w = 32 dim_h = 32 dim_input = (dim_h, dim_w) n_classes = 10 first_drop = 0.6 if os.environ.has_key('first_drop'): first_drop = float(os.environ['first_drop']) last_drop = 1 if os.environ.has_key('last_drop'): last_drop = float(os.environ['last_drop']) nkerns_1 = 96 if os.environ.has_key('nkerns_1'): nkerns_1 = int(os.environ['nkerns_1']) nkerns_2 = 96 if os.environ.has_key('nkerns_2'): nkerns_2 = int(os.environ['nkerns_2']) opt_med = 'mom' if os.environ.has_key('opt_med'): opt_med = os.environ['opt_med'] train_logvar = True if os.environ.has_key('train_logvar'): train_logvar = bool(int(os.environ['train_logvar'])) dataset = 'svhnlcn' if os.environ.has_key('dataset'): dataset = os.environ['dataset'] n_z = 256 if os.environ.has_key('n_z'): n_z = int(os.environ['n_z']) #cp->cd->cpd->cd->c nkerns = [nkerns_1, nkerns_1, nkerns_1, nkerns_2, nkerns_2] drops = [0, 1, 1, 1, 0, 1] drop_p = [1, first_drop, first_drop, first_drop, 1, last_drop] n_hidden = [n_z] logdir = 'results/imputation/' + ctype + '/svhn/' + ctype + '_6layer_' + dataset + '_' logdir += str(int(time.time())) + '/' if not os.path.exists(logdir): os.makedirs(logdir) print predir with open(logdir + 'hook.txt', 'a') as f: print >> f, predir color.printRed('dataset ' + dataset) test_set_x, test_set_x_pertub, pertub_label, pertub_number = datapy.load_pertub_data_svhn( dirs='data_imputation/', dataset=dataset, pertub_type=pertub_type, pertub_prob=pertub_prob, pertub_prob1=pertub_prob1) pixel_max, pixel_min = datapy.load_max_min(dirs='data_imputation/', dataset=dataset, pertub_prob=pertub_prob) # compute number of minibatches for training, validation and testing #n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size #n_valid_batches = 
valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels random_z = T.matrix('random_z') x_pertub = T.matrix( 'x_pertub') # the data is presented as rasterized images p_label = T.matrix('p_label') drop = T.iscalar('drop') activation = nonlinearity.relu rng = np.random.RandomState(seed) rng_share = theano.tensor.shared_randomstreams.RandomStreams(0) input_x = x_pertub.reshape((batch_size, n_channels, dim_h, dim_w)) recg_layer = [] cnn_output = [] l = [] d = [] #1 recg_layer.append( ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, n_channels, dim_h, dim_w), filter_shape=(nkerns[0], n_channels, 5, 5), poolsize=(2, 2), border_mode='same', activation=activation)) if drops[0] == 1: cnn_output.append(recg_layer[-1].drop_output(input=input_x, drop=drop, rng=rng_share, p=drop_p[0])) else: cnn_output.append(recg_layer[-1].output(input=input_x)) l += [1, 2] d += [1, 1] #2 recg_layer.append( ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[0], 16, 16), filter_shape=(nkerns[1], nkerns[0], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) if drops[1] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share, p=drop_p[1])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l += [1, 2] d += [1, 1] #3 recg_layer.append( ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[1], 16, 16), filter_shape=(nkerns[2], nkerns[1], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation)) if drops[2] == 1: 
cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share, p=drop_p[2])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l += [1, 2] d += [1, 1] #4 recg_layer.append( ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[2], 8, 8), filter_shape=(nkerns[3], nkerns[2], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) if drops[3] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share, p=drop_p[3])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l += [1, 2] d += [1, 1] #5 ''' --------------------- (2,2) or (4,4) ''' recg_layer.append( ConvMaxPool_GauInit_DNN.ConvMaxPool_GauInit_DNN( rng, image_shape=(batch_size, nkerns[3], 8, 8), filter_shape=(nkerns[4], nkerns[3], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation)) if drops[4] == 1: cnn_output.append(recg_layer[-1].drop_output(cnn_output[-1], drop=drop, rng=rng_share, p=drop_p[4])) else: cnn_output.append(recg_layer[-1].output(cnn_output[-1])) l += [1, 2] d += [1, 1] mlp_input_x = cnn_output[-1].flatten(2) activations = [] activations.append(mlp_input_x) #1 ''' ---------------------No MLP ''' ''' recg_layer.append(FullyConnected.FullyConnected( rng=rng, n_in= 4 * 4 * nkerns[-1], n_out=n_hidden[0], activation=activation )) if drops[-1]==1: activations.append(recg_layer[-1].drop_output(input=mlp_input_x, drop=drop, rng=rng_share, p=drop_p[-1])) else: activations.append(recg_layer[-1].output(input=mlp_input_x)) ''' #stochastic layer recg_layer.append( GaussianHidden.GaussianHidden(rng=rng, input=activations[-1], n_in=4 * 4 * nkerns[-1], n_out=n_hidden[0], activation=None)) l += [1, 2] d += [1, 1] l += [1, 2] d += [1, 1] z = recg_layer[-1].sample_z(rng_share) gene_layer = [] z_output = [] random_z_output = [] #1 gene_layer.append( FullyConnected.FullyConnected(rng=rng, n_in=n_hidden[0], n_out=4 * 4 * nkerns[-1], activation=activation)) 
z_output.append(gene_layer[-1].output(input=z)) random_z_output.append(gene_layer[-1].output(input=random_z)) l += [1, 2] d += [1, 1] #2 ''' gene_layer.append(FullyConnected.FullyConnected( rng=rng, n_in=n_hidden[0], n_out = 4*4*nkerns[-1], activation=activation )) if drop_inverses[0]==1: z_output.append(gene_layer[-1].drop_output(input=z_output[-1], drop=drop_inverse, rng=rng_share)) random_z_output.append(gene_layer[-1].drop_output(input=random_z_output[-1], drop=drop_inverse, rng=rng_share)) else: z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output(input=random_z_output[-1])) ''' input_z = z_output[-1].reshape((batch_size, nkerns[-1], 4, 4)) input_random_z = random_z_output[-1].reshape((n_batch, nkerns[-1], 4, 4)) #1 gene_layer.append( UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-1], 4, 4), filter_shape=(nkerns[-2], nkerns[-1], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation)) l += [1, 2] d += [1, 1] z_output.append(gene_layer[-1].output(input=input_z)) random_z_output.append(gene_layer[-1].output_random_generation( input=input_random_z, n_batch=n_batch)) #2 gene_layer.append( UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-2], 8, 8), filter_shape=(nkerns[-3], nkerns[-2], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) l += [1, 2] d += [1, 1] z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch)) #3 gene_layer.append( UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-3], 8, 8), filter_shape=(nkerns[-4], nkerns[-3], 3, 3), poolsize=(2, 2), border_mode='same', activation=activation)) l += [1, 2] d += [1, 1] z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation( 
input=random_z_output[-1], n_batch=n_batch)) #4 gene_layer.append( UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-4], 16, 16), filter_shape=(nkerns[-5], nkerns[-4], 3, 3), poolsize=(1, 1), border_mode='same', activation=activation)) l += [1, 2] d += [1, 1] z_output.append(gene_layer[-1].output(input=z_output[-1])) random_z_output.append(gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch)) #5-1 stochastic layer # for this layer, the activation is None to get a Guassian mean gene_layer.append( UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-5], 16, 16), filter_shape=(n_channels, nkerns[-5], 5, 5), poolsize=(2, 2), border_mode='same', activation=None)) l += [1, 2] d += [1, 1] x_mean = gene_layer[-1].output(input=z_output[-1]) random_x_mean = gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch) #5-2 stochastic layer # for this layer, the activation is None to get logvar if train_logvar: gene_layer.append( UnpoolConvNon_GauInit_DNN.UnpoolConvNon_GauInit_DNN( rng, image_shape=(batch_size, nkerns[-5], 16, 16), filter_shape=(n_channels, nkerns[-5], 5, 5), poolsize=(2, 2), border_mode='same', activation=None)) l += [1, 2] d += [1, 1] x_logvar = gene_layer[-1].output(input=z_output[-1]) random_x_logvar = gene_layer[-1].output_random_generation( input=random_z_output[-1], n_batch=n_batch) else: x_logvar = theano.shared( np.ones((batch_size, n_channels, dim_h, dim_w), dtype='float32')) random_x_logvar = theano.shared( np.ones((n_batch, n_channels, dim_h, dim_w), dtype='float32')) gene_layer.append( NoParamsGaussianVisiable.NoParamsGaussianVisiable( #rng=rng, #mean=z_output[-1], #data=input_x, )) logpx = gene_layer[-1].logpx(mean=x_mean, logvar=x_logvar, data=input_x) random_x = gene_layer[-1].sample_x(rng_share=rng_share, mean=random_x_mean, logvar=random_x_logvar) x_denoised = p_label * x + (1 - p_label) * x_mean.flatten(2) 
mse = ((x - x_denoised)**2).sum() / pertub_number params = [] for g in gene_layer: params += g.params for r in recg_layer: params += r.params ''' train_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x_pertub: train_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) } ) ''' ''' valid_activations = theano.function( inputs=[index], outputs=T.concatenate(activations, axis=1), givens={ x_pertub: valid_set_x[index * batch_size: (index + 1) * batch_size], drop: np.cast['int32'](0) } ) ''' test_activations = theano.function(inputs=[x_pertub], outputs=T.concatenate(activations, axis=1), givens={drop: np.cast['int32'](0)}) imputation_model = theano.function( inputs=[index, x_pertub], outputs=[x_denoised, mse], givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], p_label: pertub_label[index * batch_size:(index + 1) * batch_size], drop: np.cast['int32'](0), #drop_inverse: np.cast['int32'](0) }) ################## # Pretrain MODEL # ################## model_epoch = 100 if os.environ.has_key('model_epoch'): model_epoch = int(os.environ['model_epoch']) if predir is not None: color.printBlue('... setting parameters') color.printBlue(predir) if model_epoch == -1: pre_train = np.load(predir + 'best-model.npz') else: pre_train = np.load(predir + 'model-' + str(model_epoch) + '.npz') pre_train = pre_train['model'] if ctype == 'cva': for (para, pre) in zip(params, pre_train): para.set_value(pre) elif ctype == 'cmmva': for (para, pre) in zip(params, pre_train[:-2]): para.set_value(pre) else: exit() else: exit() ############### # TRAIN MODEL # ############### print '... 
training' scale = False epoch = 0 n_visualization = 900 pixel_max = pixel_max[:n_visualization] pixel_min = pixel_min[:n_visualization] output = np.ones((n_visualization, visualization_times + 2, n_channels * dim_input[0] * dim_input[1])) output[:, 0, :] = test_set_x.get_value()[:n_visualization, :] output[:, 1, :] = test_set_x_pertub.get_value()[:n_visualization, :] image = paramgraphics.mat_to_img(paramgraphics.scale_max_min( output[:, 0, :].T, pixel_max, pixel_min), dim_input, colorImg=colorImg, scale=scale) image.save(logdir + 'data.png', 'PNG') image = paramgraphics.mat_to_img(paramgraphics.scale_max_min( output[:, 1, :].T, pixel_max, pixel_min), dim_input, colorImg=colorImg, scale=scale) image.save(logdir + 'data_pertub.png', 'PNG') tmp = test_set_x_pertub.get_value() while epoch < denoise_times: epoch = epoch + 1 for i in xrange(n_test_batches): d, m = imputation_model(i, tmp[i * batch_size:(i + 1) * batch_size]) tmp[i * batch_size:(i + 1) * batch_size] = np.asarray(d) if epoch <= visualization_times: output[:, epoch + 1, :] = tmp[:n_visualization, :] image = paramgraphics.mat_to_img(paramgraphics.scale_max_min( tmp[:n_visualization, :].T, pixel_max, pixel_min), dim_input, colorImg=colorImg, scale=scale) image.save(logdir + 'procedure-' + str(epoch) + '.png', 'PNG') np.savez(logdir + 'procedure-' + str(epoch), tmp=tmp) ''' image = paramgraphics.mat_to_img((output.reshape(-1,32*32*3)).T, dim_input, colorImg=colorImg, tile_shape=(n_visualization,22), scale=scale) image.save(logdir+'output.png', 'PNG') np.savez(logdir+'output', output=output) ''' '''
'_params_' + str(start) + '_' + str(end) + '_noise_rawdata.mat', { 'z_test_original': data, 'z_test': data_perturbed, 'pertub_label': pertub_label }) sio.savemat( 'data_imputation/' + dataset + '_params_' + str(int(pertub_prob)) + '_max_min_pixel.mat', { 'pixel_max': pixel_max, 'pixel_min': pixel_min }) print data_perturbed[:, :25].shape scale = False image = paramgraphics.mat_to_img(paramgraphics.scale_max_min( data_perturbed[:, :25], pixel_max, pixel_min), (32, 32), colorImg=True, scale=scale) image.save( 'data_imputation/' + dataset + '_' + 'test_noise_type_' + str(pertub_type) + '_params_' + str(pertub_prob) + '.png', 'PNG') print data[:, :25].shape image = paramgraphics.mat_to_img(paramgraphics.scale_max_min( data[:, :25], pixel_max, pixel_min), (32, 32), colorImg=True, scale=scale) image.save( 'data_imputation/' + dataset + '_' + 'test_original_type_' + str(pertub_type) + '_params_' + str(pertub_prob) + '.png', 'PNG')