def predict(test_set_x, params):
    Fx = T.matrix(name="Fx_input")  # first images, presented as rasterized rows
    Sx = T.matrix(name="Sx_input")  # second images, presented as rasterized rows
    Fx_inp = T.matrix(name="Fx_inp")
    Sx_inp = T.matrix(name="Sx_inp")
    rng = numpy.random.RandomState(23455)

    # dataset parameters
    im_type = params["im_type"]
    nc = params["nc"]      # number of channels
    size = params["size"]  # [height, width]; the original size is [480, 640]

    # convolution and pooling parameters
    nkerns = params["nkerns"]
    kern_mat = params["kern_mat"]
    pool_mat = params["pool_mat"]

    # learning parameters
    batch_size = params["batch_size"]

    print "... building the model"
    cnnr = CNNRNet(rng, input, batch_size, nc, size, nkerns,
                   kern_mat[0], kern_mat[1], pool_mat[0], pool_mat[1], Fx, Sx)
    cnnr.load(params["model_name"])
    # cnnr.set_params(model_saver.load_model(model_name))
    print "Model parameters loaded"

    # compile a function that computes the predictions of the model
    predict_model = theano.function(
        [Fx_inp, Sx_inp],
        cnnr.y_pred,
        givens={Fx: Fx_inp, Sx: Sx_inp},
        allow_input_downcast=True,
    )

    n_test_batches = len(test_set_x) // batch_size
    y_pred = []
    print "Prediction on test images"
    for i in xrange(n_test_batches):
        Fx = test_set_x[i * batch_size:(i + 1) * batch_size]
        data_Fx = dataset_loader.load_batch_images(size, nc, "F", Fx, im_type)
        data_Sx = dataset_loader.load_batch_images(size, nc, "S", Fx, im_type)
        if len(y_pred) == 0:
            y_pred = predict_model(data_Fx, data_Sx)
        else:
            y_pred = numpy.concatenate((y_pred, predict_model(data_Fx, data_Sx)))
    return y_pred
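
# A minimal usage sketch for predict(). The dictionary keys are exactly the ones
# predict() reads from `params`; the concrete values (image-type label, sizes,
# kernel/pooling shapes, saved-model path, and the list returned by the dataset
# loader) are assumptions for illustration, not values prescribed by this project.
def _example_predict_usage():
    example_params = {
        "im_type": "depth",                      # assumed image-type label
        "nc": 1,                                 # number of channels
        "size": [120, 160],                      # [height, width]
        "nkerns": [20, 50],
        "kern_mat": [[5, 5], [5, 5]],
        "pool_mat": [[2, 2], [2, 2]],
        "batch_size": 30,
        "model_name": "models/1_0_model_numpy",  # assumed path of a saved model
    }
    # test_set_x is expected to be a list of image-pair identifiers, e.g. as
    # produced by the project's dataset loader (hypothetical dataset path below)
    test_set_x = dataset_loader.load_pairs(
        "/home/coskun/PycharmProjects/data/rgbd_dataset_freiburg3_large_cabinet/",
        step_size=[])
    return predict(test_set_x, example_params)
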
def train_model():
    dataset = "/home/coskun/PycharmProjects/data/rgbd_dataset_freiburg3_large_cabinet/"
    rng = numpy.random.RandomState(23455)

    # dataset parameters; the original image size is [480, 640]
    rn_id = 1
    size = [120, 160]
    nc = 1  # number of channels

    # convolution and pooling parameters
    nkerns = [20, 50]
    nkern1_size = [5, 5]
    nkern2_size = [5, 5]
    npool1_size = [2, 2]
    npool2_size = [2, 2]

    # learning parameters
    batch_size = 30
    multi = 10
    n_epochs = 3000
    initial_learning_rate = 0.0005
    learning_rate_decay = 0.998
    squared_filter_length_limit = 15.0
    learning_rate = theano.shared(numpy.asarray(initial_learning_rate, dtype=theano.config.floatX))

    # momentum schedule: for epoch in [0, mom_epoch_interval] the momentum
    # increases linearly from mom_start to mom_end; afterwards it stays at mom_end
    mom_start = 0.5
    mom_end = 0.99
    mom_epoch_interval = 500
    mom_params = {"start": mom_start, "end": mom_end, "interval": mom_epoch_interval}

    # regularization parameters
    lambda_1 = 0.01
    lambda_2 = 0.01

    datasets = dataset_loader.load_tum_dataV2(dataset, rn_id, multi)
    X_train, y_train = datasets[0]
    X_val, y_val = datasets[1]
    X_test, y_test = datasets[2]

    # compute the number of minibatches for training, validation and testing
    n_train_batches = len(X_train) // batch_size
    n_valid_batches = len(X_val) // batch_size
    n_test_batches = len(X_test) // batch_size

    # symbolic variables passed to the net
    epoch = T.scalar()
    Fx = T.matrix(name="Fx_input")  # first images, presented as rasterized rows
    Sx = T.matrix(name="Sx_input")  # second images, presented as rasterized rows
    y = T.matrix("y")               # targets, presented as a 1x3 matrix per sample
    Fx_inp = T.matrix(name="Fx_inp")
    Sx_inp = T.matrix(name="Sx_inp")
    y_inp = T.matrix("y_inp")

    print "... building the model"
    cnnr = CNNRNet(rng, input, batch_size, nc, size, nkerns,
                   nkern1_size, nkern2_size, npool1_size, npool2_size, Fx, Sx)

    # compile functions that compute the errors made by the model
    test_model = theano.function([Fx_inp, Sx_inp, y_inp], cnnr.errors(y),
                                 givens={Fx: Fx_inp, Sx: Sx_inp, y: y_inp})
    validate_model = theano.function([Fx_inp, Sx_inp, y_inp], cnnr.errors(y),
                                     givens={Fx: Fx_inp, Sx: Sx_inp, y: y_inp})

    # MSE cost with L1 and L2 regularization
    cost = cnnr.mse(y) + lambda_1 * cnnr.L1 + lambda_2 * cnnr.L2_sqr

    # compute the gradients of the cost with respect to the model parameters
    gparams = []
    for param in cnnr.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)

    # ... and allocate memory for momentum'd versions of the gradients
    gparams_mom = []
    for param in cnnr.params:
        gparam_mom = theano.shared(numpy.zeros(param.get_value(borrow=True).shape,
                                               dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)

    # compute the momentum for the current epoch (linear ramp, then constant);
    # a Python conditional on a symbolic T.lt() does not branch as intended, so
    # use theano's ifelse (requires: from theano.ifelse import ifelse)
    mom = ifelse(T.lt(epoch, mom_epoch_interval),
                 mom_start * (1.0 - epoch / mom_epoch_interval)
                 + mom_end * (epoch / mom_epoch_interval),
                 mom_end)

    # update the step direction using momentum
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        # Misha Denil's original version:
        # updates[gparam_mom] = mom * gparam_mom + (1. - mom) * gparam
        # changed to match the update rule in Hinton's dropout paper
        updates[gparam_mom] = mom * gparam_mom - (1.0 - mom) * learning_rate * gparam

    # ... and take a step along that direction
    for param, gparam_mom in zip(cnnr.params, gparams_mom):
        # Misha Denil's original version:
        # stepped_param = param - learning_rate * updates[gparam_mom]
        # the learning rate is already folded into gparam_mom, so it is not needed here
        stepped_param = param + updates[gparam_mom]

        # Constrain the norms of the weight matrices: if the parameter has two
        # dimensions, rescale its columns so that their norms stay below
        # sqrt(squared_filter_length_limit).
        if param.get_value(borrow=True).ndim == 2:
            # row-norm variant:
            # squared_norms = T.sum(stepped_param**2, axis=1).reshape((stepped_param.shape[0], 1))
            # scale = T.clip(T.sqrt(squared_filter_length_limit / squared_norms), 0., 1.)
            # updates[param] = stepped_param * scale
            # constrain the norms of the COLUMNs of the weights, according to
            # https://github.com/BVLC/caffe/issues/109
            # (see the standalone NumPy sketch after this function)
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    # compile the Theano function for training; it returns the training cost and
    # updates the model parameters
    output = cost
    train_model = theano.function(
        [Fx_inp, Sx_inp, y_inp, epoch],
        outputs=output,
        updates=updates,
        givens={Fx: Fx_inp, Sx: Sx_inp, y: y_inp},
    )

    # compile a function that computes the predictions of the model
    predict_model = theano.function([Fx_inp, Sx_inp], cnnr.y_pred,
                                    givens={Fx: Fx_inp, Sx: Sx_inp})

    decay_learning_rate = theano.function(
        inputs=[], outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay},
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print "... training"
    # early-stopping parameters
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    # note: `epoch` is rebound to 0 here and is what gets passed to train_model
    # below; epoch_counter tracks the actual epoch index
    epoch = 0
    done_looping = False
    epoch_counter = 0

    while (epoch_counter < n_epochs) and (not done_looping):
        epoch_counter = epoch_counter + 1
        for minibatch_index in xrange(n_train_batches):
            iter = (epoch_counter - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print "training @ iter = ", iter
            Fx = X_train[minibatch_index * batch_size:(minibatch_index + 1) * batch_size]
            data_Fx = dataset_loader.load_batch_images(size, nc, "F", Fx)
            data_Sx = dataset_loader.load_batch_images(size, nc, "S", Fx)
            data_y = y_train[minibatch_index * batch_size:(minibatch_index + 1) * batch_size]
            cost_ij = train_model(data_Fx, data_Sx, data_y, epoch)
            # model_saver.save_model(epoch % 3, params)
            print("epoch %i, minibatch %i/%i, training cost %f "
                  % (epoch_counter, minibatch_index + 1, n_train_batches, cost_ij))

            if (iter + 1) % validation_frequency == 0:
                # compute the loss on the validation set
                validation_losses = 0
                for i in xrange(n_valid_batches):
                    Fx = X_val[i * batch_size:(i + 1) * batch_size]
                    data_Fx = dataset_loader.load_batch_images(size, nc, "F", Fx)
                    data_Sx = dataset_loader.load_batch_images(size, nc, "S", Fx)
                    data_y = y_val[i * batch_size:(i + 1) * batch_size]
                    validation_losses = validation_losses + validate_model(data_Fx, data_Sx, data_y)
                this_validation_loss = validation_losses / n_valid_batches
                new_learning_rate = decay_learning_rate()
                print("epoch %i, minibatch %i/%i, learning_rate %f validation error %f %%"
                      % (epoch_counter, minibatch_index + 1, n_train_batches,
                         learning_rate.get_value(borrow=True), this_validation_loss * 100.0))

                # if we got the best validation score so far
                if this_validation_loss < best_validation_loss:
                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save the best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    test_losses = 0
                    for i in xrange(n_test_batches):
                        Fx = X_test[i * batch_size:(i + 1) * batch_size]
                        data_Fx = dataset_loader.load_batch_images(size, nc, "F", Fx)
                        data_Sx = dataset_loader.load_batch_images(size, nc, "S", Fx)
                        data_y = y_test[i * batch_size:(i + 1) * batch_size]
                        err = test_model(data_Fx, data_Sx, data_y)
                        test_losses = test_losses + err
                        if i % 10 == 0:
                            # save a sample of inputs, predictions and targets for inspection
                            store = []
                            ypred = predict_model(data_Fx, data_Sx)
                            store.append(Fx)
                            store.append(ypred)
                            store.append(data_y)
                            model_saver.save_garb(store)
                            print("Iteration saved %i, err %f" % (i, err))
                    test_score = test_losses / n_test_batches
                    ext = str(rn_id) + "_" + str(epoch_counter % 3)
                    model_saver.save_model(ext, cnnr.params)
                    print((" epoch %i, minibatch %i/%i, test error of "
                           "best model %f %%")
                          % (epoch_counter, minibatch_index + 1, n_train_batches, test_score * 100.0))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print("Optimization complete.")
    print("Best validation score of %f %% obtained at iteration %i, "
          "with test performance %f %%"
          % (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0))
    print >> sys.stderr, ("The code for file " + os.path.split(__file__)[1]
                          + " ran for %.2fm" % ((end_time - start_time) / 60.0))
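
# A standalone NumPy sketch of the column-norm (max-norm) constraint applied in
# the update loop above, included only to make the Theano expressions easier to
# follow: after the momentum step, each column of a 2-D weight matrix is rescaled
# so that its L2 norm does not exceed sqrt(squared_filter_length_limit).
def _max_col_norm_numpy(stepped_param, squared_filter_length_limit=15.0):
    col_norms = numpy.sqrt(numpy.sum(numpy.square(stepped_param), axis=0))
    desired_norms = numpy.clip(col_norms, 0, numpy.sqrt(squared_filter_length_limit))
    scale = desired_norms / (1e-7 + col_norms)
    return stepped_param * scale

# e.g. _max_col_norm_numpy(numpy.random.randn(100, 50)) returns a matrix whose
# column norms are all <= sqrt(15), leaving already-small columns untouched.
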
def train_model(params):
    # dataset parameters
    rn_id = params["rn_id"]
    im_type = params["im_type"]
    nc = params["nc"]      # number of channels
    size = params["size"]  # [height, width]; the original size is [480, 640]

    # convolution and pooling parameters
    nkerns = params["nkerns"]
    kern_mat = params["kern_mat"]
    pool_mat = params["pool_mat"]

    # learning parameters
    batch_size = params["batch_size"]
    n_epochs = params["n_epochs"]
    initial_learning_rate = params["initial_learning_rate"]
    learning_rate_decay = params["learning_rate_decay"]
    squared_filter_length_limit = params["squared_filter_length_limit"]
    learning_rate = theano.shared(numpy.asarray(initial_learning_rate, dtype=theano.config.floatX))

    # regularization parameters
    lambda_1 = params["lambda_1"]
    lambda_2 = params["lambda_2"]

    # momentum schedule: for epoch in [0, mom_epoch_interval] the momentum
    # increases linearly from mom_start to mom_end; afterwards it stays at mom_end
    mom_start = params["mom_start"]
    mom_end = params["mom_end"]
    mom_epoch_interval = params["mom_epoch_interval"]

    # early-stopping parameters
    patience = params["patience"]                # look at this many examples regardless
    patience_increase = params["patience_increase"]  # wait this much longer when a new best is found
    improvement_threshold = params["improvement_threshold"]  # a relative improvement of this much is
                                                             # considered significant

    # load the dataset
    datasets = dataset_loader.load_tum_dataV2(params)
    X_train, y_train, overlaps_train = datasets[0]
    X_val, y_val, overlaps_val = datasets[1]
    X_test, y_test, overlaps_test = datasets[2]

    # compute the number of minibatches for training, validation and testing
    n_train_batches = len(X_train) // batch_size
    n_valid_batches = len(X_val) // batch_size
    n_test_batches = len(X_test) // batch_size

    # symbolic variables passed to the net
    epoch = T.scalar()
    Fx = T.matrix(name='Fx_input')  # first images, presented as rasterized rows
    Sx = T.matrix(name='Sx_input')  # second images, presented as rasterized rows
    y = T.matrix('y')               # targets, presented as a 1x3 matrix per sample
    Fx_inp = T.matrix(name='Fx_inp')
    Sx_inp = T.matrix(name='Sx_inp')
    y_inp = T.matrix('y_inp')

    print '... building the model'
    rng = numpy.random.RandomState(23455)
    cnnr = CNNRNet(rng, input, batch_size, nc, size, nkerns,
                   kern_mat[0], kern_mat[1], pool_mat[0], pool_mat[1], Fx, Sx)

    # compile functions that compute the errors made by the model
    test_model = theano.function(
        [Fx_inp, Sx_inp, y_inp],
        cnnr.errors(y),
        givens={Fx: Fx_inp, Sx: Sx_inp, y: y_inp},
        allow_input_downcast=True,
    )
    validate_model = theano.function(
        [Fx_inp, Sx_inp, y_inp],
        cnnr.errors(y),
        givens={Fx: Fx_inp, Sx: Sx_inp, y: y_inp},
        allow_input_downcast=True,
    )

    # MSE cost with L1 and L2 regularization
    cost = cnnr.mse(y) + lambda_1 * cnnr.L1 + lambda_2 * cnnr.L2_sqr

    # compute the gradients of the cost with respect to the model parameters
    gparams = []
    for param in cnnr.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)

    # ... and allocate memory for momentum'd versions of the gradients
    gparams_mom = []
    for param in cnnr.params:
        gparam_mom = theano.shared(numpy.zeros(param.get_value(borrow=True).shape,
                                               dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)

    # compute the momentum for the current epoch (linear ramp, then constant);
    # a Python conditional on a symbolic T.lt() does not branch as intended, so
    # use theano's ifelse (requires: from theano.ifelse import ifelse)
    mom = ifelse(T.lt(epoch, mom_epoch_interval),
                 mom_start * (1.0 - epoch / mom_epoch_interval)
                 + mom_end * (epoch / mom_epoch_interval),
                 mom_end)

    # update the step direction using momentum
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        # Misha Denil's original version:
        # updates[gparam_mom] = mom * gparam_mom + (1. - mom) * gparam
        # changed to match the update rule in Hinton's dropout paper
        updates[gparam_mom] = mom * gparam_mom - (1. - mom) * learning_rate * gparam

    # ... and take a step along that direction
    for param, gparam_mom in zip(cnnr.params, gparams_mom):
        # Misha Denil's original version:
        # stepped_param = param - learning_rate * updates[gparam_mom]
        # the learning rate is already folded into gparam_mom, so it is not needed here
        stepped_param = param + updates[gparam_mom]

        # Constrain the norms of the weight matrices: if the parameter has two
        # dimensions, rescale its columns so that their norms stay below
        # sqrt(squared_filter_length_limit).
        if param.get_value(borrow=True).ndim == 2:
            # row-norm variant:
            # squared_norms = T.sum(stepped_param**2, axis=1).reshape((stepped_param.shape[0], 1))
            # scale = T.clip(T.sqrt(squared_filter_length_limit / squared_norms), 0., 1.)
            # updates[param] = stepped_param * scale
            # constrain the norms of the COLUMNs of the weights, according to
            # https://github.com/BVLC/caffe/issues/109
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    # compile the Theano function for training; it returns the training cost and
    # updates the model parameters
    output = cost
    train_model = theano.function(
        [Fx_inp, Sx_inp, y_inp, epoch],
        outputs=output,
        updates=updates,
        givens={Fx: Fx_inp, Sx: Sx_inp, y: y_inp},
        allow_input_downcast=True,
    )

    # compile a function that computes the predictions of the model
    predict_model = theano.function(
        [Fx_inp, Sx_inp],
        cnnr.y_pred,
        givens={Fx: Fx_inp, Sx: Sx_inp},
        allow_input_downcast=True,
    )

    decay_learning_rate = theano.function(
        inputs=[], outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay})
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    # note: `epoch` is rebound to 0 here and is what gets passed to train_model
    # below; epoch_counter tracks the actual epoch index
    epoch = 0
    done_looping = False
    epoch_counter = 0

    while (epoch_counter < n_epochs) and (not done_looping):
        epoch_counter = epoch_counter + 1
        for minibatch_index in xrange(n_train_batches):
            iter = (epoch_counter - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print 'training @ iter = ', iter
            Fx = X_train[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
            data_Fx = dataset_loader.load_batch_images(size, nc, "F", Fx, im_type)
            data_Sx = dataset_loader.load_batch_images(size, nc, "S", Fx, im_type)
            data_y = y_train[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
            cost_ij = train_model(data_Fx, data_Sx, data_y, epoch)
            # model_saver.save_model(epoch % 3, params)
            print('epoch %i, minibatch %i/%i, training cost %f '
                  % (epoch_counter, minibatch_index + 1, n_train_batches, cost_ij))

            if (iter + 1) % validation_frequency == 0:
                # compute the loss on the validation set
                validation_losses = 0
                for i in xrange(n_valid_batches):
                    Fx = X_val[i * batch_size: (i + 1) * batch_size]
                    data_Fx = dataset_loader.load_batch_images(size, nc, "F", Fx, im_type)
                    data_Sx = dataset_loader.load_batch_images(size, nc, "S", Fx, im_type)
                    data_y = y_val[i * batch_size: (i + 1) * batch_size]
                    validation_losses = validation_losses + validate_model(data_Fx, data_Sx, data_y)
                this_validation_loss = validation_losses / n_valid_batches
                new_learning_rate = decay_learning_rate()
                print('epoch %i, minibatch %i/%i, learning_rate %f validation error %f %%'
                      % (epoch_counter, minibatch_index + 1, n_train_batches,
                         learning_rate.get_value(borrow=True), this_validation_loss * 100.))

                # if we got the best validation score so far
                if this_validation_loss < best_validation_loss:
                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save the best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    test_losses = 0
                    for i in xrange(n_test_batches):
                        Fx = X_test[i * batch_size: (i + 1) * batch_size]
                        data_Fx = dataset_loader.load_batch_images(size, nc, "F", Fx, im_type)
                        data_Sx = dataset_loader.load_batch_images(size, nc, "S", Fx, im_type)
                        data_y = y_test[i * batch_size: (i + 1) * batch_size]
                        err = test_model(data_Fx, data_Sx, data_y)
                        test_losses = test_losses + err
                        if i % 100 == -1:  # condition is never true: prediction dumps are disabled
                            store = []
                            ypred = predict_model(data_Fx, data_Sx)
                            print(ypred)
                            print(Fx)
                            store.append(Fx)
                            store.append(ypred)
                            store.append(data_y)
                            model_saver.save_garb(store)
                            print("Iteration saved %i, err %f" % (i, err))
                    test_score = test_losses / n_test_batches
                    ext = "models/" + str(rn_id) + "_" + str(epoch_counter % 3) + "_model_numpy"
                    cnnr.save(ext)
                    # model_saver.save_model(ext, cnnr.params)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%')
                          % (epoch_counter, minibatch_index + 1, n_train_batches, test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%'
          % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1]
                          + ' ran for %.2fm' % ((end_time - start_time) / 60.))
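
# A hedged sketch of a `params` dictionary that would drive train_model(params).
# Only the keys read above are listed; the values mirror the hard-coded defaults
# of the earlier train_model() where possible and are otherwise assumptions.
# dataset_loader.load_tum_dataV2(params) may require additional dataset-specific
# keys (e.g. the dataset path) that are not shown here.
def _example_train_params():
    return {
        "rn_id": 1,
        "im_type": "depth",          # assumed image-type label
        "nc": 1,                     # number of channels
        "size": [120, 160],          # [height, width]
        "nkerns": [20, 50],
        "kern_mat": [[5, 5], [5, 5]],
        "pool_mat": [[2, 2], [2, 2]],
        "batch_size": 30,
        "n_epochs": 3000,
        "initial_learning_rate": 0.0005,
        "learning_rate_decay": 0.998,
        "squared_filter_length_limit": 15.0,
        "lambda_1": 0.01,
        "lambda_2": 0.01,
        "mom_start": 0.5,
        "mom_end": 0.99,
        "mom_epoch_interval": 500,
        "patience": 10000,
        "patience_increase": 2,
        "improvement_threshold": 0.995,
    }
# e.g. train_model(_example_train_params())
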
def test_dA(learning_rate=0.1, training_epochs=15, output_folder='dA_plots'):
    size = [160, 120]  # [width, height]
    batch_size = 20
    dataset = "/home/coskun/PycharmProjects/data/rgbd_dataset_freiburg3_large_cabinet/"
    X_Pairs = dataset_loader.load_pairs(dataset, step_size=[])
    n_train_batches = len(X_Pairs) // batch_size

    Fx = T.matrix(name='Fx_input')  # first images, presented as rasterized rows
    Sx = T.matrix(name='Sx_input')  # second images, presented as rasterized rows
    Fx_inp = T.matrix(name='Fx_inp')
    Sx_inp = T.matrix(name='Sx_inp')

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        Fx=Fx,
        Sx=Sx,
        n_visible=size[0] * size[1],
        n_hidden=2000,
    )

    cost, updates = da.get_cost_updates(learning_rate=learning_rate)

    train_da = theano.function(
        [Fx_inp, Sx_inp],
        cost,
        updates=updates,
        givens={Fx: Fx_inp, Sx: Sx_inp},
        mode="DebugMode",
    )

    start_time = timeit.default_timer()

    ############
    # TRAINING #
    ############
    print "Training Started"
    # go through the training epochs
    for epoch in xrange(training_epochs):
        # go through the training set
        c = []
        for i in xrange(n_train_batches):
            Fx = X_Pairs[i * batch_size: (i + 1) * batch_size]
            data_Fx = dataset_loader.load_batch_images(size, 1, "F", Fx)
            data_Sx = dataset_loader.load_batch_images(size, 1, "S", Fx)
            print("Training on images")
            cst = train_da(data_Fx, data_Sx)
            print("Cost:")
            print(str(cst))
            c.append(cst)
        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = timeit.default_timer()
    training_time = (end_time - start_time)
    print >> sys.stderr, ('The no corruption code for file ' + os.path.split(__file__)[1]
                          + ' ran for %.2fm' % (training_time / 60.))

    # visualize the learned filters
    image = Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(size[0], size[1]),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')
    os.chdir('../')
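
# A minimal, hedged example of running the denoising-autoencoder experiment
# above with its default settings. Note that train_da is compiled with
# mode="DebugMode", which makes execution very slow; for a full run one would
# presumably drop that argument and use Theano's default mode.
def _run_dA_experiment():
    test_dA(learning_rate=0.1, training_epochs=15, output_folder='dA_plots')
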