def finetuning(self, train_xy, valid_xy):
    # get the training and validation functions for the model
    log('> ... getting the finetuning functions')
    train_shared_xy = shared_dataset(train_xy, borrow=True)
    valid_shared_xy = shared_dataset(valid_xy, borrow=True)
    train_fn, valid_fn = self.build_finetune_functions(
        train_shared_xy=train_shared_xy,
        valid_shared_xy=valid_shared_xy,
        batch_size=self.cfg.batch_size)

    log('> ... finetuning the model')
    # train until the learning-rate scheduler decays the rate to 0
    while self.cfg.lrate.get_rate() != 0:
        # one epoch of sgd training
        train_error = train_sgd_without_streaming(train_fn, train_xy[0].shape[0], self.cfg)
        log('> epoch %d, training error %f (%%)' % (self.cfg.lrate.epoch, 100 * numpy.mean(train_error)))
        # validation; the validation error drives the learning-rate schedule
        valid_error = validate_by_minibatch_without_streaming(valid_fn, valid_xy[0].shape[0], self.cfg)
        log('> epoch %d, lrate %f, validation error %f (%%)' % (self.cfg.lrate.epoch, self.cfg.lrate.get_rate(), 100 * numpy.mean(valid_error)))
        self.cfg.lrate.get_next_rate(current_error=100 * numpy.mean(valid_error))
    log('> ... finetuning finished')

    # output nnet parameters and lrate, for training resume
    # if self.cfg.lrate.epoch % self.cfg.model_save_step == 0:
    #     _nnet2file(dnn.layers, filename=wdir + '/nnet.tmp')
    #     _lrate2file(cfg.lrate, wdir + '/training_state.tmp')
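# For reference, a minimal sketch of a `shared_dataset` helper in the standard
# Theano-tutorial style; this is an assumption about the repo's actual helper,
# which may differ in detail (e.g. the dtype used for the labels):
#
#     import numpy
#     import theano
#     import theano.tensor as T
#
#     def shared_dataset(data_xy, borrow=True):
#         data_x, data_y = data_xy
#         # keep both arrays on the device as floatX ...
#         shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
#                                  borrow=borrow)
#         shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
#                                  borrow=borrow)
#         # ... but expose the labels as int32 for use as class indices
#         return shared_x, T.cast(shared_y, 'int32')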
"""
# Legacy unsupervised pre-training (no labels); kept commented out, superseded
# by the version below that appends y to the input of each layer.
def pretraining(self, train_x=None):
    log('> ... getting the pre-training functions')
    if train_x is not None:
        # shrink the batch size when the data set is smaller than one batch
        batch_size = self.cfg.batch_size if train_x.shape[0] > self.cfg.batch_size else train_x.shape[0]
        pretraining_fns = self.pretraining_functions(train_set_x=shared_dataset_X(train_x),
                                                     batch_size=batch_size)

    # resume training
    start_layer_index = 0
    start_epoch_index = 0

    log('> ... pre-training the model')
    # layer by layer; for each layer, go through the epochs
    for i in range(start_layer_index, self.cfg.ptr_layer_number):
        for epoch in range(start_epoch_index, self.cfg.epochs):
            # go through the training set
            c = []
            for batch_index in xrange(train_x.shape[0] // batch_size):  # loop over mini-batches
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=self.cfg.corruption_levels[i],
                                            lr=self.cfg.learning_rates[i],
                                            momentum=self.cfg.momentum))
            log('> layer %i, epoch %d, reconstruction cost %f' % (i, epoch, numpy.mean(c)))
"""
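# A sketch of what `pretraining_functions` is assumed to compile, following the
# Theano SdA tutorial (the repo's version additionally threads `momentum`
# through to the updates): one function per dA layer, each indexing into the
# shared training matrix. Calling fn(index=..., corruption=..., lr=...) works
# because Theano matches keyword arguments to input variable names.
#
#     index = T.lscalar('index')
#     corruption_level = T.scalar('corruption')
#     learning_rate = T.scalar('lr')
#     pretraining_fns = []
#     for dA in self.dA_layers:
#         cost, updates = dA.get_cost_updates(corruption_level, learning_rate)
#         fn = theano.function(
#             inputs=[index, corruption_level, learning_rate],
#             outputs=cost,
#             updates=updates,
#             givens={self.x: train_set_x[index * batch_size:
#                                         (index + 1) * batch_size]})
#         pretraining_fns.append(fn)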
def pretraining(self, train_x=None, train_y=None):
    # treat the task as multi-class when there are 3 or more output classes
    multi_classes = True if self.cfg.n_outs >= 3 else False
    train_y = train_y.astype(dtype=theano.config.floatX)
    train_y_T = train_y[numpy.newaxis].T
    if multi_classes == False:
        # binary case: append the raw label column to the features
        train_xy = numpy.hstack((train_x, train_y_T))
        shared_train_xy = shared_dataset_X(train_xy)
    else:
        # multi-class case: one-hot encode the labels before appending them
        enc = OneHotEncoder(n_values=self.cfg.n_outs, dtype=theano.config.floatX, sparse=False)
        encode_train_y = enc.fit_transform(train_y_T)
        shared_train_xy = shared_dataset_X(numpy.hstack((train_x, encode_train_y)))

    log('> ... getting the pre-training functions')
    if train_x is None:
        # this means we are using the stream input from file
        pass
    else:
        # this means using a numpy matrix as input
        start_layer_index = 0
        start_epoch_index = 0
        log('> ... pre-training the model')
        # layer by layer; for each layer, go through the epochs
        for i in range(start_layer_index, self.cfg.ptr_layer_number):
            pretraining_fn = self.pretraining_function(self.dA_layers[i],
                                                       train_set_x=shared_train_xy,
                                                       batch_size=self.cfg.batch_size)
            for epoch in range(start_epoch_index, self.cfg.epochs):
                # go through the training set
                c = []
                # while (not self.cfg.train_sets.is_finish()):
                #     self.cfg.train_sets.load_next_partition(self.cfg.train_xy)
                # run at least one mini-batch even if the data set is smaller than batch_size
                iteration_per_epoch = train_x.shape[0] // self.cfg.batch_size or 1
                for batch_index in xrange(iteration_per_epoch):  # loop over mini-batches
                    c.append(pretraining_fn(index=batch_index,
                                            corruption=self.cfg.corruption_levels[i],
                                            lr=self.cfg.learning_rates[i],
                                            momentum=self.cfg.momentum))
                # self.cfg.train_sets.initialize_read()
                log('> layer %i, epoch %d, reconstruction cost %f' % (i, epoch, numpy.mean(c)))

            # propagate the data through this layer to train the next one
            hidden_values = self.dA_layers[i].transform(shared_train_xy.get_value())
            if self.cfg.settings.has_key('firstlayer_xy') and self.cfg.settings['firstlayer_xy'] == 1:
                # labels are appended to the first layer's input only
                shared_train_xy = shared_dataset_X(hidden_values)
            else:
                # append y to the input of every layer
                if multi_classes == False:
                    train_xy = numpy.hstack((hidden_values, train_y_T))
                    shared_train_xy = shared_dataset_X(train_xy)
                else:
                    shared_train_xy = shared_dataset_X(numpy.hstack((hidden_values, encode_train_y)))
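# Shape sanity check for the label-appending trick above, with hypothetical
# numbers: given train_x of shape (N, d) and n_outs = 3 classes, the encoded
# labels have shape (N, 3), so each dA layer sees inputs of width d + 3.
# (`n_values` is the pre-0.20 scikit-learn API, as used above.)
#
#     import numpy
#     from sklearn.preprocessing import OneHotEncoder
#
#     y = numpy.array([0., 2., 1.])[numpy.newaxis].T   # shape (3, 1)
#     enc = OneHotEncoder(n_values=3, sparse=False)
#     print(enc.fit_transform(y))
#     # [[ 1.  0.  0.]
#     #  [ 0.  0.  1.]
#     #  [ 0.  1.  0.]]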
cfg.parse_config_dnn(arguments, nnet_spec)
cfg.init_data_reading(train_data_spec, valid_data_spec)

# parse pre-training options: the pre-trained parameter file and the number of
# layers it covers
ptr_layer_number = 0
ptr_file = ''
if arguments.has_key('ptr_file') and arguments.has_key('ptr_layer_number'):
    ptr_file = arguments['ptr_file']
    ptr_layer_number = int(arguments['ptr_layer_number'])

# check the working dir to see whether we are resuming training
resume_training = False
if os.path.exists(wdir + '/nnet.tmp') and os.path.exists(wdir + '/training_state.tmp'):
    resume_training = True
    cfg.lrate = _file2lrate(wdir + '/training_state.tmp')
    log('> ... found nnet.tmp and training_state.tmp, now resume training from epoch ' + str(cfg.lrate.epoch))

numpy_rng = numpy.random.RandomState(89677)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

log('> ... building the model')
# set up the model, with or without dropout
if cfg.do_dropout:
    dnn = DNN_Dropout(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)
else:
    dnn = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng, cfg=cfg)

# initialize model parameters:
# if not resuming training, initialize from the specified pre-training file;
# if resuming training, the tmp model file is loaded instead
if (ptr_layer_number > 0) and (resume_training is False):
    _file2nnet(dnn.layers, set_layer_num=ptr_layer_number, filename=ptr_file)
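# A hypothetical `arguments` dict for the fragment above; the real dict comes
# from the repo's command-line parser, and the file names here are made up:
#
#     arguments = {'ptr_file': wdir + '/dnn.ptr',
#                  'ptr_layer_number': '2'}
#
# Note that `has_key` here (and `xrange` earlier) pin this code to Python 2.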