def build_finetune_functions(self, train_shared_xy, valid_shared_xy):
    """ This function builds the finetune functions and the gradient updates

    :param train_shared_xy: theano shared variables for input and output training data
    :type train_shared_xy: tuple of shared variables
    :param valid_shared_xy: theano shared variables for input and output development data
    :type valid_shared_xy: tuple of shared variables
    :returns: finetune functions for training and development

    """

    logger = logging.getLogger("DNN initialization")

    (train_set_x, train_set_y) = train_shared_xy
    (valid_set_x, valid_set_y) = valid_shared_xy

    lr = T.scalar('lr', dtype=theano.config.floatX)
    mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

    cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

    gparams = T.grad(cost, self.params)

    # use optimizer
    if self.optimizer == 'sgd':
        # zip pairs each parameter with its gradient
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            weight_update = self.updates[param]
            upd = mom * weight_update - lr * gparam
            updates[weight_update] = upd
            updates[param] = param + upd
    elif self.optimizer == 'adam':
        updates = compile_ADAM_train_function(self, gparams, learning_rate=lr)
    elif self.optimizer == 'rprop':
        updates = compile_RPROP_train_function(self, gparams)
    else:
        logger.critical(
            "This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n" % (self.optimizer))
        sys.exit(1)

    train_model = theano.function(
        inputs=[lr, mom],  # index, batch_size
        outputs=self.errors,
        updates=updates,
        givens={
            self.x: train_set_x,  # [index*batch_size:(index + 1)*batch_size]
            self.y: train_set_y,
            self.is_train: np.cast['int32'](1)
        },
        on_unused_input='ignore')

    valid_model = theano.function(
        inputs=[],
        outputs=self.errors,
        givens={
            self.x: valid_set_x,
            self.y: valid_set_y,
            self.is_train: np.cast['int32'](0)
        },
        on_unused_input='ignore')

    return train_model, valid_model
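# --- Illustrative sketch (not part of the original code) --------------------
# The 'sgd' branch above implements classical momentum: each parameter keeps a
# persistent velocity (self.updates[param]); the velocity is decayed by `mom`,
# pushed along the negative gradient scaled by `lr`, and the parameter is moved
# by the new velocity.  A minimal NumPy version of that update rule, with
# made-up variable names, looks like this:

import numpy as np

def momentum_sgd_step(param, velocity, grad, lr=0.01, mom=0.9):
    """One classical-momentum SGD step; returns updated (param, velocity)."""
    velocity = mom * velocity - lr * grad
    param = param + velocity
    return param, velocity

# toy usage: minimise f(w) = 0.5 * ||w||^2, whose gradient is w
w = np.array([1.0, -2.0])
v = np.zeros_like(w)
for _ in range(100):
    w, v = momentum_sgd_step(w, v, grad=w)
print(w)  # approaches [0, 0]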
def build_finetune_functions(self, train_shared_xy, valid_shared_xy, use_lhuc=False, layer_index=0):
    """ This function builds the finetune functions and the gradient updates

    :param train_shared_xy: theano shared variables for input and output training data
    :type train_shared_xy: tuple of shared variables
    :param valid_shared_xy: theano shared variables for input and output development data
    :type valid_shared_xy: tuple of shared variables
    :param use_lhuc: if True, update only the LHUC scaling parameters (shared variables named 'c')
    :type use_lhuc: bool
    :param layer_index: number of lower layers whose parameters are kept frozen
    :type layer_index: int
    :returns: finetune functions for training and development

    """

    logger = logging.getLogger("DNN initialization")

    (train_set_x, train_set_y) = train_shared_xy
    (valid_set_x, valid_set_y) = valid_shared_xy

    lr = T.scalar('lr', dtype=theano.config.floatX)
    mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

    cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

    ## added for LHUC
    if use_lhuc:
        # In LHUC, only the scaling parameters are adapted; they are identified by the name 'c'
        self.lhuc_params = []
        for p in self.params:
            if p.name == 'c':
                self.lhuc_params.append(p)
        params = self.lhuc_params
        gparams = T.grad(cost, params)
    else:
        params = self.params
        gparams = T.grad(cost, params)

    # number of parameters belonging to the first `layer_index` (frozen) layers
    freeze_params = 0
    for layer in range(layer_index):
        freeze_params += len(self.rnn_layers[layer].params)

    # use optimizer
    if self.optimizer == 'sgd':
        # zip pairs each parameter with its gradient
        updates = OrderedDict()
        for i, (param, gparam) in enumerate(zip(params, gparams)):
            weight_update = self.updates[param]
            upd = mom * weight_update - lr * gparam
            updates[weight_update] = upd
            # parameters of the first `layer_index` layers stay frozen;
            # only parameters beyond them are actually updated
            if i >= freeze_params:
                updates[param] = param + upd
    elif self.optimizer == 'adam':
        updates = compile_ADAM_train_function(self, gparams, learning_rate=lr)
    elif self.optimizer == 'rprop':
        updates = compile_RPROP_train_function(self, gparams)
    else:
        logger.critical(
            "This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n" % (self.optimizer))
        sys.exit(1)

    train_model = theano.function(
        inputs=[lr, mom],  # index, batch_size
        outputs=self.errors,
        updates=updates,
        givens={
            self.x: train_set_x,  # [index*batch_size:(index + 1)*batch_size]
            self.y: train_set_y,
            self.is_train: np.cast['int32'](1)
        },
        on_unused_input='ignore')

    valid_model = theano.function(
        inputs=[],
        outputs=self.errors,
        givens={
            self.x: valid_set_x,
            self.y: valid_set_y,
            self.is_train: np.cast['int32'](0)
        },
        on_unused_input='ignore')

    return train_model, valid_model
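# --- Illustrative sketch (not part of the original code) --------------------
# Two ideas used above, shown separately on mock parameters (all names here
# are made up for illustration):
#  * LHUC adaptation updates only the per-hidden-unit scaling parameters,
#    which are identified purely by the shared-variable name 'c'.
#  * Layer freezing counts the parameters of the first `layer_index` layers
#    (freeze_params) and emits updates only for indices >= freeze_params.

class MockParam(object):
    def __init__(self, name):
        self.name = name
    def __repr__(self):
        return self.name

params = [MockParam(n) for n in ['W1', 'b1', 'c', 'W2', 'b2', 'c']]

# LHUC: keep only the scaling parameters named 'c'
lhuc_params = [p for p in params if p.name == 'c']
print(lhuc_params)   # [c, c]

# freezing: with one frozen layer holding 2 parameters, only indices >= 2 are updated
freeze_params = 2
trainable = [p for i, p in enumerate(params) if i >= freeze_params]
print(trainable)     # [c, W2, b2, c]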
def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

    (train_set_x, train_set_y) = train_shared_xy
    (valid_set_x, valid_set_y) = valid_shared_xy

    # compute number of minibatches for training, validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_valid_batches //= batch_size  # integer division

    index = T.lscalar('index')  # index to a [mini]batch
    learning_rate = T.fscalar('learning_rate')
    momentum = T.fscalar('momentum')

    layer_size = len(self.params)
    lr_list = []
    for i in range(layer_size):
        lr_list.append(learning_rate)

    ## the top two layers (their weights and biases) use a smaller learning rate
    if layer_size > 4:
        for i in range(layer_size - 4, layer_size):
            lr_list[i] = learning_rate * 0.5

    # compute list of fine-tuning updates
    # compute the gradients with respect to the model parameters
    gparams = T.grad(self.finetune_cost, self.params)

    if self.use_rprop == 0:
        updates = OrderedDict()
        layer_index = 0
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * lr_list[layer_index]
            layer_index += 1

        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        train_fn = theano.function(
            inputs=[index, theano.Param(learning_rate, default=0.0001),
                    theano.Param(momentum, default=0.5)],
            outputs=self.errors,
            updates=updates,
            on_unused_input='ignore',
            givens={self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                    self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    elif self.use_rprop:
        updates = compile_RPROP_train_function(self, gparams)

        ## Retain learning rate and momentum so the interface stays backwards compatible,
        ## but RPROP does not use them, so on_unused_input='warn' is required.
        ## Otherwise this is the same function as above -- the block could be moved outside the if clause.
        train_fn = theano.function(
            inputs=[index, theano.Param(learning_rate, default=0.0001),
                    theano.Param(momentum, default=0.5)],
            outputs=self.errors,
            updates=updates,
            on_unused_input='warn',
            givens={self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                    self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    valid_fn = theano.function(
        [],
        outputs=self.errors,
        on_unused_input='ignore',
        givens={self.x: valid_set_x,
                self.y: valid_set_y})

    valid_score_i = theano.function(
        [index],
        outputs=self.errors,
        on_unused_input='ignore',
        givens={self.x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # Create a function that scans the entire validation set
    def valid_score():
        return [valid_score_i(i) for i in range(n_valid_batches)]

    return train_fn, valid_fn
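# --- Illustrative sketch (not part of the original code) --------------------
# train_fn above takes a minibatch index; Theano's `givens` substitutes the
# slice train_set_x[index*batch_size:(index+1)*batch_size] for self.x, so the
# whole dataset stays on the device and only an integer index is passed from
# Python.  The slicing itself is ordinary indexing, e.g. with NumPy and
# made-up toy data:

import numpy as np

data = np.arange(10).reshape(10, 1)        # 10 frames, 1 feature
batch_size = 4
n_batches = data.shape[0] // batch_size    # 2 full batches; the remainder is dropped

for index in range(n_batches):
    batch = data[index * batch_size:(index + 1) * batch_size]
    print(index, batch.ravel())
# 0 [0 1 2 3]
# 1 [4 5 6 7]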
def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

    (train_set_x, train_set_x_proj, train_set_y) = train_shared_xy
    (valid_set_x, valid_set_x_proj, valid_set_y) = valid_shared_xy

    # compute number of minibatches for training, validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_valid_batches /= batch_size

    index = T.lscalar('index')  # index to a [mini]batch
    learning_rate = T.fscalar('learning_rate')  ## osw temp
    momentum = T.fscalar('momentum')  ## osw temp
    ##proj_learning_rate = T.dscalar('proj_learning_rate')  ## osw temp

    layer_size = len(self.params)
    lr_list = []
    for i in xrange(layer_size):
        lr_list.append(learning_rate)

    ## top 2 layers use a smaller learning rate
    if layer_size > 4:
        for i in range(layer_size - 4, layer_size):
            lr_list[i] = learning_rate * 0.5

    # compute list of fine-tuning updates
    # compute the gradients with respect to the model parameters
    gparams = T.grad(self.finetune_cost, self.params)

    def make_updates_plain(param_list, delta_param_list, gparam_list, lr_list, params_to_update):
        ## Only parameters whose index is in params_to_update are given an update rule.
        updates = OrderedDict()
        for (i, (dparam, gparam, lrate)) in enumerate(zip(delta_param_list, gparam_list, lr_list)):
            if i in params_to_update:
                updates[dparam] = momentum * dparam - gparam * lrate
        for (i, (dparam, param)) in enumerate(zip(delta_param_list, param_list)):
            if i in params_to_update:
                updates[param] = param + updates[dparam]
        return updates

    ## Define updates over various subsets of model parameters. These will be used
    ## in various compiled training/inference functions.
    ## As a guide to the structure of params, the params for 2 hidden layers, projection,
    ## and a split first layer look like this:
    ##   i:    0       1     2     3    4    5    6    7
    ##      [W_proj; W_1a, W_1b, b_1; W_2, b_2; W_o, b_o]
    '''
    updates            -- all params
    subword_updates    -- exclude parameters at 0 and 2 -- proj. weights and proj. half of split layer
    word_updates       -- exclude all but the word half of the split layer, the bias of that layer, and the projection
    projection_updates -- exclude all but parameters at 0 -- projection layer
    '''
    all_params = range(len(self.params))
    subword_params = [i for i in all_params if i not in [0, 2]]
    word_params = [0, 2, 3]
    projection_params = [0]

    if self.use_rprop:
        print '========USING RPROP ========='
        updates = compile_RPROP_train_function(self, gparams)
        subword_updates = compile_RPROP_train_function(self, gparams, params_to_update=subword_params)
        word_updates = compile_RPROP_train_function(self, gparams, params_to_update=word_params)
        projection_updates = compile_RPROP_train_function(self, gparams, params_to_update=projection_params)
        on_unused_input_value = 'warn'
    else:
        print '========NOT USING RPROP ========='
        updates = make_updates_plain(self.params, self.delta_params, gparams, lr_list, all_params)
        subword_updates = make_updates_plain(self.params, self.delta_params, gparams, lr_list, subword_params)
        word_updates = make_updates_plain(self.params, self.delta_params, gparams, lr_list, word_params)
        projection_updates = make_updates_plain(self.params, self.delta_params, gparams, lr_list, projection_params)
        on_unused_input_value = 'raise'  ## Theano's default

    ##### OLDER VERSION:--
    '''
    ## All updates:
    updates = OrderedDict()
    layer_index = 0
    for dparam, gparam in zip(self.delta_params, gparams):
        updates[dparam] = momentum * dparam - gparam * lr_list[layer_index]
        layer_index += 1
    for dparam, param in zip(self.delta_params, self.params):
        updates[param] = param + updates[dparam]

    ## These updates exclude parameters at 0 and 2 -- proj. weights and proj. half of split layer
    subword_updates = OrderedDict()
    for (i, (dparam, gparam)) in enumerate(zip(self.delta_params, gparams)):
        if i not in [0,2]:  ## proj weights and proj half of split layer
            subword_updates[dparam] = momentum * dparam - gparam * lr_list[i]
    for (i, (dparam, param)) in enumerate(zip(self.delta_params, self.params)):
        if i not in [0,2]:  ## proj weights and proj half of split layer
            subword_updates[param] = param + subword_updates[dparam]

    ## These updates exclude parameters at 1 -- subword half of split layer
    ### NO!!! -- just the word half of the split layer, and bias of that layer
    word_updates = OrderedDict()
    for (i, (dparam, gparam)) in enumerate(zip(self.delta_params, gparams)):
        if i in [0,2,3]:
            word_updates[dparam] = momentum * dparam - gparam * lr_list[i]
    for (i, (dparam, param)) in enumerate(zip(self.delta_params, self.params)):
        if i in [0,2,3]:
            word_updates[param] = param + word_updates[dparam]

    ## These updates exclude all but parameters at 0 -- projection layer
    projection_updates = OrderedDict()
    for (i, (dparam, gparam)) in enumerate(zip(self.delta_params, gparams)):
        if i == 0:
            projection_updates[dparam] = momentum * dparam - gparam * lr_list[i]
    for (i, (dparam, param)) in enumerate(zip(self.delta_params, self.params)):
        if i == 0:
            projection_updates[param] = param + projection_updates[dparam]
    '''

    ## Update all params -- maybe never used:
    print 'compile train_all_fn'
    train_all_fn = theano.function(
        inputs=[index, theano.Param(learning_rate, default=0.0001),
                theano.Param(momentum, default=0.5)],
        outputs=self.errors,
        updates=updates,
        on_unused_input=on_unused_input_value,
        givens={self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                self.x_proj: train_set_x_proj[index * batch_size: (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ## Update all but the word-projection part of the split first hidden layer and the projection weights
    print 'compile train_subword_fn'
    train_subword_fn = theano.function(
        inputs=[index, theano.Param(learning_rate, default=0.0001),
                theano.Param(momentum, default=0.5)],
        outputs=self.errors,
        updates=subword_updates,
        on_unused_input=on_unused_input_value,
        givens={self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                self.x_proj: train_set_x_proj[index * batch_size: (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    print 'compile train_word_fn'
    train_word_fn = theano.function(
        inputs=[index, theano.Param(learning_rate, default=0.0001),
                theano.Param(momentum, default=0.5)],
        outputs=self.errors,
        updates=word_updates,
        on_unused_input=on_unused_input_value,
        givens={self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                self.x_proj: train_set_x_proj[index * batch_size: (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    print 'compile infer_projections_fn -- NB: operates by default on the validation set'
    infer_projections_fn = theano.function(
        inputs=[index, theano.Param(learning_rate, default=0.0001),
                theano.Param(momentum, default=0.5)],
        outputs=self.errors,
        updates=projection_updates,
        on_unused_input=on_unused_input_value,
        givens={self.x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                self.x_proj: valid_set_x_proj[index * batch_size: (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    valid_fn = theano.function(
        [],
        outputs=self.errors,
        givens={self.x: valid_set_x,
                self.x_proj: valid_set_x_proj,
                self.y: valid_set_y})

    valid_score_i = theano.function(
        [index],
        outputs=self.errors,
        givens={self.x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                self.x_proj: valid_set_x_proj[index * batch_size: (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # Create a function that scans the entire validation set
    def valid_score():
        return [valid_score_i(i) for i in xrange(n_valid_batches)]

    print 'finished Theano function compilation'

    return train_all_fn, train_subword_fn, train_word_fn, infer_projections_fn, valid_fn, valid_score_i
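# --- Illustrative sketch (not part of the original code) --------------------
# With the parameter layout documented above,
#   0: W_proj  1: W_1a  2: W_1b  3: b_1  4: W_2  5: b_2  6: W_o  7: b_o
# the restricted update sets are just index subsets of that list; the sketch
# below only prints which parameters each subset would touch:

param_names = ['W_proj', 'W_1a', 'W_1b', 'b_1', 'W_2', 'b_2', 'W_o', 'b_o']
all_params = range(len(param_names))

subword_params = [i for i in all_params if i not in [0, 2]]  # drop projection weights and projection half of split layer
word_params = [0, 2, 3]                                      # projection weights, word half of split layer, its bias
projection_params = [0]                                      # projection weights only

for name, idx in [('subword', subword_params), ('word', word_params), ('projection', projection_params)]:
    print(name, [param_names[i] for i in idx])
# subword ['W_1a', 'b_1', 'W_2', 'b_2', 'W_o', 'b_o']
# word ['W_proj', 'W_1b', 'b_1']
# projection ['W_proj']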
def build_finetune_functions_S2SPF(self, train_shared_xydf, valid_shared_xydf, layer_index=6):
    """ This function builds the finetune functions and the gradient updates

    :param train_shared_xydf: theano shared variables (x, y, d, f) for the training data
    :type train_shared_xydf: tuple of shared variables
    :param valid_shared_xydf: theano shared variables (x, y, d, f) for the development data
    :type valid_shared_xydf: tuple of shared variables
    :param layer_index: number of leading (encoder) layers whose parameters are trained with a doubled learning rate under 'sgd'
    :type layer_index: int
    :returns: finetune functions for training and development

    """

    logger = logging.getLogger("DNN initialization")

    (train_set_x, train_set_y, train_set_d, train_set_f) = train_shared_xydf
    (valid_set_x, valid_set_y, valid_set_d, valid_set_f) = valid_shared_xydf

    lr = T.scalar('lr', dtype=theano.config.floatX)
    mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

    cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

    params = self.params
    gparams = T.grad(cost, params)

    # number of parameters belonging to the first `layer_index` (encoder) layers
    encoder_params = 0
    for layer in range(layer_index):
        encoder_params += len(self.rnn_layers[layer].params)

    # use optimizer
    if self.optimizer == 'sgd':
        # zip pairs each parameter with its gradient
        updates = OrderedDict()
        for i, (param, gparam) in enumerate(zip(params, gparams)):
            weight_update = self.updates[param]
            if i >= encoder_params:
                upd = mom * weight_update - lr * gparam
            else:
                # encoder parameters are trained with a doubled learning rate
                upd = mom * weight_update - (lr * 2) * gparam
            updates[weight_update] = upd
            updates[param] = param + upd
    elif self.optimizer == 'adam':
        updates = compile_ADAM_train_function(self, gparams, learning_rate=lr)
    elif self.optimizer == 'rprop':
        updates = compile_RPROP_train_function(self, gparams)
    else:
        logger.critical(
            "This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n" % (self.optimizer))
        sys.exit(1)

    train_model = theano.function(
        inputs=[lr, mom],
        outputs=self.errors,
        updates=updates,
        givens={
            self.x: train_set_x,
            self.y: train_set_y,
            self.d: train_set_d,
            self.f: train_set_f,
            self.is_train: np.cast['int32'](1)
        },
        on_unused_input='ignore')

    valid_model = theano.function(
        inputs=[],
        outputs=self.errors,
        givens={
            self.x: valid_set_x,
            self.y: valid_set_y,
            self.d: valid_set_d,
            self.f: valid_set_f,
            self.is_train: np.cast['int32'](0)
        },
        on_unused_input='ignore')

    return train_model, valid_model
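# --- Illustrative sketch (not part of the original code) --------------------
# In the 'sgd' branch above, parameters belonging to the first `layer_index`
# layers (the encoder of the sequence-to-sequence model) are updated with
# twice the base learning rate, while the remaining parameters use the base
# rate.  The per-parameter rates follow directly from the per-layer parameter
# counts (made-up counts below):

layer_param_counts = [2, 2, 2, 2]   # e.g. (W, b) per layer for four layers
layer_index = 2                     # first two layers form the encoder
base_lr = 0.001

encoder_params = sum(layer_param_counts[:layer_index])
n_params = sum(layer_param_counts)

per_param_lr = [base_lr * 2 if i < encoder_params else base_lr
                for i in range(n_params)]
print(per_param_lr)
# [0.002, 0.002, 0.002, 0.002, 0.001, 0.001, 0.001, 0.001]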
def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

    (train_set_x, train_set_y) = train_shared_xy
    (valid_set_x, valid_set_y) = valid_shared_xy

    # compute number of minibatches for training, validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_valid_batches /= batch_size

    index = T.lscalar('index')  # index to a [mini]batch
    learning_rate = T.fscalar('learning_rate')
    momentum = T.fscalar('momentum')

    layer_size = len(self.params)
    lr_list = []
    for i in xrange(layer_size):
        lr_list.append(learning_rate)

    ## top 2 layers use a smaller learning rate
    if layer_size > 4:
        for i in range(layer_size - 4, layer_size):
            lr_list[i] = learning_rate * 0.5

    # compute list of fine-tuning updates
    # compute the gradients with respect to the model parameters
    gparams = T.grad(self.finetune_cost, self.params)

    if self.use_rprop == 0:
        updates = OrderedDict()
        layer_index = 0
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * lr_list[layer_index]
            layer_index += 1

        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        train_fn = theano.function(
            inputs=[index, theano.Param(learning_rate, default=0.0001),
                    theano.Param(momentum, default=0.5)],
            outputs=self.errors,
            updates=updates,
            on_unused_input='ignore',
            givens={self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                    self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    elif self.use_rprop:
        updates = compile_RPROP_train_function(self, gparams)

        ## retain learning rate and momentum to make interface backwards compatible,
        ## but we won't use them, means we have to use on_unused_input='warn'.
        ## Otherwise same function for RPROP or otherwise -- can move this block outside if clause.
        train_fn = theano.function(
            inputs=[index, theano.Param(learning_rate, default=0.0001),
                    theano.Param(momentum, default=0.5)],
            outputs=self.errors,
            updates=updates,
            on_unused_input='warn',
            givens={self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                    self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    valid_fn = theano.function(
        [],
        outputs=self.errors,
        on_unused_input='ignore',
        givens={self.x: valid_set_x,
                self.y: valid_set_y})

    valid_score_i = theano.function(
        [index],
        outputs=self.errors,
        on_unused_input='ignore',
        givens={self.x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # Create a function that scans the entire validation set
    def valid_score():
        return [valid_score_i(i) for i in xrange(n_valid_batches)]

    return train_fn, valid_fn
def build_finetune_functions_S2SPF(self, train_shared_xydf, valid_shared_xydf, layer_index=6):
    """ This function builds the finetune functions and the gradient updates

    :param train_shared_xydf: theano shared variables (x, y, d, f) for the training data
    :type train_shared_xydf: tuple of shared variables
    :param valid_shared_xydf: theano shared variables (x, y, d, f) for the development data
    :type valid_shared_xydf: tuple of shared variables
    :param layer_index: number of leading (encoder) layers whose parameters are trained with a doubled learning rate under 'sgd'
    :type layer_index: int
    :returns: finetune functions for training and development

    """

    logger = logging.getLogger("DNN initialization")

    (train_set_x, train_set_y, train_set_d, train_set_f) = train_shared_xydf
    (valid_set_x, valid_set_y, valid_set_d, valid_set_f) = valid_shared_xydf

    lr = T.scalar('lr', dtype=theano.config.floatX)
    mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

    cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

    params = self.params
    gparams = T.grad(cost, params)

    # number of parameters belonging to the first `layer_index` (encoder) layers
    encoder_params = 0
    for layer in range(layer_index):
        encoder_params += len(self.rnn_layers[layer].params)

    # use optimizer
    if self.optimizer == 'sgd':
        # zip pairs each parameter with its gradient
        updates = OrderedDict()
        for i, (param, gparam) in enumerate(zip(params, gparams)):
            weight_update = self.updates[param]
            if i >= encoder_params:
                upd = mom * weight_update - lr * gparam
            else:
                # encoder parameters are trained with a doubled learning rate
                upd = mom * weight_update - (lr * 2) * gparam
            updates[weight_update] = upd
            updates[param] = param + upd
    elif self.optimizer == 'adam':
        updates = compile_ADAM_train_function(self, gparams, learning_rate=lr)
    elif self.optimizer == 'rprop':
        updates = compile_RPROP_train_function(self, gparams)
    else:
        logger.critical(
            "This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n" % (self.optimizer))
        sys.exit(1)

    train_model = theano.function(
        inputs=[lr, mom],
        outputs=self.errors,
        updates=updates,
        givens={
            self.x: train_set_x,
            self.y: train_set_y,
            self.d: train_set_d,
            self.f: train_set_f,
            self.is_train: np.cast['int32'](1)
        },
        on_unused_input='ignore')

    valid_model = theano.function(
        inputs=[],
        outputs=self.errors,
        givens={
            self.x: valid_set_x,
            self.y: valid_set_y,
            self.d: valid_set_d,
            self.f: valid_set_f,
            self.is_train: np.cast['int32'](0)
        },
        on_unused_input='ignore')

    return train_model, valid_model
def build_finetune_functions(self, train_shared_xy, valid_shared_xy, use_lhuc=False, layer_index=0):
    """ This function builds the finetune functions and the gradient updates

    :param train_shared_xy: theano shared variables for input and output training data
    :type train_shared_xy: tuple of shared variables
    :param valid_shared_xy: theano shared variables for input and output development data
    :type valid_shared_xy: tuple of shared variables
    :param use_lhuc: if True, update only the LHUC scaling parameters (shared variables named 'c')
    :type use_lhuc: bool
    :param layer_index: number of lower layers whose parameters are kept frozen
    :type layer_index: int
    :returns: finetune functions for training and development

    """

    logger = logging.getLogger("DNN initialization")

    (train_set_x, train_set_y) = train_shared_xy
    (valid_set_x, valid_set_y) = valid_shared_xy

    lr = T.scalar('lr', dtype=theano.config.floatX)
    mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

    cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

    ## added for LHUC
    if use_lhuc:
        # In LHUC, only the scaling parameters are adapted; they are identified by the name 'c'
        self.lhuc_params = []
        for p in self.params:
            if p.name == 'c':
                self.lhuc_params.append(p)
        params = self.lhuc_params
        gparams = T.grad(cost, params)
    else:
        params = self.params
        gparams = T.grad(cost, params)

    # number of parameters belonging to the first `layer_index` (frozen) layers
    freeze_params = 0
    for layer in range(layer_index):
        freeze_params += len(self.rnn_layers[layer].params)

    # use optimizer
    if self.optimizer == 'sgd':
        # zip pairs each parameter with its gradient
        updates = OrderedDict()
        for i, (param, gparam) in enumerate(zip(params, gparams)):
            weight_update = self.updates[param]
            upd = mom * weight_update - lr * gparam
            updates[weight_update] = upd
            # parameters of the first `layer_index` layers stay frozen;
            # only parameters beyond them are actually updated
            if i >= freeze_params:
                updates[param] = param + upd
    elif self.optimizer == 'adam':
        updates = compile_ADAM_train_function(self, gparams, learning_rate=lr)
    elif self.optimizer == 'rprop':
        updates = compile_RPROP_train_function(self, gparams)
    else:
        logger.critical(
            "This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n" % (self.optimizer))
        sys.exit(1)

    train_model = theano.function(
        inputs=[lr, mom],  # index, batch_size
        outputs=self.errors,
        updates=updates,
        givens={
            self.x: train_set_x,  # [index*batch_size:(index + 1)*batch_size]
            self.y: train_set_y,
            self.is_train: np.cast['int32'](1)
        },
        on_unused_input='ignore')

    valid_model = theano.function(
        inputs=[],
        outputs=self.errors,
        givens={
            self.x: valid_set_x,
            self.y: valid_set_y,
            self.is_train: np.cast['int32'](0)
        },
        on_unused_input='ignore')

    return train_model, valid_model
def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size,
                             return_valid_score_i=False):

    (train_set_x, train_set_y) = train_shared_xy
    (valid_set_x, valid_set_y) = valid_shared_xy

    # compute number of minibatches for training, validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_valid_batches /= batch_size

    index = T.lscalar('index')  # index to a [mini]batch
    learning_rate = T.fscalar('learning_rate')
    momentum = T.fscalar('momentum')

    layer_size = len(self.params)
    lr_list = []
    for i in xrange(layer_size):
        lr_list.append(learning_rate)

    ## top 2 layers use a smaller learning rate
    ## hard-code now, change it later
    if layer_size > 4:
        for i in range(layer_size - 4, layer_size):
            lr_list[i] = learning_rate * 0.5

    # compute list of fine-tuning updates
    # compute the gradients with respect to the model parameters
    gparams = T.grad(self.finetune_cost, self.params)

    if self.use_rprop == 0:
        updates = theano.compat.python2x.OrderedDict()
        layer_index = 0
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * lr_list[layer_index]
            layer_index += 1

        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        on_unused_input_value = 'raise'  ## Theano's default

    elif self.use_rprop:
        updates = compile_RPROP_train_function(self, gparams)
        on_unused_input_value = 'warn'
        ## Retain learning rate and momentum to make interface backwards compatible,
        ## even with RPROP where we don't use them, means we have to use on_unused_input='warn'.

    train_fn = theano.function(
        inputs=[index,
                theano.Param(learning_rate, default=0.125),
                theano.Param(momentum, default=0.5)],
        outputs=self.errors,
        updates=updates,
        on_unused_input=on_unused_input_value,
        givens={
            self.x: train_set_x[index * batch_size:(index + 1) * batch_size],
            self.y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    valid_fn = theano.function(
        [],
        outputs=self.errors,
        givens={
            self.x: valid_set_x,
            self.y: valid_set_y
        })

    valid_score_i = theano.function(
        [index],
        outputs=self.errors,
        givens={
            self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Create a function that scans the entire validation set
    def valid_score():
        return [valid_score_i(i) for i in xrange(n_valid_batches)]

    if return_valid_score_i:
        return train_fn, valid_fn, valid_score_i
    else:
        return train_fn, valid_fn
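# --- Illustrative sketch (not part of the original code) --------------------
# When return_valid_score_i is True, the caller also gets valid_score_i, which
# evaluates the error on one validation minibatch at a time.  One way to turn
# that into a single development-set score is to average over all full
# batches, mirroring the valid_score() helper defined above (plain-Python
# stand-in with a made-up per-batch scorer):

def mean_valid_score(valid_score_i, n_valid_frames, batch_size):
    """Average per-minibatch validation errors over all full batches."""
    n_valid_batches = n_valid_frames // batch_size
    scores = [valid_score_i(i) for i in range(n_valid_batches)]
    return sum(scores) / float(len(scores))

# toy usage with a fake scorer that just returns the batch index
print(mean_valid_score(lambda i: float(i), n_valid_frames=10, batch_size=4))
# 0.5   (average of the per-batch scores [0.0, 1.0])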