def set_param_values(self, param_values, borrow=True):
    """
    This sets the model parameters from the list of values given.
    This method is useful when you are loading model parameters, or are doing distributed programming and
    want to train parallel models.

    The order of param_values matters! It must be the same as the order of parameters returned from
    self.get_params()!
    ------------------

    :param param_values: list of theano/numpy arrays of values for the model parameters
    :type param_values: List(array)

    :param borrow: theano 'borrow' parameter for set_value() method on shared variables
    :type borrow: Boolean

    :return: True if every value was set; False if the lengths mismatch or setting raised an exception
    :rtype: Boolean
    """
    params = self.get_params()

    # make sure the input list of values is the same length as the params for the model.
    if len(param_values) != len(params):
        log.error("%s length of input params to set_param_values() different from length of self.get_params(). "
                  "Input was %s, expected %s",
                  str(type(self)), str(len(param_values)), str(len(params)))
        return False

    # for each parameter and value in order, set the value!
    try:
        set_shared_values(params, param_values, borrow=borrow)
    except Exception as e:
        # best-effort contract: report the failure through the return value instead of propagating
        log.exception("%s had Exception %s", str(type(self)), str(e))
        return False

    # explicit success value so callers can rely on the documented Boolean result
    return True
def set_param_values(self, param_values, borrow=True):
    """
    This sets the model parameters from the list of values given.
    This method is useful when you are loading model parameters, or are doing distributed programming and
    want to train parallel models.

    The order of param_values matters! It must be the same as the order of parameters returned from
    self.get_params()!
    ------------------

    :param param_values: list of theano/numpy arrays of values for the model parameters
    :type param_values: List(array)

    :param borrow: theano 'borrow' parameter for set_value() method on shared variables
    :type borrow: Boolean

    :return: True if every value was set; False if the lengths mismatch or setting raised an exception
    :rtype: Boolean
    """
    params = self.get_params()

    # make sure the input list of values is the same length as the params for the model.
    if len(param_values) != len(params):
        log.error("%s length of input params to set_param_values() different from length of self.get_params(). "
                  "Input was %s, expected %s",
                  str(type(self)), str(len(param_values)), str(len(params)))
        return False

    # for each parameter and value in order, set the value!
    try:
        set_shared_values(params, param_values, borrow=borrow)
    except Exception as e:
        # best-effort contract: report the failure through the return value instead of propagating
        log.exception("%s had Exception %s", str(type(self)), str(e))
        return False

    # explicit success value so callers can rely on the documented Boolean result
    return True
def train(self, continue_training=False):
    """
    Run training epochs until self._perform_one_epoch() signals a stop or a KeyboardInterrupt arrives.

    After the loop ends, the best parameters recorded in self.best_params (if any) are restored into
    self.params, and the model parameters are saved to 'trained_epoch_<epoch_counter>.pkl'.

    :param continue_training: when False, the learning rate decay (if one is set) and the model's decay
        parameters are reset before training starts
    :type continue_training: Boolean
    """
    log.info("-----------TRAINING %s FOR %s EPOCHS (continue_training=%s)-----------",
             str(type(self.model)), str(self.n_epoch), str(continue_training))

    log.debug("Train dataset size is: %s", self.dataset.getDataShape(datasets.TRAIN))
    if self.dataset.hasSubset(datasets.VALID):
        log.debug("Valid dataset size is: %s", self.dataset.getDataShape(datasets.VALID))
    if self.dataset.hasSubset(datasets.TEST):
        log.debug("Test dataset size is: %s", self.dataset.getDataShape(datasets.TEST))

    self.STOP = False
    self.epoch_counter = 0
    if not continue_training:
        # reset the learning rate - the attribute may exist but be unset (None), so check its value too
        if hasattr(self, 'learning_rate_decay') and self.learning_rate_decay:
            self.learning_rate_decay.reset()
        # reset the other model decaying functions
        for decay_param in self.model.get_decay_params():
            decay_param.reset()

    self.times = []
    self.best_cost = float('inf')
    self.best_params = None
    self.patience = 0

    start_time = time.time()

    while not self.STOP:
        try:
            self.STOP = self._perform_one_epoch()
        except KeyboardInterrupt:
            # allow the user to interrupt training and still fall through to the save logic below
            log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
            self.STOP = True

    # save params
    if self.best_params is not None:
        log.debug("Restoring best model parameters...")
        set_shared_values(self.params, self.best_params)
    log.debug("Saving model parameters...")
    self.model.save_params('trained_epoch_' + str(self.epoch_counter) + '.pkl')

    log.info("------------TOTAL %s TRAIN TIME TOOK %s---------",
             str(type(self.model)), make_time_units_string(time.time() - start_time))
def train(self, continue_training=False):
    """
    This method performs the training!!!

    It compiles one training function per training cost returned by the model (a list of costs is used
    for things like layer-wise pretraining), compiles train/valid/test monitor functions when the model
    provides monitors, and then runs epochs with each training function in turn until
    self._perform_one_epoch() signals a stop or a KeyboardInterrupt is received. After each training
    function finishes, the best parameters recorded (if any) are restored and the model parameters
    are saved to 'trained_epoch_<epoch_counter>.pkl'.

    :param continue_training: when False, the learning rate decay (if one is set) and the model's decay
        parameters are reset before training with each function
    :type continue_training: Boolean
    :return: None
    :rtype: None
    """
    # grab the model parameters to use during training
    self.params = self.model.get_params()
    log.info("%s params: %s", str(type(self.model)), str(self.params))

    ###############################################
    # theano index variable to use on the dataset #
    ###############################################
    # index to a [mini]batch - both start and end
    data_idx = T.iscalar('data_index')
    data_end_idx = T.iscalar('data_end_index')
    batch_slice = slice(data_idx, data_end_idx)

    # compute number of minibatches for training, validation and testing
    # shapes is list of list - input list of datasets to optimizer (for multiple inputs), and each dataset
    # could be a list of shared variables (like multiple sequences from files)
    train_data_shapes = raise_to_list(self.dataset.getDataShape(TRAIN))
    valid_data_shapes = raise_to_list(self.dataset.getDataShape(VALID))
    test_data_shapes = raise_to_list(self.dataset.getDataShape(TEST))

    # train_batches is going to be lists of tuples that contain the start and end indices for train data
    train_data_lens = [shape[0] for shape in train_data_shapes]
    self.train_batches = self.get_batch_indices(train_data_lens)

    if valid_data_shapes is not None:
        valid_data_lens = [shape[0] for shape in valid_data_shapes]
        self.valid_batches = self.get_batch_indices(valid_data_lens)
    else:
        self.valid_batches = None
    if test_data_shapes is not None:
        test_data_lens = [shape[0] for shape in test_data_shapes]
        self.test_batches = self.get_batch_indices(test_data_lens)
    else:
        self.test_batches = None

    # translate the data_idx into the givens for the model
    model_inputs = raise_to_list(self.model.get_inputs())
    model_targets = raise_to_list(self.model.get_targets())
    has_targets = model_targets is not None and len(model_targets) > 0

    def subset_givens(subset):
        # Pair the model input (and target, when the model has one) with the batch slice of the subset.
        # NOTE: the data side of each zip is a single-element list, so only the first input/target
        # variable is paired.
        data, labels = self.dataset.getSubset(subset)
        givens = OrderedDict(zip(model_inputs, [data[batch_slice]]))
        if has_targets:
            givens.update(OrderedDict(zip(model_targets, [labels[batch_slice]])))
        return givens

    train_givens = subset_givens(TRAIN)
    valid_givens = subset_givens(VALID)
    test_givens = subset_givens(TEST)

    # Now time to create the training cost functions for the model - make sure to handle the possible
    # list of costs used for pretraining of certain parts of the model.
    train_costs = raise_to_list(self.model.get_train_cost())
    self.train_functions = []
    for i, train_cost in enumerate(train_costs):
        # Now create the training cost function for the model to use while training - update parameters
        # gradient!
        gradients, _ = self.model.get_gradient(cost=train_cost)

        # Calculate the optimizer updates each run
        # This is where the magic happens for a lot of sub-implementations of SGD, including AdaDelta!
        # It tells how to update the params each training epoch
        gradient_updates = self.get_updates(gradients)

        # Combine the updates from the model also if applicable
        train_updates = self.model.get_updates()
        if train_updates:
            train_updates.update(gradient_updates)
        else:
            train_updates = gradient_updates

        # Compile the training function!
        log.info('Compiling f_learn %d/%d function for model %s...', i + 1, len(train_costs), str(type(self.model)))
        t = time.time()
        f_learn = function(inputs=[data_idx, data_end_idx],
                           updates=train_updates,
                           outputs=train_cost,
                           givens=train_givens,
                           name='f_learn_%d' % i)
        log.info('f_learn compilation took %s', make_time_units_string(time.time() - t))
        self.train_functions.append(f_learn)

    # grab the expression(s) to use to monitor different model values during training
    log.debug("Compiling monitor functions...")
    monitor_t = time.time()
    self.monitors = OrderedDict(self.model.get_monitors())
    # materialize the keys/values as real lists so they are not dict views on Python 3
    self.monitor_names = list(self.monitors.keys())
    if len(self.monitors) > 0:
        self.train_monitor_function = function(
            inputs=[data_idx, data_end_idx],
            updates=self.model.get_updates(),
            outputs=list(self.monitors.values()),
            givens=train_givens,
            name="train_monitor_function"
        )
        self.valid_monitor_function = function(
            inputs=[data_idx, data_end_idx],
            updates=self.model.get_updates(),
            outputs=list(self.monitors.values()),
            givens=valid_givens,
            name="valid_monitor_function"
        )
        self.test_monitor_function = function(
            inputs=[data_idx, data_end_idx],
            updates=self.model.get_updates(),
            outputs=list(self.monitors.values()),
            givens=test_givens,
            name="test_monitor_function"
        )
    log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

    self.noise_switches = raise_to_list(self.model.get_noise_switch())

    ##################
    # start training #
    ##################
    # make sure to deal with a list of train_cost functions - for layer-wise pretraining!
    # this list of training functions was compiled above, one per training cost.
    start_time = time.time()
    for func_i, train_function in enumerate(self.train_functions):
        log.info("-----------TRAINING %s function %d/%d FOR %d EPOCHS (continue_training=%s)-----------",
                 str(type(self.model)), func_i + 1, len(self.train_functions), self.n_epoch, str(continue_training))

        log.debug("Train dataset size is: %s", self.dataset.getDataShape(TRAIN))
        if self.dataset.hasSubset(VALID):
            log.debug("Valid dataset size is: %s", self.dataset.getDataShape(VALID))
        if self.dataset.hasSubset(TEST):
            log.debug("Test dataset size is: %s", self.dataset.getDataShape(TEST))

        self.STOP = False
        self.epoch_counter = 0
        if not continue_training:
            # reset the learning rate
            if hasattr(self, 'learning_rate_decay') and self.learning_rate_decay:
                self.learning_rate_decay.reset()
            # reset the other model decaying functions
            for decay_param in self.model.get_decay_params():
                decay_param.reset()

        self.times = []
        self.best_cost = numpy.inf
        self.best_params = None
        self.patience = 0

        t = time.time()

        while not self.STOP:
            try:
                self.STOP = self._perform_one_epoch(train_function)
            except KeyboardInterrupt:
                # allow the user to interrupt training and still fall through to the save logic below
                log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
                self.STOP = True

        # save params
        if self.best_params is not None:
            log.debug("Restoring best model parameters...")
            set_shared_values(self.params, self.best_params)
        log.debug("Saving model parameters...")
        self.model.save_params('trained_epoch_' + str(self.epoch_counter) + '.pkl')

        log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

    log.info("------------TOTAL %s TRAIN TIME TOOK %s---------",
             str(type(self.model)), make_time_units_string(time.time() - start_time))
def train(self, monitor_channels=None, train_outservice=None, plot=None, additional_cost=None):
    """
    This method performs the training!!!
    It is an online training method that goes over minibatches from the dataset for a number of epochs,
    updating parameters after each minibatch.

    You can disrupt training with a KeyBoardInterrupt and it should exit/save parameters gracefully.

    Parameters
    ----------
    monitor_channels : list(MonitorsChannel or Monitor), optional
        The list of channels or monitors containing monitor expressions/variables to compile and evaluate
        on the data. If None and a `plot` with channels is given, the plot's channels are used.
    train_outservice : OutService, optional
        The OutService to use for the automatically created train_cost monitor. Default of None just outputs
        to logs.
    plot : Plot, optional
        The Plot object to use if we want to graph the outputs (uses bokeh server).
    additional_cost : theano expression or list(theano expression), optional
        Any additional cost expressions to use during training (things like regularization). These will be summed
        with the existing cost.

    Raises
    ------
    AssertionError
        If the optimizer has no model to train.
    """
    if not self.model:
        log.error("No self.model for the Optimizer!")
        raise AssertionError("Needs to be initialized with a Model! (Or something went wrong if train() "
                             "was called from the Model. Try initializing the Optimizer with the model param "
                             "and calling optimizer.train().")

    #####################################################
    # handle additional costs (normally regularization) #
    #####################################################
    # Create the gradient updates for the model - make sure to handle the possible
    # list of costs used for pretraining of certain parts of the model.
    train_costs = raise_to_list(self.model.get_train_cost())
    # deal with any other additional costs (like regularization, etc.)
    if additional_cost is not None:
        additional_costs = raise_to_list(additional_cost)
        if len(additional_costs) > 1:
            additional_cost = T.sum(additional_costs)
        elif len(additional_costs) == 1:
            # unwrap a single-element list so the gradient receives an expression, not a list
            additional_cost = additional_costs[0]
        else:
            # an empty list means there is nothing extra to add
            additional_cost = None

    #########################
    # gradients and updates #
    #########################
    # TODO: additional_cost will double count with gradients during layer-wise pretraining.
    # Need to somehow make w.r.t. params appropriate for the individual training costs.
    # The condition does not depend on the individual cost, so warn once up front.
    if len(train_costs) > 1 and additional_cost is not None:
        log.warning("additional_cost will double count with gradients during layer-wise pretraining!")
        warnings.warn("additional_cost will double count with gradients during layer-wise pretraining!")

    train_updates = []
    self.gradients = []
    for train_cost in train_costs:
        # Now create the training cost function for the model to use while training - update parameters
        # gradient!
        gradients, _ = self.model.get_gradient(cost=train_cost, additional_cost=additional_cost)
        # clip gradients if we want.
        gradients = clip_gradients(gradients, self.grad_clip, self.hard_clip)
        # append to list
        self.gradients.append(gradients)

        # Calculate the optimizer updates each run
        # This is where the magic happens for a lot of sub-implementations of SGD!
        # It tells how to update the params each training epoch
        gradient_updates = self.get_updates(gradients)

        # Combine the updates from the model also if applicable
        updates = self.model.get_updates()
        if updates:
            updates.update(gradient_updates)
        else:
            updates = gradient_updates
        train_updates.append(updates)

    # grab the model parameters to use during training
    self.params = self.model.get_params()
    log.info("%s params: %s", str(type(self.model)), str(self.params))

    ############
    # monitors #
    ############
    # deal with the monitor channels if they were given (or take them from the plot)
    if monitor_channels is None and plot is not None and len(plot.channels) > 0:
        monitor_channels = plot.channels

    self.train_monitors_dict = {}
    self.valid_monitors_dict = {}
    self.test_monitors_dict = {}

    self.train_monitors_outservice_dict = {}
    self.valid_monitors_outservice_dict = {}
    self.test_monitors_outservice_dict = {}

    if monitor_channels:
        # collapse the appropriate monitors into their (name, expression, out_service) tuples
        train_collapsed = collapse_channels(monitor_channels, train=True)
        valid_collapsed = collapse_channels(monitor_channels, valid=True)
        test_collapsed = collapse_channels(monitor_channels, test=True)
        # get name: expression dictionary
        self.train_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
        self.valid_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])
        self.test_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in test_collapsed])
        # get name: outservice dictionary
        self.train_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in train_collapsed])
        self.valid_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in valid_collapsed])
        self.test_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in test_collapsed])
    # finally deal with an outservice provided to monitor training cost
    self.train_outservice = train_outservice
    # remove redundant files made by the fileservice for the train monitor.
    # TODO: THIS FEELS LIKE A HACK. I don't like it.
    if isinstance(self.train_outservice, FileService):
        for redundant_file in (self.train_outservice.valid_filename, self.train_outservice.test_filename):
            # the file may already be gone (e.g. train() called again with the same service)
            if os.path.exists(redundant_file):
                os.remove(redundant_file)

    #######################################
    # compile train and monitor functions #
    #######################################
    model_inputs = raise_to_list(self.model.get_inputs())
    # a model without targets yields None here; treat that as "no target inputs"
    model_targets = raise_to_list(self.model.get_targets()) or []
    function_input = model_inputs + model_targets
    train_functions = []
    for i, (updates, train_cost) in enumerate(zip(train_updates, train_costs)):
        # Compile the training function!
        log.info('Compiling f_learn %d/%d function for model %s...', i + 1, len(train_updates), str(type(self.model)))
        t = time.time()

        # the training cost comes first in the outputs, followed by the train monitor expressions
        f_learn = function(inputs=function_input,
                           updates=updates,
                           outputs=[train_cost] + list(self.train_monitors_dict.values()),
                           name='f_learn_%d' % i)

        log.info('f_learn %d compilation took %s', i + 1, make_time_units_string(time.time() - t))
        train_functions.append(f_learn)

    # figure out if we want valid and test (monitors)
    self.valid_flag = (self.dataset.valid_inputs is not None) and (len(self.valid_monitors_dict) > 0)
    self.test_flag = (self.dataset.test_inputs is not None) and (len(self.test_monitors_dict) > 0)
    # Now compile the monitor functions!
    log.debug("Compiling monitor functions...")
    monitor_t = time.time()
    # valid monitors
    if self.valid_flag:
        self.valid_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.valid_monitors_dict.values()),
            name='valid_monitor_function'
        )
    else:
        self.valid_monitor_function = None

    # test monitors
    if self.test_flag:
        self.test_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.test_monitors_dict.values()),
            name='test_monitor_function'
        )
    else:
        self.test_monitor_function = None

    log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

    ##################
    # start training #
    ##################
    # make sure to deal with a list of train_cost functions - for layer-wise pretraining!
    # this list of training functions was compiled above, one per training cost.
    start_time = time.time()
    for func_i, train_function in enumerate(train_functions):
        log.info("-----------TRAINING %s function %d/%d FOR %d EPOCHS-----------",
                 str(type(self.model)), func_i + 1, len(train_functions), self.n_epoch)

        self.STOP = False
        self.epoch_counter = 0
        # reset any decay params
        for decay_param in self.get_decay_params():
            decay_param.reset()

        self.times = []
        self.best_cost = numpy.inf
        self.best_params = None
        self.patience = 0

        t = time.time()

        while not self.STOP:
            try:
                self.STOP = self._perform_one_epoch(train_function, plot)
            except KeyboardInterrupt:
                # allow the user to interrupt training and still fall through to the save logic below
                log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
                self.STOP = True

        # save params
        if self.best_params is not None:
            log.debug("Restoring best model parameters...")
            set_shared_values(self.params, self.best_params)
        log.debug("Saving model parameters...")
        self.model.save_params('trained_epoch_' + str(self.epoch_counter))

        log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

    log.info("------------TOTAL %s TRAIN TIME TOOK %s---------",
             str(type(self.model)), make_time_units_string(time.time() - start_time))
def train(self, monitor_channels=None, train_outservice=None, plot=None, additional_cost=None):
    """
    This method performs the training!!!
    It is an online training method that goes over minibatches from the dataset for a number of epochs,
    updating parameters after each minibatch.

    You can disrupt training with a KeyBoardInterrupt and it should exit/save parameters gracefully.

    Parameters
    ----------
    monitor_channels : list(MonitorsChannel or Monitor), optional
        The list of channels or monitors containing monitor expressions/variables to compile and evaluate
        on the data. If None and a `plot` with channels is given, the plot's channels are used.
    train_outservice : OutService, optional
        The OutService to use for the automatically created train_cost monitor. Default of None just outputs
        to logs.
    plot : Plot, optional
        The Plot object to use if we want to graph the outputs (uses bokeh server).
    additional_cost : theano expression or list(theano expression), optional
        Any additional cost expressions to use during training (things like regularization). These will be summed
        with the existing cost.

    Raises
    ------
    AssertionError
        If the optimizer has no model to train.
    """
    if not self.model:
        log.error("No self.model for the Optimizer!")
        raise AssertionError("Needs to be initialized with a Model! (Or something went wrong if train() "
                             "was called from the Model. Try initializing the Optimizer with the model param "
                             "and calling optimizer.train().")

    #####################################################
    # handle additional costs (normally regularization) #
    #####################################################
    # Create the gradient updates for the model - make sure to handle the possible
    # list of costs used for pretraining of certain parts of the model.
    train_costs = raise_to_list(self.model.get_train_cost())
    # deal with any other additional costs (like regularization, etc.)
    if additional_cost is not None:
        additional_costs = raise_to_list(additional_cost)
        if len(additional_costs) > 1:
            additional_cost = T.sum(additional_costs)
        elif len(additional_costs) == 1:
            # unwrap a single-element list so the gradient receives an expression, not a list
            additional_cost = additional_costs[0]
        else:
            # an empty list means there is nothing extra to add
            additional_cost = None

    #########################
    # gradients and updates #
    #########################
    # TODO: additional_cost will double count with gradients during layer-wise pretraining.
    # Need to somehow make w.r.t. params appropriate for the individual training costs.
    # The condition does not depend on the individual cost, so warn once up front.
    if len(train_costs) > 1 and additional_cost is not None:
        log.warning("additional_cost will double count with gradients during layer-wise pretraining!")
        warnings.warn("additional_cost will double count with gradients during layer-wise pretraining!")

    train_updates = []
    self.gradients = []
    for train_cost in train_costs:
        # Now create the training cost function for the model to use while training - update parameters
        # gradient!
        gradients, _ = self.model.get_gradient(cost=train_cost, additional_cost=additional_cost)
        # clip gradients if we want.
        gradients = clip_gradients(gradients, self.grad_clip, self.hard_clip)
        # append to list
        self.gradients.append(gradients)

        # Calculate the optimizer updates each run
        # This is where the magic happens for a lot of sub-implementations of SGD!
        # It tells how to update the params each training epoch
        gradient_updates = self.get_updates(gradients)

        # Combine the updates from the model also if applicable
        updates = self.model.get_updates()
        if updates:
            updates.update(gradient_updates)
        else:
            updates = gradient_updates
        train_updates.append(updates)

    # grab the model parameters to use during training
    self.params = self.model.get_params()
    log.info("%s params: %s", str(type(self.model)), str(self.params))

    ############
    # monitors #
    ############
    # deal with the monitor channels if they were given (or take them from the plot)
    if monitor_channels is None and plot is not None and len(plot.channels) > 0:
        monitor_channels = plot.channels

    self.train_monitors_dict = {}
    self.valid_monitors_dict = {}
    self.test_monitors_dict = {}

    self.train_monitors_outservice_dict = {}
    self.valid_monitors_outservice_dict = {}
    self.test_monitors_outservice_dict = {}

    if monitor_channels:
        # collapse the appropriate monitors into their (name, expression, out_service) tuples
        train_collapsed = collapse_channels(monitor_channels, train=True)
        valid_collapsed = collapse_channels(monitor_channels, valid=True)
        test_collapsed = collapse_channels(monitor_channels, test=True)
        # get name: expression dictionary
        self.train_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
        self.valid_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])
        self.test_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in test_collapsed])
        # get name: outservice dictionary
        self.train_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in train_collapsed])
        self.valid_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in valid_collapsed])
        self.test_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in test_collapsed])
    # finally deal with an outservice provided to monitor training cost
    self.train_outservice = train_outservice
    # remove redundant files made by the fileservice for the train monitor.
    # TODO: THIS FEELS LIKE A HACK. I don't like it.
    if isinstance(self.train_outservice, FileService):
        for redundant_file in (self.train_outservice.valid_filename, self.train_outservice.test_filename):
            # the file may already be gone (e.g. train() called again with the same service)
            if os.path.exists(redundant_file):
                os.remove(redundant_file)

    #######################################
    # compile train and monitor functions #
    #######################################
    model_inputs = raise_to_list(self.model.get_inputs())
    # a model without targets yields None here; treat that as "no target inputs"
    model_targets = raise_to_list(self.model.get_targets()) or []
    function_input = model_inputs + model_targets
    train_functions = []
    for i, (updates, train_cost) in enumerate(zip(train_updates, train_costs)):
        # Compile the training function!
        log.info('Compiling f_learn %d/%d function for model %s...', i + 1, len(train_updates), str(type(self.model)))
        t = time.time()

        # the training cost comes first in the outputs, followed by the train monitor expressions
        f_learn = function(inputs=function_input,
                           updates=updates,
                           outputs=[train_cost] + list(self.train_monitors_dict.values()),
                           name='f_learn_%d' % i)

        log.info('f_learn %d compilation took %s', i + 1, make_time_units_string(time.time() - t))
        train_functions.append(f_learn)

    # figure out if we want valid and test (monitors)
    self.valid_flag = (self.dataset.valid_inputs is not None) and (len(self.valid_monitors_dict) > 0)
    self.test_flag = (self.dataset.test_inputs is not None) and (len(self.test_monitors_dict) > 0)
    # Now compile the monitor functions!
    log.debug("Compiling monitor functions...")
    monitor_t = time.time()
    # valid monitors
    if self.valid_flag:
        self.valid_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.valid_monitors_dict.values()),
            name='valid_monitor_function'
        )
    else:
        self.valid_monitor_function = None

    # test monitors
    if self.test_flag:
        self.test_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.test_monitors_dict.values()),
            name='test_monitor_function'
        )
    else:
        self.test_monitor_function = None

    log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

    ##################
    # start training #
    ##################
    # make sure to deal with a list of train_cost functions - for layer-wise pretraining!
    # this list of training functions was compiled above, one per training cost.
    start_time = time.time()
    for func_i, train_function in enumerate(train_functions):
        log.info("-----------TRAINING %s function %d/%d FOR %d EPOCHS-----------",
                 str(type(self.model)), func_i + 1, len(train_functions), self.n_epoch)

        self.STOP = False
        self.epoch_counter = 0
        # reset any decay params
        for decay_param in self.get_decay_params():
            decay_param.reset()

        self.times = []
        self.best_cost = numpy.inf
        self.best_params = None
        self.patience = 0

        t = time.time()

        while not self.STOP:
            try:
                self.STOP = self._perform_one_epoch(train_function, plot)
            except KeyboardInterrupt:
                # allow the user to interrupt training and still fall through to the save logic below
                log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
                self.STOP = True

        # save params
        if self.best_params is not None:
            log.debug("Restoring best model parameters...")
            set_shared_values(self.params, self.best_params)
        log.debug("Saving model parameters...")
        self.model.save_params('trained_epoch_' + str(self.epoch_counter) + '.pkl')

        log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

    log.info("------------TOTAL %s TRAIN TIME TOOK %s---------",
             str(type(self.model)), make_time_units_string(time.time() - start_time))
def train(self, monitor_channels=None, train_outservice=None, plot=None, continue_training=False):
    """
    This method performs the training!!!
    It is an online training method that goes over minibatches from the dataset for a number of epochs,
    updating parameters after each minibatch.

    You can disrupt training with a KeyBoardInterrupt and it should exit/save parameters gracefully.

    The method works in three stages:

    1. build the (start, end) minibatch indices, the `givens` for each data subset, and one set of
       parameter updates per training cost returned by the model;
    2. compile one training function per training cost, plus optional valid/test monitor functions;
    3. run every training function in turn until `_perform_one_epoch` asks to stop, then restore the
       best parameters (if any were recorded) and save the model parameters to
       'trained_epoch_<epoch_counter>.pkl'.

    Parameters
    ----------
    monitor_channels : list(MonitorsChannel or Monitor), optional
        The list of channels or monitors containing monitor expressions/variables to compile and evaluate
        on the data. If None and a `plot` with channels is given, the plot's channels are used.
    train_outservice : OutService, optional
        The OutService to use for the automatically created train_cost monitor. Default of None just outputs
        to logs.
    plot : Plot, optional
        The Plot object to use if we want to graph the outputs (uses bokeh server).
    continue_training : bool
        Whether to continue training from a previous point. When False, the decay parameters are reset
        before each training function is run.
    """
    ###############################################
    # theano index variable to use on the dataset #
    ###############################################
    # index to a [mini]batch - both start and end
    data_idx = T.iscalar('data_index')
    data_end_idx = T.iscalar('data_end_index')
    function_input = [data_idx, data_end_idx]
    batch_slice = slice(data_idx, data_end_idx)

    # compute number of minibatches for training, validation and testing
    # shapes is list of list - input list of datasets to optimizer (for multiple inputs), and each dataset
    # could be a list of shared variables (like multiple sequences from files)
    train_data_shapes = raise_to_list(self.dataset.getDataShape(TRAIN))
    valid_data_shapes = raise_to_list(self.dataset.getDataShape(VALID))
    test_data_shapes = raise_to_list(self.dataset.getDataShape(TEST))

    # train_batches is going to be lists of tuples that contain the start and end indices for train data.
    # this is more useful in the case of datasets that are lists of sequences, so that the start and end
    # indices can make sure a batch does not cross the sequence boundary on the concatenated data
    train_data_lens = [shape[0] for shape in train_data_shapes]
    self.train_batches = self._get_batch_indices(train_data_lens)

    if valid_data_shapes is not None:
        valid_data_lens = [shape[0] for shape in valid_data_shapes]
        self.valid_batches = self._get_batch_indices(valid_data_lens)
    else:
        self.valid_batches = None

    if test_data_shapes is not None:
        test_data_lens = [shape[0] for shape in test_data_shapes]
        self.test_batches = self._get_batch_indices(test_data_lens)
    else:
        self.test_batches = None

    # create the givens for the input function as pairs of (input_variable: sliced_data)
    train_givens = self._get_givens_subset(TRAIN, batch_slice)
    valid_givens = self._get_givens_subset(VALID, batch_slice)
    test_givens = self._get_givens_subset(TEST, batch_slice)

    # Now time to create the gradient updates for the model - make sure to handle the possible
    # list of costs used for pretraining of certain parts of the model.
    train_costs = raise_to_list(self.model.get_train_cost())
    train_updates = []
    self.gradients = []
    for train_cost in train_costs:
        # Now create the training cost function for the model to use while training - update parameters
        # gradient!
        gradients, _ = self.model.get_gradient(cost=train_cost)
        self.gradients.append(gradients)

        # Calculate the optimizer updates each run
        # This is where the magic happens for a lot of sub-implementations of SGD!
        # It tells how to update the params each training epoch
        gradient_updates = self.get_updates(gradients)

        # Combine the updates from the model also if applicable.
        # Merge into a copy: self.model.get_updates() is requested again below (for every further train
        # cost and for the valid/test monitor functions), so the object it returns must not be mutated
        # here or the gradient updates could leak into those other functions.
        model_updates = self.model.get_updates()
        if model_updates:
            updates = OrderedDict(model_updates)
            updates.update(gradient_updates)
        else:
            updates = gradient_updates
        train_updates.append(updates)

    # grab the model parameters to use during training
    self.params = self.model.get_params()
    log.info("%s params: %s", str(type(self.model)), str(self.params))

    # deal with the monitor channels if they were given (or take them from the plot)
    if monitor_channels is None and plot is not None and len(plot.channels) > 0:
        monitor_channels = plot.channels

    self.train_monitors_dict = {}
    self.valid_monitors_dict = {}
    self.test_monitors_dict = {}

    self.train_monitors_outservice_dict = {}
    self.valid_monitors_outservice_dict = {}
    self.test_monitors_outservice_dict = {}

    if monitor_channels:
        # collapse the appropriate monitors into their (name, expression, out_service) tuples
        train_collapsed = collapse_channels(monitor_channels, train=True)
        valid_collapsed = collapse_channels(monitor_channels, valid=True)
        test_collapsed = collapse_channels(monitor_channels, test=True)
        # get name: expression dictionary
        self.train_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
        self.valid_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])
        self.test_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in test_collapsed])
        # get name: outservice dictionary
        self.train_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in train_collapsed])
        self.valid_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in valid_collapsed])
        self.test_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in test_collapsed])

    # finally deal with an outservice provided to monitor training cost
    self.train_outservice = train_outservice
    # remove redundant files made by the fileservice for the train monitor.
    # TODO: THIS FEELS LIKE A HACK. I don't like it.
    if isinstance(self.train_outservice, FileService):
        os.remove(self.train_outservice.valid_filename)
        os.remove(self.train_outservice.test_filename)

    #######################################
    # compile train and monitor functions #
    #######################################
    train_functions = []
    for i, (updates, train_cost) in enumerate(zip(train_updates, train_costs)):
        # Compile the training function!
        log.info('Compiling f_learn %d/%d function for model %s...',
                 i + 1, len(train_updates), str(type(self.model)))
        t = time.time()

        # list(...) is required: on Python 3 dict.values() is a view and cannot be added to a list.
        f_learn = function(inputs=function_input,
                           updates=updates,
                           outputs=[train_cost] + list(self.train_monitors_dict.values()),
                           givens=train_givens,
                           name='f_learn_%d' % i)

        log.info('f_learn compilation took %s', make_time_units_string(time.time() - t))
        train_functions.append(f_learn)

    # figure out if we want valid and test
    self.valid_flag = (self.dataset.getSubset(VALID)[0] is not None) and (len(self.valid_monitors_dict) > 0)
    self.test_flag = (self.dataset.getSubset(TEST)[0] is not None) and (len(self.test_monitors_dict) > 0)
    # Now compile the monitor functions!
    log.debug("Compiling monitor functions...")
    monitor_t = time.time()
    # valid monitors
    if self.valid_flag:
        self.valid_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.valid_monitors_dict.values()),
            givens=valid_givens,
            name='valid_monitor_function'
        )
    else:
        self.valid_monitor_function = None

    # test monitors
    if self.test_flag:
        self.test_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.test_monitors_dict.values()),
            givens=test_givens,
            name='test_monitor_function'
        )
    else:
        self.test_monitor_function = None

    log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

    ##################
    # start training #
    ##################
    # make sure to deal with a list of train_cost functions - for layer-wise pretraining!
    # this list of training functions was compiled above, one per training cost.
    start_time = time.time()
    for func_i, train_function in enumerate(train_functions):
        log.info("-----------TRAINING %s function %d/%d FOR %d EPOCHS (continue_training=%s)-----------",
                 str(type(self.model)), func_i + 1, len(train_functions), self.n_epoch, str(continue_training))

        log.debug("Train dataset size is: %s", self.dataset.getDataShape(TRAIN))
        if self.dataset.getSubset(VALID)[0] is not None:
            log.debug("Valid dataset size is: %s", self.dataset.getDataShape(VALID))
        if self.dataset.getSubset(TEST)[0] is not None:
            log.debug("Test dataset size is: %s", self.dataset.getDataShape(TEST))

        self.STOP = False
        self.epoch_counter = 0
        if not continue_training:
            # reset any decay params
            for decay_param in self.get_decay_params():
                decay_param.reset()

        # per-train-function bookkeeping; always (re)initialized so the attributes exist for the epoch loop
        self.times = []
        self.best_cost = numpy.inf
        self.best_params = None
        self.patience = 0

        t = time.time()

        while not self.STOP:
            try:
                self.STOP = self._perform_one_epoch(train_function, plot)
            except KeyboardInterrupt:
                # stop this training function but still fall through to the save step below
                log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
                self.STOP = True

        # save params
        if self.best_params is not None:
            log.debug("Restoring best model parameters...")
            set_shared_values(self.params, self.best_params)
        log.debug("Saving model parameters...")
        self.model.save_params('trained_epoch_' + str(self.epoch_counter) + '.pkl')

        log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

    log.info("------------TOTAL %s TRAIN TIME TOOK %s---------",
             str(type(self.model)), make_time_units_string(time.time() - start_time))