def __init__(self, model, saving_path, log=None):
    check_type(model, EventCompositionModel)
    self.model = model

    # directory to save intermediate and final results
    if not os.path.exists(saving_path):
        os.makedirs(saving_path)
    self.saving_path = saving_path

    if log is None:
        self.log = get_console_logger('event_comp_trainer')
    else:
        self.log = log
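# Usage sketch for the constructor above (illustrative only; the class
# name EventCompositionTrainer is assumed from the 'event_comp_trainer'
# logger name, and the output path is a hypothetical placeholder):
#
#     model = EventCompositionModel(...)  # built or loaded elsewhere
#     trainer = EventCompositionTrainer(model, 'output/event_comp')
#     trainer.log.info(
#         'Saving results to {}'.format(trainer.saving_path))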
import timeit

import on
from on.common.util import FancyConfigParser

from config import cfg
from utils import consts, get_console_logger, supress_fd, restore_fd

log = get_console_logger()


def get_default_ontonotes_config():
    on_cfg = FancyConfigParser()
    on_cfg.add_section('corpus')
    on_cfg.set('corpus', '__name__', 'corpus')
    on_cfg.set('corpus', 'granularity', 'source')
    on_cfg.set('corpus', 'banks', 'parse coref name')
    on_cfg.set('corpus', 'wsd-indexing', 'word')
    on_cfg.set('corpus', 'name-indexing', 'word')
    return on_cfg


def load_ontonotes(corpus):
    assert corpus in consts.valid_ontonotes_corpus, \
        'ontonotes corpora can only be one of {}'.format(
            consts.valid_ontonotes_corpus)
    log.info('Reading Ontonotes corpus {} from {}'.format(
        corpus, cfg.ontonotes_root))
    on_cfg = get_default_ontonotes_config()
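# Illustrative check of the defaults produced by
# get_default_ontonotes_config() above (a sketch, not part of the original
# module; it assumes the standard ConfigParser get() interface on
# FancyConfigParser):
def _show_default_ontonotes_config():
    on_cfg = get_default_ontonotes_config()
    # 'banks' lists the annotation layers to load; the others control
    # corpus granularity and sense/name indexing
    for option in ('granularity', 'banks', 'wsd-indexing',
                   'name-indexing'):
        log.info('corpus.{} = {}'.format(
            option, on_cfg.get('corpus', option)))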
def train(self, batch_iterator, iterations=10000, log=None,
          training_cost_prop_change_threshold=0.0005, learning_rate=0.1,
          regularization=0., corruption_level=0., loss='xent',
          log_every_batch=1000):
    """
    Train on data stored in Theano tensors. Uses minibatch training.

    batch_iterator should be a repeatable iterator producing batches.

    The algorithm will assume it has converged and stop early if the
    proportional change between successive training costs drops below
    training_cost_prop_change_threshold for five iterations in a row.

    Uses L2 regularization.
    """
    if log is None:
        log = get_console_logger()

    log.info(
        'Training params: learning rate={}, noise ratio={:.1f}%, '
        'regularization={}'.format(
            learning_rate, corruption_level * 100.0, regularization))
    log.info('Training with SGD')

    # Compile functions
    # Prepare cost/update functions for training
    cost, updates = self.network.get_cost_updates(
        learning_rate=self.learning_rate,
        regularization=self.regularization,
        corruption_level=corruption_level,
        loss=loss)
    # Prepare training functions
    train_fn = theano.function(
        inputs=[
            self.network.x,
            theano.In(self.learning_rate, value=0.1),
            theano.In(self.regularization, value=0.0)
        ],
        outputs=cost,
        updates=updates,
    )

    # Keep a record of costs, so we can plot them
    training_costs = []

    # Count of successive iterations with a very small change in training
    # cost, used in the convergence check below
    below_threshold_its = 0

    for i in range(iterations):
        err = 0.0
        batch_num = 0
        for batch_num, batch in enumerate(batch_iterator):
            # Shuffle the training data between iterations, as one should
            # with SGD
            # Just shuffle within batches
            shuffle = numpy.random.permutation(batch.shape[0])
            batch[:] = batch[shuffle]

            # Update the model with this batch's data
            err += train_fn(batch,
                            learning_rate=learning_rate,
                            regularization=regularization)

            if (batch_num + 1) % log_every_batch == 0:
                log.info(
                    'Iteration {}: Processed {:>8d}/{:>8d} batches'.format(
                        i, batch_num + 1, batch_iterator.num_batch))

        log.info(
            'Iteration {}: Processed {:>8d}/{:>8d} batches'.format(
                i, batch_iterator.num_batch, batch_iterator.num_batch))

        # batch_num is the index of the last batch, so the number of
        # batches is batch_num + 1
        training_costs.append(err / (batch_num + 1))

        log.info(
            'COMPLETED ITERATION {:d}: training cost={:.5f}'.format(
                i, training_costs[-1]))

        # Check the proportional change between this iteration's training
        # cost and the last
        if len(training_costs) > 2:
            training_cost_prop_change = abs(
                (training_costs[-2] - training_costs[-1])
                / training_costs[-2])
            if training_cost_prop_change < \
                    training_cost_prop_change_threshold:
                # Very small change in training cost - maybe we've
                # converged
                below_threshold_its += 1
                if below_threshold_its >= 5:
                    # We've had enough iterations with very small changes:
                    # we've converged
                    log.info(
                        'Proportional change in training cost ({}) below '
                        '{} for 5 successive iterations: converged'.format(
                            training_cost_prop_change,
                            training_cost_prop_change_threshold))
                    break
                else:
                    log.info(
                        'Proportional change in training cost ({}) below '
                        '{} for {} successive iterations: waiting until '
                        'it\'s been low for 5 iterations'.format(
                            training_cost_prop_change,
                            training_cost_prop_change_threshold,
                            below_threshold_its))
            else:
                # Reset the below threshold counter
                below_threshold_its = 0
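# The convergence test used in train() above, extracted as a standalone
# sketch (illustrative; not part of the original module). It flags
# convergence once the proportional change between successive training
# costs has been below `threshold` for five iterations in a row.
def _check_convergence(training_costs, threshold, below_threshold_its):
    """Return (converged, updated below-threshold counter)."""
    if len(training_costs) <= 2:
        return False, below_threshold_its
    prop_change = abs(
        (training_costs[-2] - training_costs[-1]) / training_costs[-2])
    if prop_change < threshold:
        below_threshold_its += 1
    else:
        # Reset the counter on any iteration with a larger change
        below_threshold_its = 0
    return below_threshold_its >= 5, below_threshold_its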
def train(self, batch_iterator, iterations=10000, iteration_callback=None,
          log=None, training_cost_prop_change_threshold=0.0005,
          val_batch_iterator=None, stopping_iterations=10,
          log_every_batch=1000):
    if log is None:
        log = get_console_logger()

    log.info(
        'Tuning params: learning rate={} (->{}), regularization={}'.format(
            self.learning_rate, self.min_learning_rate,
            self.regularization))
    if self.update_event_vectors:
        log.info('Updating event vector network')
    if self.update_input_vectors:
        log.info('Updating word2vec word representations')
    if self.update_empty_vectors:
        log.info('Training empty argument vectors')

    # Compile functions
    # Prepare cost/update functions for training
    cost, updates = self.get_triple_cost_updates(compute_update=True)
    # Prepare training functions
    train_fn = theano.function(
        inputs=self.model.triple_inputs + [
            # Allow the learning rate to be set per update
            theano.In(self.learning_rate_var, value=self.learning_rate)
        ],
        outputs=cost,
        updates=updates,
    )
    # Prepare cost functions without regularization for validation
    cost_without_reg = self.get_triple_cost_updates(
        regularization=0., compute_update=False)
    cost_fn = theano.function(
        inputs=self.model.triple_inputs,
        outputs=cost_without_reg,
    )

    # Keep a record of costs, so we can plot them
    training_costs = []
    val_costs = []

    # Keep a copy of the best weights so far
    best_weights = best_iter = best_val_cost = None
    if val_batch_iterator is not None:
        best_weights = self.model.get_weights()
        best_iter = -1
        best_val_cost = PairCompositionTrainer.compute_val_cost(
            cost_fn, val_batch_iterator)

    below_threshold_its = 0

    learning_rate = self.learning_rate
    last_update_lr_iter = 0

    if val_batch_iterator is not None:
        # Compute the initial cost on the validation set
        val_cost = PairCompositionTrainer.compute_val_cost(
            cost_fn, val_batch_iterator)
        log.info('Initial validation cost: {:.4f}'.format(val_cost))

    for i in range(iterations):
        err = 0.0
        batch_num = 0
        for batch_num, batch_inputs in enumerate(batch_iterator):
            # Shuffle the training data between iterations, as one should
            # with SGD
            # Just shuffle within batches
            shuffle = numpy.random.permutation(batch_inputs[0].shape[0])
            for batch_data in batch_inputs:
                batch_data[:] = batch_data[shuffle]

            # Update the model with this batch's data
            err += train_fn(*batch_inputs, learning_rate=learning_rate)

            if (batch_num + 1) % log_every_batch == 0:
                log.info('Iteration {}: Processed {:>8d}/{:>8d} batches, '
                         'learning rate = {:g}'.format(
                             i, batch_num + 1, batch_iterator.num_batch,
                             learning_rate))

        log.info('Iteration {}: Processed {:>8d}/{:>8d} batches'.format(
            i, batch_iterator.num_batch, batch_iterator.num_batch))

        training_costs.append(err / (batch_num + 1))

        if val_batch_iterator is not None:
            # Compute the cost function on the validation set
            val_cost = PairCompositionTrainer.compute_val_cost(
                cost_fn, val_batch_iterator)
            val_costs.append(val_cost)
            if val_cost <= best_val_cost:
                # We assume that, if the validation error remains the
                # same, it's better to use the new set of weights (with,
                # presumably, a better training error)
                if val_cost == best_val_cost:
                    log.info('Same validation cost: {:.4f}, '
                             'using new weights'.format(val_cost))
                else:
                    log.info('New best validation cost: {:.4f}'.format(
                        val_cost))
                # Update our best estimate
                best_weights = self.model.get_weights()
                best_iter = i
                best_val_cost = val_cost
            if val_cost >= best_val_cost \
                    and i - best_iter >= stopping_iterations:
                # We've gone on long enough without improving validation
                # error; time to call a halt and use the best validation
                # error we got
                log.info('Stopping after {} iterations of increasing '
                         'validation cost'.format(stopping_iterations))
                break

            log.info('COMPLETED ITERATION {}: training cost={:.5g}, '
                     'validation cost={:.5g}'.format(
                         i, training_costs[-1], val_costs[-1]))

            if val_costs[-1] >= best_val_cost and i - best_iter >= 2 \
                    and i - last_update_lr_iter >= 2 \
                    and learning_rate > self.min_learning_rate:
                # We've gone on 2 iterations without improving validation
                # error, time to reduce the learning rate
                learning_rate /= 2
                if learning_rate < self.min_learning_rate:
                    learning_rate = self.min_learning_rate
                last_update_lr_iter = i
                log.info('Halving learning rate to {} after 2 iterations '
                         'of increasing validation cost'.format(
                             learning_rate))

        if iteration_callback is not None:
            # Not computing training error at the moment
            iteration_callback(i)

        # Check the proportional change between this iteration's training
        # cost and the last
        if len(training_costs) > 2:
            training_cost_prop_change = abs(
                (training_costs[-2] - training_costs[-1])
                / training_costs[-2])
            if training_cost_prop_change < \
                    training_cost_prop_change_threshold:
                # Very small change in training cost - maybe we've
                # converged
                below_threshold_its += 1
                if below_threshold_its >= 5:
                    # We've had enough iterations with very small changes:
                    # we've converged
                    log.info(
                        'Proportional change in training cost ({:g}) '
                        'below {:g} for five successive iterations: '
                        'converged'.format(
                            training_cost_prop_change,
                            training_cost_prop_change_threshold))
                    break
                else:
                    log.info(
                        'Proportional change in training cost ({:g}) '
                        'below {:g} for {} successive iterations: waiting '
                        'until it\'s been low for five iterations'.format(
                            training_cost_prop_change,
                            training_cost_prop_change_threshold,
                            below_threshold_its))
            else:
                # Reset the below threshold counter
                below_threshold_its = 0

    if best_weights is not None:
        # Use the weights that gave us the best error on the validation set
        self.model.set_weights(best_weights)
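# Sketch of the compute_val_cost helper referenced in train() above (the
# real PairCompositionTrainer.compute_val_cost may differ; this assumes
# cost_fn returns a scalar cost per batch): average the
# regularization-free cost over all validation batches.
def _compute_val_cost_sketch(cost_fn, val_batch_iterator):
    cost = 0.0
    batch_num = 0
    for batch_num, batch_inputs in enumerate(val_batch_iterator):
        cost += cost_fn(*batch_inputs)
    # batch_num is the index of the last batch, so divide by batch_num + 1
    return cost / (batch_num + 1)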