def __init__(self, model, saving_path, log=None):
    check_type(model, EventCompositionModel)
    self.model = model
    # directory to save intermediate and final results
    if not os.path.exists(saving_path):
        os.makedirs(saving_path)
    self.saving_path = saving_path
    if log is None:
        self.log = get_console_logger('event_comp_trainer')
    else:
        self.log = log
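
A minimal sketch of constructing the trainer above, assuming the enclosing class is named EventCompositionTrainer (an assumption suggested only by the logger name) and that an EventCompositionModel instance has already been built elsewhere:

# Sketch only: EventCompositionTrainer is an assumed name for the class that
# owns the __init__ above; `model` is an EventCompositionModel built elsewhere.
trainer = EventCompositionTrainer(model, saving_path='output/event_comp')
trainer.log.info('Saving results under {}'.format(trainer.saving_path))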
Example No. 2
import timeit

import on
from on.common.util import FancyConfigParser

from config import cfg
from utils import consts, get_console_logger, supress_fd, restore_fd

log = get_console_logger()


def get_default_ontonotes_config():
    on_cfg = FancyConfigParser()
    on_cfg.add_section('corpus')
    on_cfg.set('corpus', '__name__', 'corpus')
    # read the corpus at the granularity of source documents
    on_cfg.set('corpus', 'granularity', 'source')
    # annotation banks to load: parse trees, coreference chains and named
    # entities
    on_cfg.set('corpus', 'banks', 'parse coref name')
    # index word sense and name annotations by word position
    on_cfg.set('corpus', 'wsd-indexing', 'word')
    on_cfg.set('corpus', 'name-indexing', 'word')
    return on_cfg


def load_ontonotes(corpus):
    assert corpus in consts.valid_ontonotes_corpus, \
        'ontonotes corpora can only be one of {}'.format(
            consts.valid_ontonotes_corpus)

    log.info('Reading Ontonotes corpus {} from {}'.format(
        corpus, cfg.ontonotes_root))

    on_cfg = get_default_ontonotes_config()
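
A minimal sketch that exercises the two helpers above, assuming FancyConfigParser keeps the standard ConfigParser get() interface; the corpus name passed to load_ontonotes() is a deliberately invalid placeholder used only to show the validation step:

# Sketch only: assumes FancyConfigParser supports the standard ConfigParser
# get() call; 'not-a-real-corpus' is a placeholder, not a valid corpus name.
on_cfg = get_default_ontonotes_config()
log.info('Default banks: {}'.format(on_cfg.get('corpus', 'banks')))

try:
    load_ontonotes('not-a-real-corpus')
except AssertionError:
    log.info('Corpus must be one of {}'.format(consts.valid_ontonotes_corpus))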
Example No. 3
    def train(self, batch_iterator, iterations=10000, log=None,
              training_cost_prop_change_threshold=0.0005, learning_rate=0.1,
              regularization=0., corruption_level=0., loss='xent',
              log_every_batch=1000):
        """
        Train on data stored in Theano tensors. Uses minibatch training.

        batch_iterator should be a repeatable iterator producing batches.

        iteration_callback is called after each iteration with args (
        iteration, error array).

        The algorithm will assume it has converged and stop early if the 
        proportional change between successive
        training costs drops below training_cost_prop_change_threshold for 
        five iterations in a row.

        Uses L2 regularization.

        """
        if log is None:
            log = get_console_logger()

        log.info(
            'Training params: learning rate={}, noise ratio={:.1f}%, '
            'regularization={}'.format(
                learning_rate, corruption_level * 100.0, regularization))
        log.info('Training with SGD')

        # Compile functions
        # Prepare cost/update functions for training
        cost, updates = self.network.get_cost_updates(
            learning_rate=self.learning_rate,
            regularization=self.regularization,
            corruption_level=corruption_level,
            loss=loss)
        # Prepare training functions
        train_fn = theano.function(
            inputs=[
                self.network.x,
                theano.In(self.learning_rate, value=0.1),
                theano.In(self.regularization, value=0.0)
            ],
            outputs=cost,
            updates=updates,
        )

        # Keep a record of costs, so we can plot them
        training_costs = []

        # Count how many successive iterations have shown only a very small
        # change in training cost
        below_threshold_its = 0

        for i in range(iterations):
            err = 0.0
            batch_num = 0
            for batch_num, batch in enumerate(batch_iterator):
                # Shuffle the training data between iterations, as one should
                # with SGD
                # Just shuffle within batches
                shuffle = numpy.random.permutation(batch.shape[0])
                batch[:] = batch[shuffle]

                # Update the model with this batch's data
                err += train_fn(batch,
                                learning_rate=learning_rate,
                                regularization=regularization)

                if (batch_num + 1) % log_every_batch == 0:
                    log.info(
                        'Iteration {}: Processed {:>8d}/{:>8d} batches'.format(
                            i, batch_num + 1, batch_iterator.num_batch))

            log.info(
                'Iteration {}: Processed {:>8d}/{:>8d} batches'.format(
                    i, batch_iterator.num_batch, batch_iterator.num_batch))

            training_costs.append(err / (batch_num + 1))

            log.info(
                'COMPLETED ITERATION {:d}: training cost={:.5f}'.format(
                    i, training_costs[-1]))

            # Check the proportional change between this iteration's training
            # cost and the last
            if len(training_costs) > 2:
                training_cost_prop_change = abs(
                    (training_costs[-2] - training_costs[-1]) /
                    training_costs[-2])
                if training_cost_prop_change < \
                        training_cost_prop_change_threshold:
                    # Very small change in training cost - maybe we've converged
                    below_threshold_its += 1
                    if below_threshold_its >= 5:
                        # We've had enough iterations with very small changes:
                        # we've converged
                        log.info(
                            'Proportional change in training cost ({}) below '
                            '{} for 5 successive iterations: converged'.format(
                                training_cost_prop_change,
                                training_cost_prop_change_threshold))
                        break
                    else:
                        log.info(
                            'Proportional change in training cost ({}) below '
                            '{} for {} successive iterations: waiting until '
                            'it\'s been low for 5 iterations'.format(
                                training_cost_prop_change,
                                training_cost_prop_change_threshold,
                                below_threshold_its))
                else:
                    # Reset the below threshold counter
                    below_threshold_its = 0
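
A minimal sketch of driving the SGD loop above, assuming `trainer` is an already-constructed instance of the class this method belongs to; RepeatableBatchIterator and the random dummy data are hypothetical stand-ins that only satisfy the interface the loop uses (iteration plus a num_batch attribute):

# Sketch only: a hypothetical stand-in iterator; the real project supplies its
# own batch iterators. `trainer` is assumed to be an existing instance of the
# class whose train() method is shown above.
import numpy


class RepeatableBatchIterator(object):
    """Yield fixed-size slices of a data matrix; restartable on each pass."""
    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size
        self.num_batch = int(numpy.ceil(data.shape[0] / float(batch_size)))

    def __iter__(self):
        for start in range(0, self.data.shape[0], self.batch_size):
            yield self.data[start:start + self.batch_size]


dummy_data = numpy.random.rand(10000, 100).astype('float32')
batches = RepeatableBatchIterator(dummy_data, batch_size=1000)
trainer.train(batches, iterations=50, learning_rate=0.1,
              regularization=0.01, corruption_level=0.2)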
Example No. 4
    def train(self,
              batch_iterator,
              iterations=10000,
              iteration_callback=None,
              log=None,
              training_cost_prop_change_threshold=0.0005,
              val_batch_iterator=None,
              stopping_iterations=10,
              log_every_batch=1000):
        # TODO: add logic for validation set and stopping_iterations parameter
        if log is None:
            log = get_console_logger()

        log.info(
            'Tuning params: learning rate={} (->{}), regularization={}'.format(
                self.learning_rate, self.min_learning_rate,
                self.regularization))
        if self.update_event_vectors:
            log.info('Updating event vector network')
        if self.update_input_vectors:
            log.info('Updating word2vec word representations')
        if self.update_empty_vectors:
            log.info('Training empty argument vectors')

        # Compile functions
        # Prepare cost/update functions for training
        cost, updates = self.get_triple_cost_updates(compute_update=True)
        # Prepare training functions
        train_fn = theano.function(
            inputs=self.model.triple_inputs + [
                # Allow the learning rate to be set per update
                theano.In(self.learning_rate_var, value=self.learning_rate)
            ],
            outputs=cost,
            updates=updates,
        )
        # Prepare cost functions without regularization for validation
        cost_without_reg = self.get_triple_cost_updates(regularization=0.,
                                                        compute_update=False)
        cost_fn = theano.function(
            inputs=self.model.triple_inputs,
            outputs=cost_without_reg,
        )

        # Keep a record of costs, so we can plot them
        training_costs = []
        val_costs = []

        # Keep a copy of the best weights so far
        best_weights = best_iter = best_val_cost = None
        if val_batch_iterator is not None:
            best_weights = self.model.get_weights()
            best_iter = -1
            best_val_cost = PairCompositionTrainer.compute_val_cost(
                cost_fn, val_batch_iterator)

        below_threshold_its = 0

        learning_rate = self.learning_rate
        last_update_lr_iter = 0

        if val_batch_iterator is not None:
            # Compute the initial cost on the validation set
            val_cost = PairCompositionTrainer.compute_val_cost(
                cost_fn, val_batch_iterator)
            log.info('Initial validation cost: {:.4f}'.format(val_cost))

        for i in range(iterations):
            err = 0.0
            batch_num = 0

            for batch_num, batch_inputs in enumerate(batch_iterator):
                # Shuffle the training data between iterations, as one should
                # with SGD
                # Just shuffle within batches
                shuffle = numpy.random.permutation(batch_inputs[0].shape[0])
                for batch_data in batch_inputs:
                    batch_data[:] = batch_data[shuffle]

                # Update the model with this batch's data
                err += train_fn(*batch_inputs, learning_rate=learning_rate)

                if (batch_num + 1) % log_every_batch == 0:
                    log.info('Iteration {}: Processed {:>8d}/{:>8d} batches, '
                             'learning rate = {:g}'.format(
                                 i, batch_num + 1, batch_iterator.num_batch,
                                 learning_rate))

            log.info('Iteration {}: Processed {:>8d}/{:>8d} batches'.format(
                i, batch_iterator.num_batch, batch_iterator.num_batch))

            training_costs.append(err / (batch_num + 1))

            if val_batch_iterator is not None:
                # Compute the cost function on the validation set
                val_cost = PairCompositionTrainer.compute_val_cost(
                    cost_fn, val_batch_iterator)
                val_costs.append(val_cost)
                if val_cost <= best_val_cost:
                    # We assume that, if the validation error remains the same,
                    # it's better to use the new set of
                    # weights (with, presumably, a better training error)
                    if val_cost == best_val_cost:
                        log.info('Same validation cost: {:.4f}, '
                                 'using new weights'.format(val_cost))
                    else:
                        log.info('New best validation cost: {:.4f}'.format(
                            val_cost))
                    # Update our best estimate
                    best_weights = self.model.get_weights()
                    best_iter = i
                    best_val_cost = val_cost
                if val_cost >= best_val_cost \
                        and i - best_iter >= stopping_iterations:
                    # We've gone on long enough without improving validation
                    # error, time to call a halt and use the best validation
                    # error we got
                    log.info('Stopping after {} iterations of increasing '
                             'validation cost'.format(stopping_iterations))
                    break

            if val_batch_iterator is not None:
                log.info('COMPLETED ITERATION {}: training cost={:.5g}, '
                         'validation cost={:.5g}'.format(i, training_costs[-1],
                                                         val_costs[-1]))

                if val_costs[-1] >= best_val_cost and i - best_iter >= 2 \
                        and i - last_update_lr_iter >= 2 \
                        and learning_rate > self.min_learning_rate:
                    # We've gone on 2 iterations without improving validation
                    # error, time to reduce the learning rate
                    learning_rate /= 2
                    if learning_rate < self.min_learning_rate:
                        learning_rate = self.min_learning_rate
                    last_update_lr_iter = i
                    log.info('Halving learning rate to {} after 2 iterations '
                             'of increasing validation cost'.format(
                                 learning_rate))
            else:
                log.info('COMPLETED ITERATION {}: training cost={:.5g}'.format(
                    i, training_costs[-1]))

            if iteration_callback is not None:
                # Not computing training error at the moment
                iteration_callback(i)

            # Check the proportional change between this iteration's training
            # cost and the last
            if len(training_costs) > 2:
                training_cost_prop_change = abs(
                    (training_costs[-2] - training_costs[-1]) /
                    training_costs[-2])
                if training_cost_prop_change < \
                        training_cost_prop_change_threshold:
                    # Very small change in training cost - maybe we've converged
                    below_threshold_its += 1
                    if below_threshold_its >= 5:
                        # We've had enough iterations with very small changes:
                        # we've converged
                        log.info(
                            'Proportional change in training cost ({:g}) below '
                            '{:g} for five successive iterations: '
                            'converged'.format(
                                training_cost_prop_change,
                                training_cost_prop_change_threshold))
                        break
                    else:
                        log.info(
                            'Proportional change in training cost ({:g}) below '
                            '{:g} for {} successive iterations: waiting until '
                            'it\'s been low for five iterations'.format(
                                training_cost_prop_change,
                                training_cost_prop_change_threshold,
                                below_threshold_its))
                else:
                    # Reset the below threshold counter
                    below_threshold_its = 0

        if best_weights is not None:
            # Use the weights that gave us the best error on the validation set
            self.model.set_weights(best_weights)
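
A minimal sketch of calling this validation-based variant, assuming `trainer` is an already-built PairCompositionTrainer and that train_batches / val_batches are repeatable iterators yielding tuples of arrays matching self.model.triple_inputs; the checkpoint callback below is illustrative only:

# Sketch only: trainer, train_batches and val_batches are assumed to exist;
# the callback just pickles the current weights after every iteration.
import pickle


def checkpoint_callback(iteration):
    # model.get_weights() is the same call the trainer itself uses above
    with open('pair_weights_{}.pkl'.format(iteration), 'wb') as f:
        pickle.dump(trainer.model.get_weights(), f)


trainer.train(
    train_batches,
    iterations=100,
    iteration_callback=checkpoint_callback,
    val_batch_iterator=val_batches,
    stopping_iterations=10,
    log_every_batch=500,
)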