def fit_chainer(config: dict, iterator: Union[DataLearningIterator, DataFittingIterator]) -> Chainer:
    """Fit and return the chainer described in corresponding configuration dictionary."""
    chainer_config: dict = config['chainer']
    chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
    for component_config in chainer_config['pipe']:
        component = from_params(component_config, mode='train')

        if 'fit_on' in component_config:
            component: Estimator

            targets = component_config['fit_on']
            if isinstance(targets, str):
                targets = [targets]

            # fit the estimator on the whole preprocessed 'train' split
            preprocessed = chainer.compute(*iterator.get_instances('train'), targets=targets)
            if len(targets) == 1:  # use the normalized targets list, so a single string target is handled correctly
                preprocessed = [preprocessed]
            component.fit(*preprocessed)
            component.save()

        if 'fit_on_batch' in component_config:
            component: Estimator
            component.fit_batches(iterator, config['train']['batch_size'])
            component.save()

        if 'fit_on_batch_preprocess' in component_config:
            component: Estimator

            targets = component_config['fit_on_batch_preprocess']
            if isinstance(targets, str):
                targets = [targets]

            # preprocess both 'train' and 'valid' splits before batched fitting
            data_train = chainer.compute(*iterator.get_instances('train'), targets=targets)
            data_valid = chainer.compute(*iterator.get_instances('valid'), targets=targets)
            component.fit_batches(data_train, data_valid, config['train']['batch_size'], config['train']['epochs'])
            component.save()

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            chainer.append(component, c_in, c_out, in_y, main)
    return chainer
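# Illustration (hypothetical, not part of the original module): a minimal sketch of the kind of
# configuration the fit_chainer function above consumes. Only the keys it reads ('chainer',
# 'pipe', 'fit_on', 'in', 'out', 'train') are meaningful here; the component class name and
# variable names are placeholders.
_EXAMPLE_FIT_CONFIG = {
    'chainer': {
        'in': ['x'],
        'in_y': ['y'],
        'pipe': [
            {
                'class_name': 'some_vocab_estimator',  # assumed registered Estimator
                'id': 'vocab',
                'fit_on': ['x'],        # handled by the 'fit_on' branch above
                'in': ['x'],
                'out': ['x_ids'],
            },
        ],
        'out': ['x_ids'],
    },
    'train': {'batch_size': 64, 'epochs': 10},  # read by the fit_on_batch* branches
}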
def _test_model(model: Chainer, metrics_functions: List[Metric],
                iterator: DataLearningIterator, batch_size=-1, data_type='valid',
                start_time: Optional[float] = None, show_examples=False) -> Dict[str, Union[int, OrderedDict, str]]:
    if start_time is None:
        start_time = time.time()

    expected_outputs = list(set().union(model.out_params, *[m.inputs for m in metrics_functions]))

    outputs = {out: [] for out in expected_outputs}
    examples = 0
    for x, y_true in iterator.gen_batches(batch_size, data_type, shuffle=False):
        examples += len(x)
        y_predicted = list(model.compute(list(x), list(y_true), targets=expected_outputs))
        if len(expected_outputs) == 1:
            y_predicted = [y_predicted]
        for out, val in zip(outputs.values(), y_predicted):
            out += list(val)

    metrics = [(m.name, m.fn(*[outputs[i] for i in m.inputs])) for m in metrics_functions]

    report = {
        'eval_examples_count': examples,
        'metrics': prettify_metrics(metrics),
        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
    }

    if show_examples:
        try:
            # collect per-example predictions for the chainer's output parameters from the last batch
            # (the original code zipped over a dict comprehension here, which iterated over parameter
            # names instead of predicted values)
            y_out = zip(*[y_predicted_group
                          for out_name, y_predicted_group in zip(expected_outputs, y_predicted)
                          if out_name in model.out_params])
            if len(model.out_params) == 1:
                y_out = [y_out_item[0] for y_out_item in y_out]
            report['examples'] = [{
                'x': x_item,
                'y_predicted': y_predicted_item,
                'y_true': y_true_item
            } for x_item, y_predicted_item, y_true_item in zip(x, y_out, y_true)]
        except NameError:
            log.warning(f'Could not log examples for {data_type}, assuming it\'s empty')

    return report
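# For orientation (a hypothetical sketch, not the library's own definition): _test_model only
# relies on each metric exposing `.name`, `.fn` and `.inputs`, where `inputs` names keys that are
# collected into the `outputs` dict above. In DeepPavlov these objects come from the metrics
# parsing helpers (`parse_metrics` / `_parse_metrics`); the namedtuple below merely mimics that
# shape for illustration.
from collections import namedtuple

_ExampleMetric = namedtuple('_ExampleMetric', ['name', 'fn', 'inputs'])

_example_accuracy = _ExampleMetric(
    name='accuracy',
    fn=lambda y_true, y_predicted: sum(t == p for t, p in zip(y_true, y_predicted)) / max(len(y_true), 1),
    inputs=('y', 'y_predicted'))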
class FitTrainer:
    """
    Trainer class for fitting and evaluating :class:`Estimators <deeppavlov.core.models.estimator.Estimator>`

    Args:
        chainer_config: ``"chainer"`` block of a configuration file
        batch_size: batch size to use for partial fitting (if available) and evaluation,
            the whole dataset is used if ``batch_size`` is negative or zero (default is ``-1``)
        metrics: iterable of metrics where each metric can be a registered metric name or a dict of ``name`` and
            ``inputs`` where ``name`` is a registered metric name and ``inputs`` is a collection of parameter names
            from chainer's inner memory that will be passed to the metric function;
            default value for ``inputs`` parameter is a concatenation of chainer's ``in_y`` and ``out`` fields
            (default is ``('accuracy',)``)
        evaluation_targets: data types on which to evaluate the trained pipeline (default is ``('valid', 'test')``)
        show_examples: a flag used to print inputs, expected outputs and predicted outputs for the last batch
            in evaluation logs (default is ``False``)
        tensorboard_log_dir: path to a directory where tensorboard logs can be stored, ignored if None
            (default is ``None``)
        max_test_batches: maximum batches count for pipeline testing and evaluation, ignored if negative
            (default is ``-1``)
        **kwargs: additional parameters whose names will be logged but otherwise ignored
    """

    def __init__(self, chainer_config: dict, *, batch_size: int = -1,
                 metrics: Iterable[Union[str, dict]] = ('accuracy',),
                 evaluation_targets: Iterable[str] = ('valid', 'test'),
                 show_examples: bool = False,
                 tensorboard_log_dir: Optional[Union[str, Path]] = None,
                 max_test_batches: int = -1,
                 **kwargs) -> None:
        if kwargs:
            log.info(f'{self.__class__.__name__} got additional init parameters {list(kwargs)} that will be ignored')
        self.chainer_config = chainer_config
        self._chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
        self.batch_size = batch_size
        self.metrics = parse_metrics(metrics, self._chainer.in_y, self._chainer.out_params)
        self.evaluation_targets = tuple(evaluation_targets)
        self.show_examples = show_examples

        self.max_test_batches = None if max_test_batches < 0 else max_test_batches

        self.tensorboard_log_dir: Optional[Path] = tensorboard_log_dir
        if tensorboard_log_dir is not None:
            try:
                # noinspection PyPackageRequirements
                # noinspection PyUnresolvedReferences
                import tensorflow
            except ImportError:
                log.warning('TensorFlow could not be imported, so tensorboard log directory '
                            f'`{self.tensorboard_log_dir}` will be ignored')
                self.tensorboard_log_dir = None
            else:
                self.tensorboard_log_dir = expand_path(tensorboard_log_dir)
                self._tf = tensorflow

        self._built = False
        self._saved = False
        self._loaded = False

    def fit_chainer(self, iterator: Union[DataFittingIterator, DataLearningIterator]) -> None:
        """
        Build the pipeline :class:`~deeppavlov.core.common.chainer.Chainer` and successively fit
        :class:`Estimator <deeppavlov.core.models.estimator.Estimator>` components using a provided data iterator
        """
        if self._built:
            raise RuntimeError('Cannot fit already built chainer')
        for component_index, component_config in enumerate(self.chainer_config['pipe'], 1):
            component = from_params(component_config, mode='train')
            if 'fit_on' in component_config:
                component: Estimator

                targets = component_config['fit_on']
                if isinstance(targets, str):
                    targets = [targets]

                if self.batch_size > 0 and callable(getattr(component, 'partial_fit', None)):
                    # fit the estimator batch by batch via partial_fit
                    writer = None

                    for i, (x, y) in enumerate(iterator.gen_batches(self.batch_size, shuffle=False)):
                        preprocessed = self._chainer.compute(x, y, targets=targets)
                        # noinspection PyUnresolvedReferences
                        result = component.partial_fit(*preprocessed)

                        if result is not None and self.tensorboard_log_dir is not None:
                            if writer is None:
                                writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir /
                                                                         f'partial_fit_{component_index}_log'))
                            for name, score in result.items():
                                summary = self._tf.Summary()
                                summary.value.add(tag='partial_fit/' + name, simple_value=score)
                                writer.add_summary(summary, i)
                            writer.flush()
                else:
                    # fit the estimator in one call on all available instances
                    preprocessed = self._chainer.compute(*iterator.get_instances(), targets=targets)
                    if len(targets) == 1:
                        preprocessed = [preprocessed]
                    result: Optional[Dict[str, Iterable[float]]] = component.fit(*preprocessed)

                    if result is not None and self.tensorboard_log_dir is not None:
                        writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir /
                                                                 f'fit_log_{component_index}'))
                        for name, scores in result.items():
                            for i, score in enumerate(scores):
                                summary = self._tf.Summary()
                                summary.value.add(tag='fit/' + name, simple_value=score)
                                writer.add_summary(summary, i)
                        writer.flush()

                component.save()

            if 'in' in component_config:
                c_in = component_config['in']
                c_out = component_config['out']
                in_y = component_config.get('in_y', None)
                main = component_config.get('main', False)
                self._chainer.append(component, c_in, c_out, in_y, main)
        self._built = True

    def _load(self) -> None:
        if not self._loaded:
            self._chainer.destroy()
            self._chainer = build_model({'chainer': self.chainer_config}, load_trained=self._saved)
            self._loaded = True

    def get_chainer(self) -> Chainer:
        """Returns a :class:`~deeppavlov.core.common.chainer.Chainer` built from ``self.chainer_config`` for inference"""
        self._load()
        return self._chainer

    def train(self, iterator: Union[DataFittingIterator, DataLearningIterator]) -> None:
        """Calls :meth:`~fit_chainer` with provided data iterator as an argument"""
        self.fit_chainer(iterator)
        self._saved = True

    def test(self, data: Iterable[Tuple[Collection[Any], Collection[Any]]],
             metrics: Optional[Collection[Metric]] = None, *,
             start_time: Optional[float] = None, show_examples: Optional[bool] = None) -> dict:
        """
        Calculate metrics and return reports on provided data for the currently stored
        :class:`~deeppavlov.core.common.chainer.Chainer`

        Args:
            data: iterable of batches of inputs and expected outputs
            metrics: collection of metrics namedtuples containing names for report, metric functions
                and their inputs names (if omitted, ``self.metrics`` is used)
            start_time: start time for test report
            show_examples: a flag used to return inputs, expected outputs and predicted outputs for the last batch
                in a result report (if omitted, ``self.show_examples`` is used)

        Returns:
            a report dict containing calculated metrics, spent time value, examples count in tested data
            and, optionally, examples
        """
        if start_time is None:
            start_time = time.time()
        if show_examples is None:
            show_examples = self.show_examples
        if metrics is None:
            metrics = self.metrics

        expected_outputs = list(set().union(self._chainer.out_params, *[m.inputs for m in metrics]))

        outputs = {out: [] for out in expected_outputs}
        examples = 0

        data = islice(data, self.max_test_batches)

        for x, y_true in data:
            examples += len(x)
            y_predicted = list(self._chainer.compute(list(x), list(y_true), targets=expected_outputs))
            if len(expected_outputs) == 1:
                y_predicted = [y_predicted]
            for out, val in zip(outputs.values(), y_predicted):
                out += list(val)

        if examples == 0:
            log.warning('Got empty data iterable for scoring')
            return {'eval_examples_count': 0, 'metrics': None, 'time_spent': str(datetime.timedelta(seconds=0))}

        metrics_values = []
        for metric in metrics:
            value = metric.fn(*[outputs[i] for i in metric.inputs])
            metrics_values.append((metric.alias, value))

        report = {
            'eval_examples_count': examples,
            'metrics': prettify_metrics(metrics_values),
            'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
        }

        if show_examples:
            y_predicted = zip(*[y_predicted_group
                                for out_name, y_predicted_group in zip(expected_outputs, y_predicted)
                                if out_name in self._chainer.out_params])
            if len(self._chainer.out_params) == 1:
                y_predicted = [y_predicted_item[0] for y_predicted_item in y_predicted]
            report['examples'] = [{
                'x': x_item,
                'y_predicted': y_predicted_item,
                'y_true': y_true_item
            } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)]

        return report

    def evaluate(self, iterator: DataLearningIterator, evaluation_targets: Optional[Iterable[str]] = None, *,
                 print_reports: bool = True) -> Dict[str, dict]:
        """
        Run :meth:`test` on multiple data types using the provided data iterator

        Args:
            iterator: :class:`~deeppavlov.core.data.data_learning_iterator.DataLearningIterator` used for evaluation
            evaluation_targets: iterable of data types to evaluate on
            print_reports: a flag used to print evaluation reports as json lines

        Returns:
            a dictionary with data types as keys and evaluation reports as values
        """
        self._load()
        if evaluation_targets is None:
            evaluation_targets = self.evaluation_targets

        res = {}

        for data_type in evaluation_targets:
            data_gen = iterator.gen_batches(self.batch_size, data_type=data_type, shuffle=False)
            report = self.test(data_gen)
            res[data_type] = report
            if print_reports:
                print(json.dumps({data_type: report}, ensure_ascii=False, cls=NumpyArrayEncoder))

        return res
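# Usage sketch (hypothetical, for illustration only): assuming `config` is a parsed DeepPavlov
# configuration dict with 'chainer' and 'train' blocks and `iterator` is a DataLearningIterator
# over the corresponding dataset, a FitTrainer could be driven roughly as follows. The helper
# name and the parameter values are placeholders, not recommended settings.
def _example_fit_trainer_usage(config: dict, iterator: DataLearningIterator) -> Dict[str, dict]:
    trainer = FitTrainer(config['chainer'],
                         batch_size=config['train'].get('batch_size', -1),
                         metrics=config['train'].get('metrics', ('accuracy',)),
                         evaluation_targets=('valid', 'test'),
                         show_examples=False)
    trainer.train(iterator)            # builds the chainer and fits Estimator components
    return trainer.evaluate(iterator)  # reloads the trained chainer and scores each target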
def fit_chainer(config: dict, iterator: Union[DataLearningIterator, DataFittingIterator]) -> Chainer:
    """Fit and return the chainer described in corresponding configuration dictionary."""
    chainer_config: dict = config['chainer']
    chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
    for component_config in chainer_config['pipe']:
        component = from_params(component_config, mode='train')

        if ('fit_on' in component_config) and (not callable(getattr(component, 'partial_fit', None))):
            component: Estimator

            targets = component_config['fit_on']
            if isinstance(targets, str):
                targets = [targets]

            preprocessed = chainer.compute(*iterator.get_instances('train'), targets=targets)
            if len(targets) == 1:  # use the normalized targets list, so a single string target is handled correctly
                preprocessed = [preprocessed]

            result = component.fit(*preprocessed)

            if result is not None and config['train'].get('tensorboard_log_dir') is not None:
                import tensorflow as tf
                tb_log_dir = expand_path(config['train']['tensorboard_log_dir'])
                writer = tf.summary.FileWriter(str(tb_log_dir / 'fit_log'))
                for name, scores in result.items():
                    for i, score in enumerate(scores):
                        summ = tf.Summary()
                        summ.value.add(tag='fit/' + name, simple_value=score)
                        writer.add_summary(summ, i)
                writer.flush()

            component.save()

        if 'fit_on_batch' in component_config:
            log.warning('`fit_on_batch` is deprecated and will be removed in future versions.'
                        ' Please use `fit_on` instead.')

        if ('fit_on_batch' in component_config) or \
                (('fit_on' in component_config) and callable(getattr(component, 'partial_fit', None))):
            component: Estimator

            # prefer 'fit_on' and fall back to the deprecated 'fit_on_batch'
            # (the fallback is looked up lazily so a missing 'fit_on_batch' key does not raise)
            targets = component_config.get('fit_on', component_config.get('fit_on_batch'))
            if isinstance(targets, str):
                targets = [targets]

            writer = None

            for i, data in enumerate(iterator.gen_batches(config['train']['batch_size'], shuffle=False)):
                preprocessed = chainer.compute(*data, targets=targets)
                if len(targets) == 1:
                    preprocessed = [preprocessed]
                result = component.partial_fit(*preprocessed)

                if result is not None and config['train'].get('tensorboard_log_dir') is not None:
                    # create the writer lazily on the first batch that reports scores
                    if writer is None:
                        import tensorflow as tf
                        tb_log_dir = expand_path(config['train']['tensorboard_log_dir'])
                        writer = tf.summary.FileWriter(str(tb_log_dir / 'fit_batches_log'))
                    for name, score in result.items():
                        summ = tf.Summary()
                        summ.value.add(tag='fit_batches/' + name, simple_value=score)
                        writer.add_summary(summ, i)
                    writer.flush()

            component.save()

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            chainer.append(component, c_in, c_out, in_y, main)
    return chainer
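# Illustration (hypothetical, not part of the original module): with the fit_chainer version
# above, a 'fit_on' component is fitted batch-by-batch via partial_fit() only if it actually
# implements that method; otherwise fit() is called once on the whole 'train' split. A legacy
# 'fit_on_batch' entry still works but triggers the deprecation warning. The class name and
# keys below are placeholders.
_EXAMPLE_PARTIAL_FIT_COMPONENT = {
    'class_name': 'some_online_estimator',  # assumed to define partial_fit()
    'fit_on': ['x', 'y'],                   # routed to the batched partial_fit branch
    'in': ['x'],
    'out': ['y_predicted'],
}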
def _train_batches(model: Chainer, iterator: DataLearningIterator, train_config: dict,
                   metrics_functions: List[Metric], *, start_epoch_num: Optional[int] = None) -> NNModel:

    default_train_config = {
        'epochs': 0,
        'start_epoch_num': 0,
        'max_batches': 0,
        'batch_size': 1,

        'metric_optimization': 'maximize',

        'validation_patience': 5,
        'val_every_n_epochs': 0,
        'val_every_n_batches': 0,

        'log_every_n_batches': 0,
        'log_every_n_epochs': 0,

        'show_examples': False,  # read below when building reports

        'validate_best': True,
        'test_best': True,
        'tensorboard_log_dir': None,
    }

    train_config = dict(default_train_config, **train_config)

    if 'train_metrics' in train_config:
        train_metrics_functions = _parse_metrics(train_config['train_metrics'], model.in_y, model.out_params)
    else:
        train_metrics_functions = metrics_functions
    expected_outputs = list(set().union(model.out_params, *[m.inputs for m in train_metrics_functions]))

    if train_config['metric_optimization'] == 'maximize':
        def improved(score, best):
            return score > best

        best = float('-inf')
    elif train_config['metric_optimization'] == 'minimize':
        def improved(score, best):
            return score < best

        best = float('inf')
    else:
        raise ConfigError('metric_optimization has to be one of {}'.format(['maximize', 'minimize']))

    i = 0
    epochs = start_epoch_num if start_epoch_num is not None else train_config['start_epoch_num']
    examples = 0
    saved = False
    patience = 0
    log_on = train_config['log_every_n_batches'] > 0 or train_config['log_every_n_epochs'] > 0
    outputs = {key: [] for key in expected_outputs}
    losses = []
    start_time = time.time()
    break_flag = False

    if train_config['tensorboard_log_dir'] is not None:
        import tensorflow as tf
        tb_log_dir = expand_path(train_config['tensorboard_log_dir'])

        tb_train_writer = tf.summary.FileWriter(str(tb_log_dir / 'train_log'))
        tb_valid_writer = tf.summary.FileWriter(str(tb_log_dir / 'valid_log'))

    # validate first (important if model is pre-trained)
    if train_config['val_every_n_epochs'] > 0 or train_config['val_every_n_batches'] > 0:
        report = _test_model(model, metrics_functions, iterator,
                             train_config['batch_size'], 'valid', start_time, train_config['show_examples'])
        report['epochs_done'] = epochs
        report['batches_seen'] = i
        report['train_examples_seen'] = examples

        metrics = list(report['metrics'].items())

        m_name, score = metrics[0]
        if improved(score, best):
            patience = 0
            log.info('New best {} of {}'.format(m_name, score))
            best = score
            log.info('Saving model')
            model.save()
            saved = True
        else:
            patience += 1
            log.info('Did not improve on the {} of {}'.format(m_name, best))

        report['impatience'] = patience
        if train_config['validation_patience'] > 0:
            report['patience_limit'] = train_config['validation_patience']

        model.process_event(event_name='after_validation', data=report)
        report = {'valid': report}
        print(json.dumps(report, ensure_ascii=False))

    try:
        while True:
            for x, y_true in iterator.gen_batches(train_config['batch_size']):
                if log_on and len(train_metrics_functions) > 0:
                    y_predicted = list(model.compute(list(x), list(y_true), targets=expected_outputs))
                    if len(expected_outputs) == 1:
                        y_predicted = [y_predicted]
                    for out, val in zip(outputs.values(), y_predicted):
                        out += list(val)
                result = model.train_on_batch(x, y_true)
                if not isinstance(result, dict):
                    result = {'loss': result} if result is not None else {}
                if 'loss' in result:
                    losses.append(result['loss'])
                i += 1
                examples += len(x)

                # batch-level train logging
                if train_config['log_every_n_batches'] > 0 and i % train_config['log_every_n_batches'] == 0:
                    metrics = [(m.name, m.fn(*[outputs[i] for i in m.inputs])) for m in train_metrics_functions]
                    report = {
                        'epochs_done': epochs,
                        'batches_seen': i,
                        'examples_seen': examples,
                        'metrics': prettify_metrics(metrics),
                        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                    }
                    default_report_keys = list(report.keys())
                    report.update(result)

                    if train_config['show_examples']:
                        try:
                            y_predicted = zip(*[y_predicted_group
                                                for out_name, y_predicted_group in zip(expected_outputs, y_predicted)
                                                if out_name in model.out_params])
                            if len(model.out_params) == 1:
                                y_predicted = [y_predicted_item[0] for y_predicted_item in y_predicted]
                            report['examples'] = [{
                                'x': x_item,
                                'y_predicted': y_predicted_item,
                                'y_true': y_true_item
                            } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)]
                        except NameError:
                            log.warning('Could not log examples as y_predicted is not defined')

                    if losses:
                        report['loss'] = sum(losses) / len(losses)
                        losses = []

                    model.process_event(event_name='after_train_log', data=report)

                    if train_config['tensorboard_log_dir'] is not None:
                        summ = tf.Summary()

                        for name, score in metrics:
                            summ.value.add(tag='every_n_batches/' + name, simple_value=score)
                        for name, score in report.items():
                            if name not in default_report_keys:
                                summ.value.add(tag='every_n_batches/' + name, simple_value=score)

                        tb_train_writer.add_summary(summ, i)
                        tb_train_writer.flush()

                    report = {'train': report}
                    print(json.dumps(report, ensure_ascii=False))

                    for out in outputs.values():
                        out.clear()

                # batch-level validation
                if train_config['val_every_n_batches'] > 0 and i % train_config['val_every_n_batches'] == 0:
                    report = _test_model(model, metrics_functions, iterator,
                                         train_config['batch_size'], 'valid', start_time,
                                         train_config['show_examples'])
                    report['epochs_done'] = epochs
                    report['batches_seen'] = i
                    report['train_examples_seen'] = examples

                    metrics = list(report['metrics'].items())

                    if train_config['tensorboard_log_dir'] is not None:
                        summ = tf.Summary()
                        for name, score in metrics:
                            summ.value.add(tag='every_n_batches/' + name, simple_value=score)
                        tb_valid_writer.add_summary(summ, i)
                        tb_valid_writer.flush()

                    m_name, score = metrics[0]
                    if improved(score, best):
                        patience = 0
                        log.info('New best {} of {}'.format(m_name, score))
                        best = score
                        log.info('Saving model')
                        model.save()
                        saved = True
                    else:
                        patience += 1
                        log.info('Did not improve on the {} of {}'.format(m_name, best))

                    report['impatience'] = patience
                    if train_config['validation_patience'] > 0:
                        report['patience_limit'] = train_config['validation_patience']

                    model.process_event(event_name='after_validation', data=report)
                    report = {'valid': report}
                    print(json.dumps(report, ensure_ascii=False))

                    if patience >= train_config['validation_patience'] > 0:
                        log.info('Ran out of patience')
                        break_flag = True
                        break

                if i >= train_config['max_batches'] > 0:
                    break_flag = True
                    break

                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }
                model.process_event(event_name='after_batch', data=report)
            if break_flag:
                break

            epochs += 1

            report = {
                'epochs_done': epochs,
                'batches_seen': i,
                'train_examples_seen': examples,
                'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
            }
            model.process_event(event_name='after_epoch', data=report)

            # epoch-level train logging
            if train_config['log_every_n_epochs'] > 0 and epochs % train_config['log_every_n_epochs'] == 0 \
                    and outputs:
                metrics = [(m.name, m.fn(*[outputs[i] for i in m.inputs])) for m in train_metrics_functions]
                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'metrics': prettify_metrics(metrics),
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }
                default_report_keys = list(report.keys())
                report.update(result)

                if train_config['show_examples']:
                    try:
                        y_predicted = zip(*[y_predicted_group
                                            for out_name, y_predicted_group in zip(expected_outputs, y_predicted)
                                            if out_name in model.out_params])
                        if len(model.out_params) == 1:
                            y_predicted = [y_predicted_item[0] for y_predicted_item in y_predicted]
                        report['examples'] = [{
                            'x': x_item,
                            'y_predicted': y_predicted_item,
                            'y_true': y_true_item
                        } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)]
                    except NameError:
                        log.warning('Could not log examples')

                if losses:
                    report['loss'] = sum(losses) / len(losses)
                    losses = []

                model.process_event(event_name='after_train_log', data=report)

                if train_config['tensorboard_log_dir'] is not None:
                    summ = tf.Summary()

                    for name, score in metrics:
                        summ.value.add(tag='every_n_epochs/' + name, simple_value=score)
                    for name, score in report.items():
                        if name not in default_report_keys:
                            summ.value.add(tag='every_n_epochs/' + name, simple_value=score)

                    tb_train_writer.add_summary(summ, epochs)
                    tb_train_writer.flush()

                report = {'train': report}
                print(json.dumps(report, ensure_ascii=False))

                for out in outputs.values():
                    out.clear()

            # epoch-level validation
            if train_config['val_every_n_epochs'] > 0 and epochs % train_config['val_every_n_epochs'] == 0:
                report = _test_model(model, metrics_functions, iterator,
                                     train_config['batch_size'], 'valid', start_time, train_config['show_examples'])
                report['epochs_done'] = epochs
                report['batches_seen'] = i
                report['train_examples_seen'] = examples

                metrics = list(report['metrics'].items())

                if train_config['tensorboard_log_dir'] is not None:
                    summ = tf.Summary()
                    for name, score in metrics:
                        summ.value.add(tag='every_n_epochs/' + name, simple_value=score)
                    tb_valid_writer.add_summary(summ, epochs)
                    tb_valid_writer.flush()

                m_name, score = metrics[0]
                if improved(score, best):
                    patience = 0
                    log.info('New best {} of {}'.format(m_name, score))
                    best = score
                    log.info('Saving model')
                    model.save()
                    saved = True
                else:
                    patience += 1
                    log.info('Did not improve on the {} of {}'.format(m_name, best))

                report['impatience'] = patience
                if train_config['validation_patience'] > 0:
                    report['patience_limit'] = train_config['validation_patience']

                model.process_event(event_name='after_validation', data=report)
                report = {'valid': report}
                print(json.dumps(report, ensure_ascii=False))

                if patience >= train_config['validation_patience'] > 0:
                    log.info('Ran out of patience')
                    break

            if epochs >= train_config['epochs'] > 0:
                break
    except KeyboardInterrupt:
        log.info('Stopped training')

    if not saved:
        log.info('Saving model')
        model.save()

    return model
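# Illustration (hypothetical values, not recommended settings): the shape of the 'train' block
# consumed by _train_batches above. Omitted keys fall back to default_train_config.
_EXAMPLE_TRAIN_CONFIG = {
    'epochs': 5,                  # 0 would mean "no epoch limit"
    'batch_size': 32,
    'metric_optimization': 'maximize',
    'validation_patience': 5,     # stop after 5 validations without improvement
    'val_every_n_epochs': 1,
    'log_every_n_batches': 100,
    'show_examples': False,
    'tensorboard_log_dir': None,  # set to a path to write tensorboard summaries
}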