def evaluate(self, tst_data, save_dir=None, logger: logging.Logger = None, batch_size=None, output=False,
             **kwargs):
    """Evaluate test set.

    Args:
        tst_data: Test set, which is usually a file path. A ``str`` is resolved through ``get_resource``.
        save_dir: The directory to save evaluation scores or predictions.
        logger: Logger for reporting progress. Built via ``build_logger`` when ``None``.
        batch_size: Batch size for test dataloader. Falls back to ``config['batch_size']`` (default 32).
        output: Whether to save outputs into some file. ``True`` lets ``generate_prediction_filename``
            choose the destination.
        **kwargs: Merged with the config and forwarded to ``evaluate_dataloader``.

    Returns:
        (metric, outputs) where outputs are the return values of ``evaluate_dataloader``.

    Raises:
        RuntimeError: If no model has been built or loaded yet.
    """
    if not self.model:
        raise RuntimeError('Call fit or load before evaluate.')
    if isinstance(tst_data, str):
        tst_data = get_resource(tst_data)
        filename = os.path.basename(tst_data)
    else:
        filename = None
    if output is True:
        output = self.generate_prediction_filename(tst_data if isinstance(tst_data, str) else 'test.txt',
                                                   save_dir)
    if logger is None:
        _logger_name = basename_no_ext(filename) if filename else None
        logger = self.build_logger(_logger_name, save_dir)
    if not batch_size:
        batch_size = self.config.get('batch_size', 32)
    data = self.build_dataloader(**merge_dict(self.config, data=tst_data, batch_size=batch_size, shuffle=False,
                                              device=self.devices[0], logger=logger, overwrite=True))
    # Unwrap nested ``.dataset`` attributes until the innermost dataset is reached.
    dataset = data
    while dataset and hasattr(dataset, 'dataset'):
        dataset = dataset.dataset
    num_samples = len(dataset) if dataset else None
    if output and isinstance(dataset, TransformableDataset):
        def add_idx(samples):
            # Tag every non-empty sample with its position in the dataset.
            for idx, sample in enumerate(samples):
                if sample:
                    sample[IDX] = idx

        add_idx(dataset.data)
        if dataset.cache:
            add_idx(dataset.cache)

    criterion = self.build_criterion(**self.config)
    metric = self.build_metric(**self.config)
    # perf_counter is monotonic, unlike the wall clock, so the measured duration cannot go negative.
    start = time.perf_counter()
    outputs = self.evaluate_dataloader(data, criterion=criterion, filename=filename, output=output,
                                       input=tst_data, save_dir=save_dir, test=True, num_samples=num_samples,
                                       **merge_dict(self.config, batch_size=batch_size, metric=metric,
                                                    logger=logger, **kwargs))
    elapsed = time.perf_counter() - start
    if logger:
        if num_samples:
            count, unit = num_samples, 'samples'
        else:
            count, unit = len(data), 'batches'
        # A zero duration must not turn a finished evaluation into a ZeroDivisionError.
        speed = count / elapsed if elapsed > 0 else float('inf')
        logger.info(f'speed: {speed:.0f} {unit}/second')
    return metric, outputs
def build(self, logger, **kwargs):
    """Build the model, optimizer, loss and metrics, then compile the model.

    Args:
        logger: Logger forwarded to ``build_metrics``.
        **kwargs: Only ``training``, ``loss`` and ``metrics`` are read here.

    Returns:
        A tuple ``(model, optimizer, loss, metrics)``.
    """
    self.transform.build_config()
    model_kwargs = merge_dict(self.config, training=kwargs.get('training', None), loss=kwargs.get('loss', None))
    self.model = self.build_model(**model_kwargs)
    self.transform.lock_vocabs()
    optimizer = self.build_optimizer(**self.config)

    # ``build_loss`` always receives a ``loss`` keyword, defaulting to None when the config has none.
    if 'loss' in self.config:
        loss_kwargs = self.config
    else:
        loss_kwargs = dict(list(self.config.items()) + [('loss', None)])
    loss = self.build_loss(**loss_kwargs)

    metrics_kwargs = merge_dict(self.config, metrics=kwargs.get('metrics', 'accuracy'), logger=logger,
                                overwrite=True)
    metrics = self.build_metrics(**metrics_kwargs)
    # A single Keras metric is wrapped into a list; anything else is passed through untouched.
    if not isinstance(metrics, list) and isinstance(metrics, tf.keras.metrics.Metric):
        metrics = [metrics]

    if not self.model.built:
        sample_inputs = self.sample_data
        if sample_inputs is not None:
            # Calling the model on sample inputs builds it.
            self.model(sample_inputs)
        else:
            first_shapes = self.transform.output_shapes[0]
            if len(first_shapes) == 1 and first_shapes[0] is None:
                x_shape = first_shapes
            else:
                # Prepend the batch dimension to every input shape.
                x_shape = [[None] + shape for shape in first_shapes]
            self.model.build(input_shape=x_shape)

    self.compile_model(optimizer, loss, metrics)
    return self.model, optimizer, loss, metrics
def load(self, save_dir: str, devices=None, verbose=HANLP_VERBOSE, **kwargs):
    """Load from a local/remote component.

    Args:
        save_dir: An identifier which can be a local path or a remote URL or a pre-defined string.
        devices: The devices this component will be moved onto. When ``None`` and a model already
            exists, the current ``self.devices`` are reused.
        verbose: ``True`` to log loading progress.
        **kwargs: To override some configs.
    """
    resolved_dir = get_resource(save_dir)
    if devices is None and self.model:
        devices = self.devices
    self.load_config(resolved_dir, **kwargs)
    self.load_vocabs(resolved_dir)

    if verbose:
        flash('Building model [blink][yellow]...[/yellow][/blink]')
    build_kwargs = merge_dict(self.config, training=False, **kwargs, overwrite=True, inplace=True)
    self.model = self.build_model(**build_kwargs)
    if verbose:
        # Clear the progress message.
        flash('')

    self.load_weights(resolved_dir, **kwargs)
    self.to(devices)
    self.model.eval()
def predict(self, data: Union[str, List[str]], batch_size: int = None, **kwargs):
    """Predict a label for each input.

    Args:
        data: A single input (a ``str`` or a ``tuple``) or a list of inputs. When
            ``config.text_b_key`` is set, every input is indexed as a pair (``d[0]``, ``d[1]``).
        batch_size: Decoding batch size. Falls back to ``config['batch_size']`` when not given.
        **kwargs: Not used.

    Returns:
        One label for a single input, a list of labels for a list of inputs, or ``[]`` for empty input.
    """
    if not data:
        return []
    flat = isinstance(data, (str, tuple))
    if flat:
        data = [data]
    # Without this fallback ``batch_size=None`` would overwrite the configured value below.
    if not batch_size:
        batch_size = self.config.get('batch_size', None)
    text_a_key = self.config.text_a_key
    text_b_key = self.config.text_b_key
    samples = []
    for idx, d in enumerate(data):
        sample = {IDX: idx}
        if text_b_key:
            sample[text_a_key] = d[0]
            sample[text_b_key] = d[1]
        else:
            sample[text_a_key] = d
        samples.append(sample)
    dataloader = self.build_dataloader(samples,
                                       sorting=False,
                                       **merge_dict(self.config,
                                                    batch_size=batch_size,
                                                    shuffle=False,
                                                    device=self.device,
                                                    overwrite=True))
    labels = [None] * len(data)
    vocab = self.vocabs.label
    for batch in dataloader:
        logits = self.feed_batch(batch)
        pred = logits.argmax(-1).tolist()
        # ``batch[IDX]`` carries the original positions, so results land in input order.
        for idx, tag in zip(batch[IDX], pred):
            labels[idx] = vocab.idx_to_token[tag]
    if flat:
        return labels[0]
    return labels
def predict(self, data: Union[List[str], List[List[str]]], batch_size: int = None, **kwargs):
    """Run prediction on one input or a list of inputs.

    Args:
        data: A single input (as judged by ``input_is_flat``) or a list of inputs.
        batch_size: Decoding batch size; ``config.batch_size`` is used when not given.
        **kwargs: Merged into the dataloader arguments.

    Returns:
        The output for a single input, a list of outputs in input order, or ``[]`` for empty input.
    """
    if not data:
        return []
    is_single = self.input_is_flat(data)
    inputs = [data] if is_single else data
    samples = self.build_samples(inputs)
    batch_size = batch_size or self.config.batch_size
    device = self.devices[0]
    loader_kwargs = merge_dict(self.config, batch_size=batch_size, overwrite=True, **kwargs)
    dataloader = self.build_dataloader(samples, device=device, shuffle=False, **loader_kwargs)

    original_positions = []
    results = []
    for batch in dataloader:
        out, mask = self.feed_batch(batch)
        self.decode_output(out, mask, batch)
        results += self.prediction_to_human(out, batch)
        original_positions += batch[IDX]
    # Put results back into the order the inputs were given in.
    results = reorder(results, original_positions)
    return results[0] if is_single else results
def predict(self, data: Union[str, List[str]], batch_size: int = None, **kwargs):
    """Run prediction on one input or a list of inputs.

    Args:
        data: A single input (as judged by ``input_is_flat``) or a list of inputs.
        batch_size: Decoding batch size. Falls back to ``config['batch_size']`` when not given.
        **kwargs: Not used.

    Returns:
        The prediction for a single input, a list of predictions in input order, or ``[]`` for
        empty input.
    """
    if not data:
        return []
    flat = self.input_is_flat(data)
    if flat:
        data = [data]
    samples = self.build_samples(data)
    # Without this fallback ``batch_size=None`` would overwrite the configured value below.
    if not batch_size:
        batch_size = self.config.get('batch_size', None)
    dataloader = self.build_dataloader(samples,
                                       device=self.device,
                                       **merge_dict(self.config, batch_size=batch_size, overwrite=True))
    outputs = []
    orders = []
    for batch in dataloader:
        out, mask = self.feed_batch(batch)
        prediction = self.decode_output(out, mask, batch, span_probs=None)
        outputs.extend(prediction)
        orders.extend(batch[IDX])
    # Put predictions back into the order the inputs were given in.
    outputs = reorder(outputs, orders)
    if flat:
        return outputs[0]
    return outputs
def load(self, save_dir: str, logger=hanlp.utils.log_util.logger, **kwargs):
    """Load config, vocabs, weights and meta from a saved component, rebuilding the model.

    Args:
        save_dir: Identifier of the saved component; recorded as ``meta['load_path']`` before
            being resolved through ``get_resource``.
        logger: Logger forwarded to ``build``.
        **kwargs: Merged into the build arguments and forwarded to ``load_weights``.
    """
    # Remember the identifier as the caller gave it, before resolution.
    self.meta['load_path'] = save_dir
    resolved_dir = get_resource(save_dir)
    self.load_config(resolved_dir)
    self.load_vocabs(resolved_dir)
    build_kwargs = merge_dict(self.config, training=False, logger=logger, **kwargs, overwrite=True,
                              inplace=True)
    self.build(**build_kwargs)
    self.load_weights(resolved_dir, **kwargs)
    self.load_meta(resolved_dir)
def __call__(self, *args, **kwargs):
    """Predict on data fed by user.

    Delegates to the parent ``__call__`` with the config merged into the keyword arguments. The
    original documentation states that this calls
    :meth:`~hanlp.common.torch_component.predict` decorated with ``torch.no_grad``.

    Args:
        *args: Sentences or tokens.
        **kwargs: Used in sub-classes.
    """
    call_kwargs = merge_dict(self.config, overwrite=True, **kwargs)
    return super().__call__(*args, **call_kwargs)
def __call__(self, data, batch_size=None, **kwargs):
    """Predict on data fed by user.

    Delegates to the parent ``__call__`` with the config merged into the keyword arguments. The
    original documentation states that this calls
    :meth:`~hanlp.common.torch_component.predict` decorated with ``torch.no_grad``.

    Args:
        data: Sentences or tokens.
        batch_size: Decoding batch size; the configured ``batch_size`` is used when falsy.
        **kwargs: Used in sub-classes.
    """
    effective_batch_size = batch_size or self.config.get('batch_size', None)
    call_kwargs = merge_dict(self.config, overwrite=True, batch_size=effective_batch_size, **kwargs)
    return super().__call__(data, **call_kwargs)
def execute_training_loop(self, trn: DataLoader, dev: DataLoader, epochs, criterion, optimizer, metric, save_dir,
                          logger, patience, **kwargs):
    """Train for up to ``epochs`` epochs with early stopping on the dev metric.

    After each epoch the dev set is evaluated; weights are saved whenever the dev metric improves,
    and training stops once ``patience`` epochs pass without improvement. If the last epoch is not
    the best one, the best weights are reloaded at the end.

    Args:
        trn: Training dataloader.
        dev: Development dataloader.
        epochs: Maximum number of epochs.
        criterion: Ignored; replaced by ``self.build_criterion()`` (see note in the body).
        optimizer: Optimizer; also handed to ``build_scheduler``.
        metric: Metric passed to ``fit_dataloader``.
        save_dir: Directory the best weights are saved to and restored from.
        logger: Logger for reporting progress.
        patience: Epochs to wait for an improvement; defaults to ``epochs`` when falsy.
        **kwargs: Not used.
    """
    max_e, max_metric = 0, -1

    # NOTE(review): the ``criterion`` argument is discarded in favour of a freshly built one.
    # Kept as-is for compatibility; confirm this is intended.
    criterion = self.build_criterion()
    timer = CountdownTimer(epochs)
    ratio_width = len(f'{len(trn)}/{len(trn)}')
    scheduler = self.build_scheduler(**merge_dict(self.config, optimizer=optimizer, overwrite=True))
    if not patience:
        patience = epochs
    # Pre-bind so the checks after the loop are well defined even when ``epochs`` < 1.
    epoch = 0
    for epoch in range(1, epochs + 1):
        logger.info(f"[yellow]Epoch {epoch} / {epochs}:[/yellow]")
        self.fit_dataloader(trn, criterion, optimizer, metric, logger, ratio_width=ratio_width)
        loss, dev_metric = self.evaluate_dataloader(dev, criterion, logger)
        if scheduler:
            if isinstance(scheduler, ReduceLROnPlateau):
                scheduler.step(dev_metric.score)
            else:
                scheduler.step(epoch)
        report_patience = f'Patience: {epoch - max_e}/{patience}'
        # save the model if it is the best so far
        if dev_metric > max_metric:
            self.save_weights(save_dir)
            max_e, max_metric = epoch, dev_metric
            report_patience = '[red]Saved[/red] '
        stop = epoch - max_e >= patience
        if stop:
            timer.stop()
        timer.log(f'{report_patience} lr: {optimizer.param_groups[0]["lr"]:.4f}',
                  ratio_percentage=False, newline=True, ratio=False)
        if stop:
            break
    timer.stop()
    if max_e:
        if max_e != epoch:
            # The final epoch was not the best one: restore the best weights.
            self.load_weights(save_dir)
        logger.info(f"Max score of dev is {max_metric.score:.2%} at epoch {max_e}")
    else:
        # Nothing was saved, so there is nothing to restore and ``max_metric`` is still the
        # initial integer sentinel, which has no ``score``.
        logger.info('No weights were saved because the dev score never improved.')
    logger.info(f"{timer.elapsed_human} elapsed, average time of each epoch is {timer.elapsed_average_human}")
def predict(self, data: Any, batch_size=None, batch_max_tokens=None, conll=True, **kwargs):
    """Run prediction on one input or a list of inputs.

    Args:
        data: A single input (as judged by ``input_is_flat``) or a list of inputs.
        batch_size: Decoding batch size; ``config.batch_size`` is used when not given.
        batch_max_tokens: Token budget forwarded to the dataloader; the configured
            ``batch_max_tokens`` is used when not given.
        conll: Forwarded to ``post_outputs``.
        **kwargs: Merged into the dataloader arguments.

    Returns:
        The output for a single input, a list of outputs, or ``[]`` for empty input.
    """
    if not data:
        return []
    use_pos = self.use_pos
    is_single = self.input_is_flat(data, use_pos)
    if is_single:
        data = [data]
    samples = self.build_samples(data, use_pos)
    batch_max_tokens = batch_max_tokens or self.config.get('batch_max_tokens', None)
    batch_size = batch_size or self.config.batch_size
    device = self.devices[0]
    loader_kwargs = merge_dict(self.config, batch_size=batch_size, batch_max_tokens=batch_max_tokens,
                               overwrite=True, **kwargs)
    dataloader = self.build_dataloader(samples, device=device, shuffle=False, **loader_kwargs)

    predictions, build_data, data, order = self.before_outputs(data)
    for batch in dataloader:
        # The fourth value returned by ``feed_batch`` is not needed here.
        arc_scores, rel_scores, mask, _ = self.feed_batch(batch)
        self.collect_outputs(arc_scores, rel_scores, mask, batch, predictions, order, data, use_pos,
                             build_data)
    outputs = self.post_outputs(predictions, data, order, use_pos, build_data, conll=conll)
    return outputs[0] if is_single else outputs
def fit(self, trn_data, dev_data, save_dir, batch_size, epochs, devices=None, logger=None, seed=None,
        finetune: Union[bool, str] = False, eval_trn=True, _device_placeholder=False, **kwargs):
    """Fit to data, triggers the training procedure. For training set and dev set, they shall be local or
    remote files.

    Args:
        trn_data: Training set.
        dev_data: Development set.
        save_dir: The directory to save trained component.
        batch_size: The number of samples in a batch.
        epochs: Number of epochs.
        devices: Devices this component will live on.
        logger: Any :class:`logging.Logger` instance.
        seed: Random seed to reproduce this training.
        finetune: ``True`` to load from ``save_dir`` instead of creating a randomly initialized component.
            ``str`` to specify a different ``save_dir`` to load from.
        eval_trn: Evaluate training set after each update. This can slow down the training but provides a
            quick diagnostic for debugging.
        _device_placeholder: ``True`` to create a placeholder tensor which triggers PyTorch to occupy
            devices so other components won't take these devices as first choices.
        **kwargs: Hyperparameters used by sub-classes.

    Returns:
        Any results sub-classes would like to return. Usually the best metrics on training set.
    """
    # Common initialization steps
    config = self._capture_config(locals())
    if not logger:
        logger = self.build_logger('train', save_dir)
    # NOTE(review): ``seed=0`` is falsy and therefore also replaced by a generated seed; confirm
    # whether 0 should be honoured as an explicit seed.
    if not seed:
        self.config.seed = 233 if isdebugging() else int(time.time())
    set_seed(self.config.seed)
    logger.info(self._savable_config.to_json(sort=True))
    if isinstance(devices, list) or devices is None or isinstance(devices, float):
        flash('[yellow]Querying CUDA devices [blink]...[/blink][/yellow]')
        devices = -1 if isdebugging() else cuda_devices(devices)
        flash('')
    # Pick the first device; -1 is used when none can be determined.
    if isinstance(devices, list):
        first_device = (devices[0] if devices else -1)
    elif isinstance(devices, dict):
        first_device = next(iter(devices.values()))
    elif isinstance(devices, int):
        first_device = devices
    else:
        first_device = -1
    if _device_placeholder and first_device >= 0:
        _dummy_placeholder = self._create_dummy_placeholder_on(first_device)
    if finetune:
        if isinstance(finetune, str):
            self.load(finetune, devices=devices)
        else:
            self.load(save_dir, devices=devices)
        logger.info(
            f'Finetune model loaded with {sum(p.numel() for p in self.model.parameters() if p.requires_grad)}'
            f'/{sum(p.numel() for p in self.model.parameters())} trainable/total parameters.')
    self.on_config_ready(**self.config)
    trn = self.build_dataloader(**merge_dict(config, data=trn_data, batch_size=batch_size, shuffle=True,
                                             training=True, device=first_device, logger=logger,
                                             vocabs=self.vocabs, overwrite=True))
    dev = self.build_dataloader(**merge_dict(config, data=dev_data, batch_size=batch_size, shuffle=False,
                                             training=None, device=first_device, logger=logger,
                                             vocabs=self.vocabs, overwrite=True)) if dev_data else None
    if not finetune:
        flash('[yellow]Building model [blink]...[/blink][/yellow]')
        self.model = self.build_model(**merge_dict(config, training=True))
        flash('')
        # Check before the first use of ``self.model`` so a ``build_model`` that returns nothing
        # reports this message instead of an AttributeError from ``parameters()``.
        assert self.model, 'build_model is not properly implemented.'
        logger.info(f'Model built with {sum(p.numel() for p in self.model.parameters() if p.requires_grad)}'
                    f'/{sum(p.numel() for p in self.model.parameters())} trainable/total parameters.')
        _description = repr(self.model)
        # Only short model descriptions are logged.
        if len(_description.split('\n')) < 10:
            logger.info(_description)
    self.save_config(save_dir)
    self.save_vocabs(save_dir)
    self.to(devices, logger)
    if _device_placeholder and first_device >= 0:
        del _dummy_placeholder
    criterion = self.build_criterion(**merge_dict(config, trn=trn))
    optimizer = self.build_optimizer(**merge_dict(config, trn=trn, criterion=criterion))
    metric = self.build_metric(**self.config)
    if hasattr(trn.dataset, '__len__') and dev and hasattr(dev.dataset, '__len__'):
        logger.info(f'{len(trn.dataset)}/{len(dev.dataset)} samples in trn/dev set.')
        trn_size = len(trn) // self.config.get('gradient_accumulation', 1)
        ratio_width = len(f'{trn_size}/{trn_size}')
    else:
        ratio_width = None
    return self.execute_training_loop(**merge_dict(config, trn=trn, dev=dev, epochs=epochs, criterion=criterion,
                                                   optimizer=optimizer, metric=metric, logger=logger,
                                                   save_dir=save_dir,
                                                   devices=devices,
                                                   ratio_width=ratio_width,
                                                   trn_data=trn_data,
                                                   dev_data=dev_data,
                                                   eval_trn=eval_trn,
                                                   overwrite=True))
def fit(self, trn_data, dev_data, save_dir, batch_size, epochs, run_eagerly=False, logger=None, verbose=True,
        finetune: str = None, **kwargs):
    """Train this component and return the training history.

    Builds the transform, vocab and model, saves config/vocabs/meta to ``save_dir``, runs
    ``train_loop`` and, when a ``ModelCheckpoint`` callback recorded a best value that is not the
    last one, reloads the best weights.

    Args:
        trn_data: Training set, passed to ``build_vocab`` and ``build_train_dataset``.
        dev_data: Development set, passed to ``build_valid_dataset``.
        save_dir: Directory to save the component to; ``tempdir_human()`` is used when falsy.
        batch_size: The number of samples in a batch.
        epochs: Number of epochs.
        run_eagerly: Captured into the config; not otherwise read in this method.
        logger: Logger for reporting progress; a ``train`` logger is created when falsy.
        verbose: ``True`` for ``INFO`` level logging, otherwise ``WARN`` (only for the created logger).
        finetune: Identifier of pretrained weights resolved by ``get_resource``. For a directory,
            ``model.h5`` inside it is loaded. Weights are loaded by name, skipping mismatches.
        **kwargs: Hyperparameters captured into the config.

    Returns:
        The history returned by ``train_loop``, or the ``History`` callback when training was
        interrupted from the keyboard.
    """
    self._capture_config(locals())
    self.transform = self.build_transform(**self.config)
    if not save_dir:
        save_dir = tempdir_human()
    if not logger:
        logger = init_logger(name='train', root_dir=save_dir, level=logging.INFO if verbose else logging.WARN)
    logger.info('Hyperparameter:\n' + self.config.to_json())
    num_examples = self.build_vocab(trn_data, logger)
    logger.info('Building...')
    train_steps_per_epoch = math.ceil(num_examples / batch_size) if num_examples else None
    self.config.train_steps = train_steps_per_epoch * epochs if num_examples else None
    model, optimizer, loss, metrics = self.build(**merge_dict(self.config, logger=logger, training=True))
    logger.info('Model built:\n' + summary_of_model(self.model))
    if finetune:
        finetune = get_resource(finetune)
        if os.path.isdir(finetune):
            finetune = os.path.join(finetune, 'model.h5')
        model.load_weights(finetune, by_name=True, skip_mismatch=True)
        logger.info(f'Loaded pretrained weights from {finetune} for finetuning')
    self.save_config(save_dir)
    self.save_vocabs(save_dir)
    self.save_meta(save_dir)
    trn_data = self.build_train_dataset(trn_data, batch_size, num_examples)
    dev_data = self.build_valid_dataset(dev_data, batch_size)
    callbacks = self.build_callbacks(save_dir, **merge_dict(self.config, overwrite=True, logger=logger))
    # need to know #batches, otherwise progbar crashes
    dev_steps = math.ceil(self.num_samples_in(dev_data) / batch_size)
    checkpoint = get_callback_by_class(callbacks, tf.keras.callbacks.ModelCheckpoint)
    timer = Timer()
    try:
        history = self.train_loop(**merge_dict(self.config, trn_data=trn_data, dev_data=dev_data, epochs=epochs,
                                               num_examples=num_examples,
                                               train_steps_per_epoch=train_steps_per_epoch, dev_steps=dev_steps,
                                               callbacks=callbacks, logger=logger, model=model,
                                               optimizer=optimizer, loss=loss,
                                               metrics=metrics, overwrite=True))
    except KeyboardInterrupt:
        print()
        # ``np.inf`` is used because the ``np.Inf`` alias no longer exists in NumPy 2.0.
        if not checkpoint or checkpoint.best in (np.inf, -np.inf):
            self.save_weights(save_dir)
            logger.info('Aborted with model saved')
        else:
            logger.info(f'Aborted with model saved with best {checkpoint.monitor} = {checkpoint.best:.4f}')
        # Recover whatever history was recorded before the interruption.
        history = get_callback_by_class(callbacks, tf.keras.callbacks.History)
    delta_time = timer.stop()
    best_epoch_ago = 0
    if history and hasattr(history, 'epoch'):
        trained_epoch = len(history.epoch)
        logger.info('Trained {} epochs in {}, each epoch takes {}'.
                    format(trained_epoch, delta_time, delta_time / trained_epoch if trained_epoch else delta_time))
        save_json(history.history, io_util.path_join(save_dir, 'history.json'), cls=io_util.NumpyEncoder)
        # There may be no ModelCheckpoint callback at all, so guard before reading its attributes.
        monitor_history = history.history.get(checkpoint.monitor, None) if checkpoint else None
        # Only look up the best value when it is really part of this run's history;
        # ``list.index`` would raise ValueError otherwise.
        if monitor_history and checkpoint.best in monitor_history:
            best_epoch_ago = len(monitor_history) - monitor_history.index(checkpoint.best)
            if checkpoint.best != monitor_history[-1]:
                logger.info(f'Restored the best model saved with best '
                            f'{checkpoint.monitor} = {checkpoint.best:.4f} '
                            f'saved {best_epoch_ago} epochs ago')
                self.load_weights(save_dir)  # restore best model
    return history