def create_eval_engine(model, device):
    process_function = get_process_function(model, device)
    eval_engine = Engine(process_function)

    # Overall accuracy plus unaveraged (per-class) recall and precision
    accuracy = Accuracy()
    accuracy.attach(eval_engine, "accuracy")
    recall = Recall(average=False)
    recall.attach(eval_engine, "recall")
    precision = Precision(average=False)
    precision.attach(eval_engine, "precision")

    # Per-class F1 and F2 built by metric arithmetic (each expression yields a MetricsLambda)
    f1 = precision * recall * 2 / (precision + recall)
    f1.attach(eval_engine, "f1")
    f2 = precision * recall * 5 / ((4 * precision) + recall)
    f2.attach(eval_engine, "f2")

    # Macro-averaged F1/F2 computed with an explicit MetricsLambda
    def Fbeta(r, p, beta):
        return torch.mean((1 + beta ** 2) * p * r / (beta ** 2 * p + r + 1e-20)).item()

    avg_f1 = MetricsLambda(Fbeta, recall, precision, 1)
    avg_f1.attach(eval_engine, "average f1")
    avg_f2 = MetricsLambda(Fbeta, recall, precision, 2)
    avg_f2.attach(eval_engine, "average f2")

    # Macro-averaged recall and precision
    avg_recall = Recall(average=True)
    avg_recall.attach(eval_engine, "average recall")
    avg_precision = Precision(average=True)
    avg_precision.attach(eval_engine, "average precision")

    return eval_engine
def test_state_metrics_ingredients_not_attached():
    y_pred = torch.randint(0, 2, size=(15, 10, 4)).float()
    y = torch.randint(0, 2, size=(15, 10, 4)).long()

    def update_fn(engine, batch):
        y_pred, y = batch
        return y_pred, y

    evaluator = Engine(update_fn)

    precision = Precision(average=False)
    recall = Recall(average=False)
    F1 = precision * recall * 2 / (precision + recall + 1e-20)
    F1 = MetricsLambda(lambda t: torch.mean(t).item(), F1)
    F1.attach(evaluator, "F1")

    def data(y_pred, y):
        for i in range(y_pred.shape[0]):
            yield (y_pred[i], y[i])

    d = data(y_pred, y)
    state = evaluator.run(d, max_epochs=1)

    assert set(state.metrics.keys()) == set(["F1"])
def test_integration():
    np.random.seed(1)

    n_iters = 10
    batch_size = 10
    n_classes = 10

    y_true = np.arange(0, n_iters * batch_size) % n_classes
    y_pred = 0.2 * np.random.rand(n_iters * batch_size, n_classes)
    for i in range(n_iters * batch_size):
        if np.random.rand() > 0.4:
            y_pred[i, y_true[i]] = 1.0
        else:
            j = np.random.randint(0, n_classes)
            y_pred[i, j] = 0.7

    y_true_batch_values = iter(y_true.reshape(n_iters, batch_size))
    y_pred_batch_values = iter(y_pred.reshape(n_iters, batch_size, n_classes))

    def update_fn(engine, batch):
        y_true_batch = next(y_true_batch_values)
        y_pred_batch = next(y_pred_batch_values)
        return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

    evaluator = Engine(update_fn)

    precision = Precision(average=False)
    recall = Recall(average=False)

    def Fbeta(r, p, beta):
        return torch.mean((1 + beta ** 2) * p * r / (beta ** 2 * p + r)).item()

    F1 = MetricsLambda(Fbeta, recall, precision, 1)

    precision.attach(evaluator, "precision")
    recall.attach(evaluator, "recall")
    F1.attach(evaluator, "f1")

    data = list(range(n_iters))
    state = evaluator.run(data, max_epochs=1)

    precision_true = precision_score(y_true, np.argmax(y_pred, axis=-1), average=None)
    recall_true = recall_score(y_true, np.argmax(y_pred, axis=-1), average=None)
    f1_true = f1_score(y_true, np.argmax(y_pred, axis=-1), average='macro')

    precision = state.metrics['precision'].numpy()
    recall = state.metrics['recall'].numpy()

    assert precision_true == approx(precision), "{} vs {}".format(precision_true, precision)
    assert recall_true == approx(recall), "{} vs {}".format(recall_true, recall)
    assert f1_true == approx(state.metrics['f1']), "{} vs {}".format(f1_true, state.metrics['f1'])
def _test():
    y_true = np.arange(0, n_iters * batch_size * dist.get_world_size()) % n_classes
    y_pred = 0.2 * np.random.rand(n_iters * batch_size * dist.get_world_size(), n_classes)
    for i in range(n_iters * batch_size * dist.get_world_size()):
        if np.random.rand() > 0.4:
            y_pred[i, y_true[i]] = 1.0
        else:
            j = np.random.randint(0, n_classes)
            y_pred[i, j] = 0.7

    y_true = y_true.reshape(n_iters * dist.get_world_size(), batch_size)
    y_pred = y_pred.reshape(n_iters * dist.get_world_size(), batch_size, n_classes)

    def update_fn(engine, i):
        y_true_batch = y_true[i + rank * n_iters, ...]
        y_pred_batch = y_pred[i + rank * n_iters, ...]
        return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

    evaluator = Engine(update_fn)

    precision = Precision(average=False, device=device)
    recall = Recall(average=False, device=device)

    def Fbeta(r, p, beta):
        return torch.mean((1 + beta ** 2) * p * r / (beta ** 2 * p + r)).item()

    F1 = MetricsLambda(Fbeta, recall, precision, 1)
    F1.attach(evaluator, "f1")

    another_f1 = (1.0 + precision * recall * 2 / (precision + recall + 1e-20)).mean().item()
    another_f1.attach(evaluator, "ff1")

    data = list(range(n_iters))
    state = evaluator.run(data, max_epochs=1)

    assert "f1" in state.metrics
    assert "ff1" in state.metrics

    f1_true = f1_score(
        y_true.ravel(),
        np.argmax(y_pred.reshape(-1, n_classes), axis=-1),
        average="macro",
    )
    assert f1_true == approx(state.metrics["f1"])
    assert 1.0 + f1_true == approx(state.metrics["ff1"])
def test_metrics_lambda():
    m0 = ListGatherMetric(0)
    m1 = ListGatherMetric(1)
    m2 = ListGatherMetric(2)

    def process_function(engine, data):
        return data

    engine = Engine(process_function)

    m0_plus_m1 = MetricsLambda(lambda x, y: x + y, m0, m1)
    m2_plus_2 = MetricsLambda(lambda x, y: x + y, m2, 2)
    m0_plus_m1.attach(engine, 'm0_plus_m1')
    m2_plus_2.attach(engine, 'm2_plus_2')

    engine.run([[1, 10, 100]])
    assert engine.state.metrics['m0_plus_m1'] == 11
    assert engine.state.metrics['m2_plus_2'] == 102

    engine.run([[2, 20, 200]])
    assert engine.state.metrics['m0_plus_m1'] == 22
    assert engine.state.metrics['m2_plus_2'] == 202
def create_evaluator(model, criterion, cfg):
    def _validation_step(_, batch):
        model.eval()
        with torch.no_grad():
            x, y = batch_to_tensor(batch, cfg)
            x, y = x.to(cfg.device), y.to(cfg.device)
            y_pred, hidden = model(x)
            loss = criterion(y_pred, y)
            if cfg.multi_label:
                y_pred = (y_pred > 0).float()
            return y_pred, y, loss, hidden

    evaluator = Engine(_validation_step)

    # Accuracy over (y_pred, y) taken from the step output
    accuracy = Accuracy(lambda x: x[0:2], is_multilabel=cfg.multi_label)
    accuracy.attach(evaluator, "acc")

    # Unaveraged precision/recall, plus their means ("MP"/"MR") via MetricsLambda
    precision = Precision(lambda x: x[0:2], average=False, is_multilabel=cfg.multi_label)
    precision.attach(evaluator, 'precision')
    MetricsLambda(lambda t: torch.mean(t).item(), precision).attach(evaluator, "MP")

    recall = Recall(lambda x: x[0:2], average=False, is_multilabel=cfg.multi_label)
    recall.attach(evaluator, 'recall')
    MetricsLambda(lambda t: torch.mean(t).item(), recall).attach(evaluator, "MR")

    # F1 composed from precision and recall, then averaged to a scalar
    F1 = 2. * precision * recall / (precision + recall + 1e-20)
    f1 = MetricsLambda(lambda t: torch.mean(t).item(), F1)
    f1.attach(evaluator, "F1")

    # Mean validation loss
    Average(lambda x: x[2]).attach(evaluator, 'loss')

    return evaluator
def test_integration_ingredients_not_attached():
    np.random.seed(1)

    n_iters = 10
    batch_size = 10
    n_classes = 10

    y_true = np.arange(0, n_iters * batch_size, dtype="int64") % n_classes
    y_pred = 0.2 * np.random.rand(n_iters * batch_size, n_classes)
    for i in range(n_iters * batch_size):
        if np.random.rand() > 0.4:
            y_pred[i, y_true[i]] = 1.0
        else:
            j = np.random.randint(0, n_classes)
            y_pred[i, j] = 0.7

    y_true_batch_values = iter(y_true.reshape(n_iters, batch_size))
    y_pred_batch_values = iter(y_pred.reshape(n_iters, batch_size, n_classes))

    def update_fn(engine, batch):
        y_true_batch = next(y_true_batch_values)
        y_pred_batch = next(y_pred_batch_values)
        return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

    evaluator = Engine(update_fn)

    precision = Precision(average=False)
    recall = Recall(average=False)

    def Fbeta(r, p, beta):
        return torch.mean((1 + beta ** 2) * p * r / (beta ** 2 * p + r)).item()

    F1 = MetricsLambda(Fbeta, recall, precision, 1)
    F1.attach(evaluator, "f1")

    data = list(range(n_iters))
    state = evaluator.run(data, max_epochs=1)

    f1_true = f1_score(y_true, np.argmax(y_pred, axis=-1), average="macro")
    assert f1_true == approx(state.metrics["f1"]), f"{f1_true} vs {state.metrics['f1']}"
def test_metrics_lambda():
    m0 = ListGatherMetric(0)
    m1 = ListGatherMetric(1)
    m2 = ListGatherMetric(2)

    def process_function(engine, data):
        return data

    engine = Engine(process_function)

    def plus(this, other):
        return this + other

    m0_plus_m1 = MetricsLambda(plus, m0, other=m1)
    m2_plus_2 = MetricsLambda(plus, m2, 2)
    m0_plus_m1.attach(engine, "m0_plus_m1")
    m2_plus_2.attach(engine, "m2_plus_2")

    engine.run([[1, 10, 100]])
    assert engine.state.metrics["m0_plus_m1"] == 11
    assert engine.state.metrics["m2_plus_2"] == 102

    engine.run([[2, 20, 200]])
    assert engine.state.metrics["m0_plus_m1"] == 22
    assert engine.state.metrics["m2_plus_2"] == 202
def test_metrics_lambda():
    m0 = ListGatherMetric(0)
    m1 = ListGatherMetric(1)
    m2 = ListGatherMetric(2)

    def process_function(engine, data):
        return data

    engine = Engine(process_function)

    def plus(this, other):
        return this + other

    m0_plus_m1 = MetricsLambda(plus, m0, other=m1)
    m2_plus_2 = MetricsLambda(plus, m2, 2)
    m0_plus_m1.attach(engine, "m0_plus_m1")
    m2_plus_2.attach(engine, "m2_plus_2")

    engine.run([[1, 10, 100]])
    assert engine.state.metrics["m0_plus_m1"] == 11
    assert engine.state.metrics["m2_plus_2"] == 102

    engine.run([[2, 20, 200]])
    assert engine.state.metrics["m0_plus_m1"] == 22
    assert engine.state.metrics["m2_plus_2"] == 202

    # metrics are partially attached
    assert not m0.is_attached(engine)
    assert not m1.is_attached(engine)
    assert not m2.is_attached(engine)

    # a dependency is detached
    m0.detach(engine)
    # so the lambda metric is too
    assert not m0_plus_m1.is_attached(engine)

    # the lambda is attached again
    m0_plus_m1.attach(engine, "m0_plus_m1")
    assert m0_plus_m1.is_attached(engine)
    # metrics are always partially attached
    assert not m0.is_attached(engine)

    m0_plus_m1.detach(engine)
    assert not m0_plus_m1.is_attached(engine)
    # detached (and no longer partially attached)
    assert not m0.is_attached(engine)
def test_metrics_lambda_update_and_attach_together():
    y_pred = torch.randint(0, 2, size=(15, 10, 4)).float()
    y = torch.randint(0, 2, size=(15, 10, 4)).long()

    def update_fn(engine, batch):
        y_pred, y = batch
        return y_pred, y

    engine = Engine(update_fn)

    precision = Precision(average=False)
    recall = Recall(average=False)

    def Fbeta(r, p, beta):
        return torch.mean((1 + beta ** 2) * p * r / (beta ** 2 * p + r)).item()

    F1 = MetricsLambda(Fbeta, recall, precision, 1)
    F1.attach(engine, "f1")

    with pytest.raises(ValueError, match=r"MetricsLambda is already attached to an engine"):
        F1.update((y_pred, y))

    y_pred = torch.randint(0, 2, size=(15, 10, 4)).float()
    y = torch.randint(0, 2, size=(15, 10, 4)).long()

    F1 = MetricsLambda(Fbeta, recall, precision, 1)
    F1.update((y_pred, y))

    engine = Engine(update_fn)

    with pytest.raises(ValueError, match=r"The underlying metrics are already updated"):
        F1.attach(engine, "f1")

    F1.reset()
    F1.attach(engine, "f1")
def train(
    trn_path: Path,
    save_dir: Path,
    dev_path: Optional[Path] = None,
    vocab_path: Optional[Path] = None,
    encoding: str = 'utf8',
    lr: float = 1e-3,
    max_epochs: int = 50,
    batch_size: int = 16,
    patience: int = 5,
    numeric: bool = False,
    device: Optional[str] = None,
) -> None:
    logging.info('Creating save directory if not exist in %s', save_dir)
    save_dir.mkdir()

    ### Read/create/load samples and vocab
    trn_samples = read_or_load_samples(trn_path, encoding=encoding)
    vocab = create_or_load_vocab(trn_samples, path=vocab_path)
    dev_samples = None
    if dev_path is not None:
        dev_samples = read_or_load_samples(dev_path, encoding=encoding, name='dev')

    ### Numericalize samples
    if not numeric:
        logging.info('Numericalizing train samples')
        trn_samples = list(vocab.apply_to(trn_samples))
        if dev_samples is not None:
            logging.info('Numericalizing dev samples')
            dev_samples = list(vocab.apply_to(dev_samples))

    ### Save vocab and samples
    fnames = ['vocab.pkl', 'train-samples.pkl', 'dev-samples.pkl']
    objs = [vocab, trn_samples]
    if dev_samples is not None:
        objs.append(dev_samples)
    for fname, obj in zip(fnames, objs):
        save_path = save_dir / fname
        logging.info('Saving to %s', save_path)
        with open(save_path, 'wb') as f:
            pickle.dump(obj, f)

    ### Create model, optimizer, and loss fn
    logging.info('Creating language model')
    padding_idx = vocab['words']['<pad>']
    max_width = get_max_filter_width([trn_samples, dev_samples])
    model = create_lm(
        len(vocab['words']),
        len(vocab['chars']),
        padding_idx=padding_idx,
        filter_widths=list(range(1, max_width)),
    )
    logging.info('Model created with %d parameters',
                 sum(p.numel() for p in model.parameters()))
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = LMLoss(padding_idx=padding_idx)

    ### Save model metadata
    metadata_path = save_dir / 'metadata.yml'
    logging.info('Saving model metadata to %s', metadata_path)
    metadata_path.write_text(dump(model), encoding='utf8')

    ### Prepare engines
    def batch2tensors(
        batch: Batch,
        device: Optional[str] = None,
        non_blocking: Optional[bool] = None,
    ) -> Tuple[dict, torch.LongTensor]:
        arr = batch.to_array(pad_with=padding_idx)
        tsr = {k: torch.from_numpy(v).to(device=device) for k, v in arr.items()}
        words = tsr['words'][:, :-1].contiguous()
        chars = tsr['chars'][:, :-1, :].contiguous()
        targets = tsr['words'][:, 1:].contiguous()
        return {'words': words, 'chars': chars}, targets

    trainer = create_supervised_trainer(
        model, optimizer, loss_fn, device=device, prepare_batch=batch2tensors)
    trn_evaluator = create_supervised_evaluator(
        model, device=device, prepare_batch=batch2tensors)
    dev_evaluator = create_supervised_evaluator(
        model, device=device, prepare_batch=batch2tensors)

    ### Attach metrics
    loss = Loss(loss_fn,
                batch_size=lambda tgt: (tgt != padding_idx).long().sum().item())
    ppl = MetricsLambda(math.exp, loss)
    loss.attach(trn_evaluator, 'loss')
    loss.attach(dev_evaluator, 'loss')
    ppl.attach(trn_evaluator, 'ppl')
    ppl.attach(dev_evaluator, 'ppl')

    ### Attach timers
    epoch_timer = Timer()
    epoch_timer.attach(trainer, start=Events.EPOCH_STARTED, pause=Events.EPOCH_COMPLETED)

    ### Attach progress bars
    trn_pbar = ProgressBar(bar_format=None, unit='batch', desc='Training')
    trn_pbar.attach(trainer,
                    output_transform=lambda loss: {'loss': loss, 'ppl': math.exp(loss)})
    eval_pbar = ProgressBar(bar_format=None, unit='sent', desc='Evaluating')
    eval_pbar.attach(trn_evaluator)
    eval_pbar.attach(dev_evaluator)

    ### Attach checkpointers
    if dev_samples is None:
        ckptr_kwargs: dict = {'save_interval': 1, 'n_saved': 5}
        ckptr_engine = trainer
    else:
        ckptr_kwargs = {
            'score_function': lambda eng: -eng.state.metrics['ppl'],
            'score_name': 'dev_ppl',
        }
        ckptr_engine = dev_evaluator
    ckptr = ModelCheckpoint(str(save_dir / 'checkpoints'), 'ckpt',
                            save_as_state_dict=True, **ckptr_kwargs)
    ckptr_engine.add_event_handler(Events.EPOCH_COMPLETED, ckptr,
                                   {'model': model, 'optimizer': optimizer})

    ### Attach early stopper
    if dev_samples is not None:
        early_stopper = EarlyStopping(patience,
                                      lambda eng: -eng.state.metrics['ppl'], trainer)
        dev_evaluator.add_event_handler(Events.EPOCH_COMPLETED, early_stopper)

    ### Attach custom handlers
    @trainer.on(Events.EPOCH_STARTED)
    def start_epoch(engine: Engine) -> None:
        logging.info('[Epoch %d/%d] Starting', engine.state.epoch, engine.state.max_epochs)

    @trainer.on(Events.EPOCH_COMPLETED)
    def complete_epoch(engine: Engine) -> None:
        epoch = engine.state.epoch
        max_epochs = engine.state.max_epochs
        logging.info('[Epoch %d/%d] Done in %s', epoch, max_epochs,
                     timedelta(seconds=epoch_timer.value()))
        logging.info('[Epoch %d/%d] Evaluating on train corpus', epoch, max_epochs)
        trn_evaluator.run(BatchIterator(trn_samples))
        if dev_samples is not None:
            logging.info('[Epoch %d/%d] Evaluating on dev corpus', epoch, max_epochs)
            dev_evaluator.run(BatchIterator(dev_samples))

    @trn_evaluator.on(Events.COMPLETED)
    @dev_evaluator.on(Events.COMPLETED)
    def print_metrics(engine: Engine) -> None:
        loss = engine.state.metrics['loss']
        ppl = engine.state.metrics['ppl']
        logging.info('||| loss %.4f | ppl %.4f', loss, ppl)

    ### Start training
    iterator = ShuffleIterator(trn_samples, key=lambda s: len(s['words']))
    iterator = BatchIterator(iterator, batch_size=batch_size)
    try:
        trainer.run(iterator, max_epochs=max_epochs)
    except KeyboardInterrupt:
        logging.info('Interrupt detected, aborting training')
        trainer.terminate()
def create_evaluator(model, cfg):
    def _validation_step(_, batch):
        model.eval()
        with torch.no_grad():
            x_char, x_type, y_word, y_syllable = batch_to_tensor(batch, cfg)
            x_char, x_type, y_word, y_syllable = (
                t.to(cfg.device) for t in [x_char, x_type, y_word, y_syllable])
            logits_word, logits_syllable = model(x_char, x_type)
            loss, word_loss, syllable_loss, align_loss = model.joint_loss(
                logits_word, y_word, logits_syllable, y_syllable)
            return ((logits_word > 0.5).long(), y_word,
                    (logits_syllable > 0.5).long(), y_syllable,
                    loss, word_loss, syllable_loss, align_loss)

    evaluator = Engine(_validation_step)

    # Word- and syllable-level accuracy
    w_loss = Accuracy(lambda x: x[0:2])
    w_loss.attach(evaluator, 'w_acc')
    s_acc = Accuracy(lambda x: x[2:4])
    s_acc.attach(evaluator, 's_acc')

    # Mean of each loss component
    Average(lambda x: x[4]).attach(evaluator, 'loss')
    Average(lambda x: x[5]).attach(evaluator, 'w_loss')
    Average(lambda x: x[6]).attach(evaluator, 's_loss')
    Average(lambda x: x[7]).attach(evaluator, 'a_loss')

    accuracy = Accuracy(lambda x: x[0:2])
    accuracy.attach(evaluator, "acc")

    # Unaveraged precision/recall per task, with their means attached via MetricsLambda
    w_precision = Precision(lambda x: x[0:2])
    w_precision.attach(evaluator, 'WP')
    MetricsLambda(lambda t: torch.mean(t).item(), w_precision).attach(evaluator, "WMP")
    s_precision = Precision(lambda x: x[2:4])
    s_precision.attach(evaluator, 'SP')
    MetricsLambda(lambda t: torch.mean(t).item(), s_precision).attach(evaluator, "SMP")

    w_recall = Recall(lambda x: x[0:2])
    w_recall.attach(evaluator, 'WR')
    MetricsLambda(lambda t: torch.mean(t).item(), w_recall).attach(evaluator, "WMR")
    s_recall = Recall(lambda x: x[2:4])
    s_recall.attach(evaluator, 'SR')
    MetricsLambda(lambda t: torch.mean(t).item(), s_recall).attach(evaluator, "SMR")

    # F1 per task, composed from precision and recall, then averaged to a scalar
    w_f1 = 2. * w_precision * w_recall / (w_precision + w_recall + 1e-20)
    w_f1 = MetricsLambda(lambda t: torch.mean(t).item(), w_f1)
    w_f1.attach(evaluator, "WF1")
    s_f1 = 2. * s_precision * s_recall / (s_precision + s_recall + 1e-20)
    s_f1 = MetricsLambda(lambda t: torch.mean(t).item(), s_f1)
    s_f1.attach(evaluator, "SF1")

    return evaluator