Example #1
def get_tensorboard_logger(trainer: Engine, evaluators: ThreeEvaluators,
                           metric_names: List[str]) -> TensorboardLogger:
    """
    creates a ``tensorboard`` logger that reads metrics from the given evaluators and attaches it to the given trainer

    :param trainer: an ``ignite`` trainer to attach to
    :param evaluators: a triple of train, validation, and test evaluators to get metrics from
    :param metric_names: a list of metrics to log during validation and testing
    """
    tb_logger = TensorboardLogger(log_dir=f"runs/{datetime.now()}",
                                  flush_secs=1)
    training_loss = OutputHandler(
        "training",
        ["running_loss"],
        global_step_transform=global_step_from_engine(trainer),
    )
    tb_logger.attach(trainer, training_loss, Events.EPOCH_COMPLETED)
    validation_loss = OutputHandler(
        "validation",
        metric_names,
        global_step_transform=global_step_from_engine(trainer),
    )
    tb_logger.attach(evaluators.validation, validation_loss, Events.COMPLETED)
    test_loss = OutputHandler(
        "test",
        metric_names,
        global_step_transform=global_step_from_engine(trainer),
    )
    tb_logger.attach(evaluators.test, test_loss, Events.COMPLETED)
    return tb_logger
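A minimal usage sketch for the function above, assuming ``ThreeEvaluators`` is a simple named-tuple-style container of train/validation/test engines (its real definition, and the module's imports, are not shown in this example):

from collections import namedtuple

from ignite.engine import Engine
from ignite.metrics import RunningAverage

# Hypothetical stand-in for the ThreeEvaluators container used above.
ThreeEvaluators = namedtuple("ThreeEvaluators", ["train", "validation", "test"])

def dummy_step(engine, batch):
    return {"running_loss": 0.0}

trainer = Engine(dummy_step)
evaluators = ThreeEvaluators(Engine(dummy_step), Engine(dummy_step),
                             Engine(dummy_step))
# Expose a "running_loss" metric so the training OutputHandler has something to log.
RunningAverage(output_transform=lambda out: out["running_loss"]).attach(
    trainer, "running_loss")

tb_logger = get_tensorboard_logger(trainer, evaluators, metric_names=["loss"])
trainer.run([0] * 10, max_epochs=2)
tb_logger.close()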
Example #2
def test_output_handler_with_global_step_from_engine():

    mock_another_engine = MagicMock()
    mock_another_engine.state = State()
    mock_another_engine.state.epoch = 10
    mock_another_engine.state.output = 12.345

    wrapper = OutputHandler(
        "tag",
        output_transform=lambda x: {"loss": x},
        global_step_transform=global_step_from_engine(mock_another_engine),
    )

    mock_logger = MagicMock(spec=TensorboardLogger)
    mock_logger.writer = MagicMock()

    mock_engine = MagicMock()
    mock_engine.state = State()
    mock_engine.state.epoch = 1
    mock_engine.state.output = 0.123

    wrapper(mock_engine, mock_logger, Events.EPOCH_STARTED)
    assert mock_logger.writer.add_scalar.call_count == 1
    mock_logger.writer.add_scalar.assert_has_calls(
        [call("tag/loss", mock_engine.state.output, mock_another_engine.state.epoch)]
    )

    mock_another_engine.state.epoch = 11
    mock_engine.state.output = 1.123

    wrapper(mock_engine, mock_logger, Events.EPOCH_STARTED)
    assert mock_logger.writer.add_scalar.call_count == 2
    mock_logger.writer.add_scalar.assert_has_calls(
        [call("tag/loss", mock_engine.state.output, mock_another_engine.state.epoch)]
    )
Example #3
    def create_callbacks(self):

        ## SETUP CALLBACKS
        print('[INFO] Creating callback functions for training loop...',
              end='')
        # Early Stopping - stops training if the validation loss does not improve for EARLY_STOPPING_PATIENCE epochs
        handler = EarlyStopping(patience=self.config.EARLY_STOPPING_PATIENCE,
                                score_function=score_function_loss,
                                trainer=self.train_engine)
        self.evaluator.add_event_handler(Events.COMPLETED, handler)
        print('Early Stopping ({} epochs)...'.format(
            self.config.EARLY_STOPPING_PATIENCE),
              end='')

        val_checkpointer = Checkpoint(
            {"model": self.model},
            ClearMLSaver(),
            n_saved=1,
            score_function=score_function_acc,
            score_name="val_acc",
            filename_prefix='cub200_{}_ignite_best'.format(
                self.config.MODEL.MODEL_NAME),
            global_step_transform=global_step_from_engine(self.train_engine),
        )
        self.evaluator.add_event_handler(Events.EPOCH_COMPLETED,
                                         val_checkpointer)
        print('Model Checkpointing...', end='')
        print('Done')
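The ``score_function_loss`` and ``score_function_acc`` helpers referenced here (and again in Example #6) are not shown; since ignite's ``EarlyStopping`` and ``Checkpoint`` both treat higher scores as better, a plausible sketch, assuming the evaluator exposes ``loss`` and ``accuracy`` metrics, is:

def score_function_loss(engine):
    # EarlyStopping stops when the score stops improving, so negate the loss.
    return -engine.state.metrics['loss']

def score_function_acc(engine):
    # Higher validation accuracy == better checkpoint.
    return engine.state.metrics['accuracy']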
Example #4
    def add_tensorboard_logging(self, logging_dir=None):

        # Add TensorBoard logging
        if logging_dir is None:
            logging_dir = os.path.join(self.config.DIRS.WORKING_DIR, 'tb_logs')
        else:
            logging_dir = os.path.join(logging_dir, 'tb_logs')
        print('Tensorboard logging saving to: {} ...'.format(logging_dir),
              end='')

        self.tb_logger = TensorboardLogger(log_dir=logging_dir)
        # Logging iteration loss
        self.tb_logger.attach_output_handler(
            engine=self.train_engine,
            event_name=Events.ITERATION_COMPLETED,
            tag='training',
            output_transform=lambda loss: {"batch loss": loss})
        # Logging epoch training metrics
        self.tb_logger.attach_output_handler(
            engine=self.train_evaluator,
            event_name=Events.EPOCH_COMPLETED,
            tag="training",
            metric_names=[
                "loss", "accuracy", "precision", "recall", "f1", "topKCatAcc"
            ],
            global_step_transform=global_step_from_engine(self.train_engine),
        )
        # Logging epoch validation metrics
        self.tb_logger.attach_output_handler(
            engine=self.evaluator,
            event_name=Events.EPOCH_COMPLETED,
            tag="validation",
            metric_names=[
                "loss", "accuracy", "precision", "recall", "f1", "topKCatAcc"
            ],
            global_step_transform=global_step_from_engine(self.train_engine),
        )
        # Attach the logger to the trainer to log model's weights as a histogram after each epoch
        self.tb_logger.attach(self.train_engine,
                              event_name=Events.EPOCH_COMPLETED,
                              log_handler=WeightsHistHandler(self.model))
        # Attach the logger to the trainer to log model's gradients as a histogram after each epoch
        self.tb_logger.attach(self.train_engine,
                              event_name=Events.EPOCH_COMPLETED,
                              log_handler=GradsHistHandler(self.model))
        print('Tensorboard Logging...', end='')
        print('done')
Example #5
    def init_handlers(self, trainer: ie.Engine, evaluator: ie.Engine,
                      model: nn.Module, optimizer):
        self.tb_logger.attach(trainer,
                              log_handler=tbl.OutputHandler(
                                  tag='training', metric_names='all'),
                              event_name=ie.Events.ITERATION_COMPLETED)
        self.tb_logger.attach(trainer,
                              log_handler=tbl.OptimizerParamsHandler(
                                  optimizer, tag='training'),
                              event_name=ie.Events.ITERATION_COMPLETED)
        self.tb_logger.attach(trainer,
                              log_handler=tbl.OutputHandler(
                                  tag='train', metric_names='all'),
                              event_name=ie.Events.EPOCH_COMPLETED)
        self.tb_logger.attach(
            evaluator,
            log_handler=tbl.OutputHandler(
                tag='dev',
                metric_names='all',
                global_step_transform=tbl.global_step_from_engine(trainer)),
            event_name=ie.Events.EPOCH_COMPLETED)

        if self.opts.debug:
            self.tb_logger.attach(trainer,
                                  log_handler=tbl.OptimizerParamsHandler(
                                      optimizer, tag=self.DEBUG_TAG),
                                  event_name=ie.Events.ITERATION_COMPLETED)
            self.tb_logger.attach(trainer,
                                  log_handler=tbl.WeightsHistHandler(
                                      model, tag=self.DEBUG_TAG),
                                  event_name=ie.Events.ITERATION_COMPLETED)
            self.tb_logger.attach(trainer,
                                  log_handler=tbl.WeightsScalarHandler(
                                      model, tag=self.DEBUG_TAG),
                                  event_name=ie.Events.ITERATION_COMPLETED)
            self.tb_logger.attach(trainer,
                                  log_handler=tbl.GradsHistHandler(
                                      model, tag=self.DEBUG_TAG),
                                  event_name=ie.Events.EPOCH_COMPLETED)
            self.tb_logger.attach(trainer,
                                  log_handler=tbl.GradsScalarHandler(
                                      model, tag=self.DEBUG_TAG),
                                  event_name=ie.Events.ITERATION_COMPLETED)
Example #6
    def _create_ingite_model_checkpointer(self, best_model_only=True):
        '''
        Function to create an ignite model checkpointer, either based on validation accuracy (best_model_only=True) or saving at every epoch (best_model_only=False).
        '''

        print('Model Checkpointing...', end='')
        if best_model_only:
            print('best model checkpointing...', end='')
            # best model checkpointer, based on validation accuracy.
            self.model_checkpointer = ModelCheckpoint(
                dirname=self.config.DIRS.WORKING_DIR,
                filename_prefix='caltech_birds_ignite_best',
                score_function=score_function_acc,
                score_name='val_acc',
                n_saved=2,
                create_dir=True,
                save_as_state_dict=True,
                require_empty=False,
                global_step_transform=global_step_from_engine(
                    self.train_engine))
            self.evaluator.add_event_handler(
                Events.COMPLETED, self.model_checkpointer,
                {self.config.MODEL.MODEL_NAME: self.model})
        else:
            # Checkpoint the model
            # epoch checkpointer
            print('every epoch model checkpointing...', end='')
            self.model_checkpointer = ModelCheckpoint(
                dirname=self.config.DIRS.WORKING_DIR,
                filename_prefix='caltech_birds_ignite',
                n_saved=2,
                create_dir=True,
                save_as_state_dict=True,
                require_empty=False)
            self.train_engine.add_event_handler(
                Events.EPOCH_COMPLETED, self.model_checkpointer,
                {self.config.MODEL.MODEL_NAME: self.model})

        print('Done')
Example #7
def run(train_batch_size, val_batch_size, epochs, lr, momentum, log_dir):
    train_loader, val_loader = get_data_loaders(train_batch_size,
                                                val_batch_size)
    model = Net()
    device = "cpu"

    if torch.cuda.is_available():
        device = "cuda"

    model.to(device)  # Move model before creating optimizer
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()
    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        criterion,
                                        device=device)
    trainer.logger = setup_logger("Trainer")

    if sys.version_info > (3, ):
        from ignite.contrib.metrics.gpu_info import GpuInfo

        try:
            GpuInfo().attach(trainer)
        except RuntimeError:
            print(
                "INFO: By default, this example can log GPU information (used memory, utilization). "
                "Since the pynvml package is not installed, GPU information won't be logged. "
                "To enable it, install it with: `pip install pynvml`")

    metrics = {"accuracy": Accuracy(), "loss": Loss(criterion)}

    train_evaluator = create_supervised_evaluator(model,
                                                  metrics=metrics,
                                                  device=device)
    train_evaluator.logger = setup_logger("Train Evaluator")
    validation_evaluator = create_supervised_evaluator(model,
                                                       metrics=metrics,
                                                       device=device)
    validation_evaluator.logger = setup_logger("Val Evaluator")

    @trainer.on(Events.EPOCH_COMPLETED)
    def compute_metrics(engine):
        train_evaluator.run(train_loader)
        validation_evaluator.run(val_loader)

    tb_logger = TensorboardLogger(log_dir=log_dir)

    tb_logger.attach_output_handler(
        trainer,
        event_name=Events.ITERATION_COMPLETED(every=100),
        tag="training",
        output_transform=lambda loss: {"batchloss": loss},
        metric_names="all",
    )

    for tag, evaluator in [("training", train_evaluator),
                           ("validation", validation_evaluator)]:
        tb_logger.attach_output_handler(
            evaluator,
            event_name=Events.EPOCH_COMPLETED,
            tag=tag,
            metric_names=["loss", "accuracy"],
            global_step_transform=global_step_from_engine(trainer),
        )

    tb_logger.attach_opt_params_handler(
        trainer,
        event_name=Events.ITERATION_COMPLETED(every=100),
        optimizer=optimizer)

    tb_logger.attach(trainer,
                     log_handler=WeightsScalarHandler(model),
                     event_name=Events.ITERATION_COMPLETED(every=100))

    tb_logger.attach(trainer,
                     log_handler=WeightsHistHandler(model),
                     event_name=Events.EPOCH_COMPLETED(every=100))

    tb_logger.attach(trainer,
                     log_handler=GradsScalarHandler(model),
                     event_name=Events.ITERATION_COMPLETED(every=100))

    tb_logger.attach(trainer,
                     log_handler=GradsHistHandler(model),
                     event_name=Events.EPOCH_COMPLETED(every=100))

    def score_function(engine):
        return engine.state.metrics["accuracy"]

    model_checkpoint = ModelCheckpoint(
        log_dir,
        n_saved=2,
        filename_prefix="best",
        score_function=score_function,
        score_name="validation_accuracy",
        global_step_transform=global_step_from_engine(trainer),
    )
    validation_evaluator.add_event_handler(Events.COMPLETED, model_checkpoint,
                                           {"model": model})

    # kick everything off
    trainer.run(train_loader, max_epochs=epochs)

    tb_logger.close()
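A hedged invocation of the ``run`` function above (the argument values are illustrative, not necessarily the defaults used by the original script's CLI):

if __name__ == "__main__":
    run(train_batch_size=64, val_batch_size=1000, epochs=5,
        lr=0.01, momentum=0.5, log_dir="tensorboard_logs")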
Example #8
def train_network(model: nn.Module, training_loader: DataLoader,
                  validation_loader: DataLoader):
    """Trains the given neural network model.

    Parameters
    ----------
    model : nn.Module
        The PyTorch model to be trained
    training_loader : DataLoader
        Training data loader
    validation_loader : DataLoader
        Validation data loader
    """
    device = "cuda:0" if cast(Any, torch).cuda.is_available() else "cpu"

    if device == "cuda:0":
        model.cuda()

    optimizer = cast(Any, torch).optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()

    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        criterion,
                                        device=device)

    save_handler = Checkpoint(
        {
            "model": model,
            "optimizer": optimizer,
            "trainer": trainer
        },
        DiskSaver("dist/models", create_dir=True),
        n_saved=2,
    )
    trainer.add_event_handler(Events.EPOCH_COMPLETED(every=100), save_handler)

    # Create a logger
    tb_logger = TensorboardLogger(log_dir="logs/training" +
                                  datetime.now().strftime("-%Y%m%d-%H%M%S"),
                                  flush_secs=1)

    tb_logger.attach_output_handler(
        trainer,
        event_name=Events.ITERATION_COMPLETED,
        tag="training",
        output_transform=lambda loss: {"loss": loss},
    )

    # Training evaluator
    training_evaluator = create_supervised_evaluator(model,
                                                     metrics={
                                                         "r2": R2Score(),
                                                         "MSELoss":
                                                         Loss(criterion)
                                                     },
                                                     device=device)

    tb_logger.attach_output_handler(
        training_evaluator,
        event_name=Events.EPOCH_COMPLETED,
        tag="training",
        metric_names=["MSELoss", "r2"],
        global_step_transform=global_step_from_engine(trainer),
    )

    # Validation evaluator
    evaluator = create_supervised_evaluator(model,
                                            metrics={
                                                "r2": R2Score(),
                                                "MSELoss": Loss(criterion)
                                            },
                                            device=device)

    tb_logger.attach_output_handler(
        evaluator,
        event_name=Events.EPOCH_COMPLETED,
        tag="validation",
        metric_names=["MSELoss", "r2"],
        global_step_transform=global_step_from_engine(trainer),
    )

    @trainer.on(Events.EPOCH_COMPLETED(every=10))
    def log_training_results(trainer):
        training_evaluator.run(training_loader)

        metrics = training_evaluator.state.metrics
        print(
            f"Training Results - Epoch: {trainer.state.epoch}",
            f" Avg r2: {metrics['r2']:.2f} Avg loss: {metrics['MSELoss']:.2f}",
        )

    @trainer.on(Events.EPOCH_COMPLETED(every=10))
    def log_validation_results(trainer):
        evaluator.run(validation_loader)

        metrics = evaluator.state.metrics
        print(
            f"Validation Results - Epoch: {trainer.state.epoch}",
            f" Avg r2: {metrics['r2']:.2f} Avg loss: {metrics['MSELoss']:.2f}\n",
        )

    trainer.run(training_loader, max_epochs=int(1e6))
Example #9
def trainer(
    train_batch,
    evaluate_batch,
    evaluate_data_loaders,
    metrics,
    optimizers,
):
    '''
    Create standard trainer with evaluators.

    Parameters
    ----------
    train_batch : function
        function that trains on given batch
    evaluate_batch : function
        function that evaluates a given batch
    evaluate_data_loaders: list
        data loaders that yield batches to evaluate on
    metrics : dict
        dict with one sub-dict of metrics for 'train' and for each evaluate
        data loader. Wrap a metric with trainer.Progress to show it in the
        progress bar.
    optimizers : dict
        dict with optimizers for logging

    Returns
    -------
    tuple
        trainer engine
        dict of evaluator engines
        tensorboard logger
    '''

    trainer = ignite.engine.Engine(train_batch)

    for name, metric in metrics.get(PROGRESS_DESC, dict()).items():
        metric.attach(trainer, name)

    for name, metric in metrics.get(TRAIN_DESC, dict()).items():
        metric.attach(trainer, name)

    evaluators = {
        evaluator_name: ignite.engine.Engine(evaluate_batch)
        for evaluator_name in evaluate_data_loaders.keys()
    }

    for evaluator_name, evaluator in evaluators.items():
        for metric_name, metric in metrics[evaluator_name].items():
            metric.attach(evaluator, metric_name)

    tensorboard_logger = TensorboardLogger(log_dir='tb')

    EpochLogger().attach(trainer)

    # Order of attaching progress bars is important for vscode / atom
    ProgressBar(desc=TRAIN_DESC).attach(trainer,
                                        metric_names=list(
                                            metrics.get(PROGRESS_DESC,
                                                        dict()).keys()))
    tensorboard_logger.attach(
        trainer,
        OutputHandler(
            tag=PROGRESS_DESC,
            metric_names=list(metrics.get(PROGRESS_DESC, dict()).keys()),
        ),
        Events.ITERATION_COMPLETED,
    )

    MetricsLogger(TRAIN_DESC).attach(trainer,
                                     metrics.get(TRAIN_DESC, dict()).keys())
    tensorboard_logger.attach(
        trainer,
        OutputHandler(
            tag=TRAIN_DESC,
            metric_names=list(metrics.get(TRAIN_DESC, dict()).keys()),
        ),
        Events.ITERATION_COMPLETED,
    )

    def run_evaluator(evaluator_desc):
        return lambda engine: evaluators[evaluator_desc].run(
            evaluate_data_loaders[evaluator_desc])

    for evaluator_desc, evaluator in evaluators.items():
        evaluator_metric_names = list(metrics[evaluator_desc].keys())

        trainer.add_event_handler(
            Events.EPOCH_COMPLETED,
            run_evaluator(evaluator_desc),
        )

        ProgressBar(desc=evaluator_desc).attach(evaluator)
        MetricsLogger(evaluator_desc).attach(evaluator, evaluator_metric_names)
        tensorboard_logger.attach(
            evaluator,
            OutputHandler(
                tag=evaluator_desc,
                metric_names=evaluator_metric_names,
                global_step_transform=global_step_from_engine(trainer),
            ),
            Events.EPOCH_COMPLETED,
        )

    if type(optimizers) is not dict:
        optimizers = dict(optimizer=optimizers)

    for name, optimizer in optimizers.items():
        tensorboard_logger.attach(
            trainer,
            log_handler=OptimizerParamsHandler(
                tag=f'{TRAIN_DESC}/{name}',
                param_name='lr',
                optimizer=optimizer,
            ),
            event_name=Events.ITERATION_COMPLETED,
        )

    return trainer, evaluators, tensorboard_logger
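A rough call sketch for this factory. It assumes ignite-style ``(engine, batch)`` process functions, that the helpers it references (``EpochLogger``, ``MetricsLogger``, ``PROGRESS_DESC``, ``TRAIN_DESC``) are importable from the same module, and purely illustrative model and metric choices:

import torch
import torch.nn as nn
from ignite.metrics import Loss

model = nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = nn.MSELoss()

def train_batch(engine, batch):
    x, y = batch
    optimizer.zero_grad()
    loss = loss_fn(model(x), y)
    loss.backward()
    optimizer.step()
    return {"loss": loss.item()}

def evaluate_batch(engine, batch):
    x, y = batch
    with torch.no_grad():
        return model(x), y  # (y_pred, y) pairs, as ignite metrics expect

data = [(torch.randn(8, 4), torch.randn(8, 1)) for _ in range(10)]

train_engine, evaluators, tb_logger = trainer(
    train_batch,
    evaluate_batch,
    evaluate_data_loaders={'validation': data},
    metrics={'validation': {'mse': Loss(loss_fn)}},
    optimizers=optimizer,
)
train_engine.run(data, max_epochs=2)
tb_logger.close()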
Example #10
# Add TensorBoard logging
tb_logger = TensorboardLogger(log_dir=os.path.join(working_dir,'tb_logs'))
# Logging iteration loss
tb_logger.attach_output_handler(
    engine=trainer, 
    event_name=Events.ITERATION_COMPLETED, 
    tag='training', 
    output_transform=lambda loss: {"batch loss": loss}
    )
# Logging epoch training metrics
tb_logger.attach_output_handler(
    engine=train_evaluator,
    event_name=Events.EPOCH_COMPLETED,
    tag="training",
    metric_names=["loss", "accuracy", "precision", "recall", "f1", "topKCatAcc"],
    global_step_transform=global_step_from_engine(trainer),
)
# Logging epoch validation metrics
tb_logger.attach_output_handler(
    engine=evaluator,
    event_name=Events.EPOCH_COMPLETED,
    tag="validation",
    metric_names=["loss", "accuracy", "precision", "recall", "f1", "topKCatAcc"],
    global_step_transform=global_step_from_engine(trainer),
)
# Attach the logger to the trainer to log model's weights as a histogram after each epoch
tb_logger.attach(
    trainer,
    event_name=Events.EPOCH_COMPLETED,
    log_handler=WeightsHistHandler(model)
)
Example #11
def run_training(
    model,
    optimizer,
    scheduler,
    output_path,
    train_loader,
    val_loader,
    epochs,
    patience,
    epochs_pretrain,
    mixed_precision,
    classes_weights,
):

    # trainer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if classes_weights is not None:
        classes_weights = classes_weights.to(device)
    crit = nn.CrossEntropyLoss(weight=classes_weights)
    metrics = {"accuracy": Accuracy(), "loss": Loss(crit)}
    model.to(device)
    trainer = create_supervised_trainer_with_pretraining(
        model,
        optimizer,
        crit,
        device=device,
        epochs_pretrain=epochs_pretrain,
        mixed_precision=mixed_precision,
    )
    train_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)
    val_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)

    # Out paths
    path_ckpt = os.path.join(output_path, "model_ckpt")
    log_dir = os.path.join(output_path, "log_dir")
    os.makedirs(log_dir, exist_ok=True)

    # tensorboard
    tb_logger = TensorboardLogger(log_dir=log_dir)
    tb_logger.attach_output_handler(
        train_evaluator,
        event_name=Events.EPOCH_COMPLETED,
        tag="training",
        metric_names=["accuracy", "loss"],
    )
    tb_logger.attach_output_handler(
        val_evaluator,
        event_name=Events.EPOCH_COMPLETED,
        tag="validation",
        metric_names=["accuracy", "loss"],
        global_step_transform=global_step_from_engine(trainer),
    )

    # training progress
    pbar = ProgressBar(persist=True, position=0)
    pbar.attach(trainer, metric_names="all")

    def log_training_results(engine):
        train_evaluator.run(train_loader)
        val_evaluator.run(val_loader)
        train_loss = train_evaluator.state.metrics["loss"]
        val_loss = val_evaluator.state.metrics["loss"]
        train_acc = train_evaluator.state.metrics["accuracy"]
        val_acc = val_evaluator.state.metrics["accuracy"]
        pbar.log_message(
            "Training Results - Epoch: {}  Loss: {:.6f}  Accuracy: {:.6f}".format(
                engine.state.epoch, train_loss, train_acc
            )
        )
        pbar.log_message(
            "Validation Results - Epoch: {}  Loss: {:.6f}  Accuracy: {:.6f}".format(
                engine.state.epoch, val_loss, val_acc
            )
        )
        pbar.n = pbar.last_print_n = 0

    trainer.add_event_handler(Events.EPOCH_COMPLETED, log_training_results)

    # def get_val_loss(engine):
    # 	return -engine.state.metrics['loss']
    def get_val_acc(engine):
        return engine.state.metrics["accuracy"]

    # checkpoint and early stopping
    checkpointer = ModelCheckpoint(
        path_ckpt,
        "model",
        score_function=get_val_acc,
        score_name="accuracy",
        require_empty=False,
    )
    early_stopper = EarlyStopping(patience, get_val_acc, trainer)

    to_save = {"optimizer": optimizer, "model": model}
    if scheduler is not None:
        to_save["scheduler"] = scheduler
    val_evaluator.add_event_handler(Events.COMPLETED, checkpointer, to_save)
    val_evaluator.add_event_handler(Events.COMPLETED, early_stopper)
    if scheduler is not None:
        trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)

    # free resources
    trainer.add_event_handler(Events.ITERATION_COMPLETED, lambda _: _empty_cache())
    train_evaluator.add_event_handler(
        Events.ITERATION_COMPLETED, lambda _: _empty_cache()
    )
    val_evaluator.add_event_handler(
        Events.ITERATION_COMPLETED, lambda _: _empty_cache()
    )

    trainer.run(train_loader, max_epochs=epochs)
    tb_logger.close()

    # Evaluation with best model
    model.load_state_dict(
        torch.load(glob.glob(os.path.join(path_ckpt, "*.pt*"))[0])["model"]
    )
    train_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)
    val_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)

    train_evaluator.run(train_loader)
    val_evaluator.run(val_loader)

    _pretty_print("Evaluating best model")
    pbar.log_message(
        "Best model on training set - Loss: {:.6f}  Accuracy: {:.6f}".format(
            train_evaluator.state.metrics["loss"],
            train_evaluator.state.metrics["accuracy"],
        )
    )
    pbar.log_message(
        "Best model on validation set - Loss: {:.6f}  Accuracy: {:.6f}".format(
            val_evaluator.state.metrics["loss"], val_evaluator.state.metrics["accuracy"]
        )
    )

    return model, train_evaluator.state.metrics, val_evaluator.state.metrics
Example #12
    def fit(self,
            train_loader: _data.DataLoader,
            val_loader: _data.DataLoader,
            epochs: int = 1,
            batches: int = None,
            learning_rate: float = 1e-3) -> None:
        if batches is None:
            batches = VocalExtractor.get_number_of_batches(train_loader)

        loss_fn = nn.BCELoss()
        optimizer = _optim.Adam(self.model.parameters(), lr=learning_rate)

        trainer = _engine.create_supervised_trainer(self.model,
                                                    optimizer,
                                                    loss_fn,
                                                    device=self.device)

        _metrics.RunningAverage(output_transform=lambda x: x,
                                device=self.device).attach(trainer, 'loss')
        progressbar = _chandlers.ProgressBar(
            bar_format=
            "{desc}[{n_fmt}/{total_fmt}] {percentage:3.0f}%|{bar:20}| "
            "[{elapsed}<{remaining}]{postfix}",
            persist=True,
            ascii=" #")
        progressbar.attach(trainer, ['loss'])

        def get_metrics_fn() -> Dict[str, _metrics.Metric]:
            def rounded_transform(output):
                y_pred, y = output
                return torch.round(y_pred), y

            transform = rounded_transform
            accuracy = _metrics.Accuracy(transform, device=self.device)
            precision = _metrics.Precision(transform, device=self.device)
            recall = _metrics.Recall(transform, device=self.device)
            f1 = precision * recall * 2 / (precision + recall + 1e-20)
            return {
                'loss': _metrics.Loss(loss_fn),
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1': f1
            }

        evaluator = _engine.create_supervised_evaluator(
            self.model, metrics=get_metrics_fn(), device=self.device)

        score_fn_name = "f1"

        def score_function(engine: _engine.Engine):
            return engine.state.metrics[score_fn_name]

        best_model_saver = _handlers.ModelCheckpoint(
            dirname="best_models",
            filename_prefix="vocal_extractor",
            score_name=score_fn_name,
            score_function=score_function,
            n_saved=5,
            create_dir=True)
        evaluator.add_event_handler(_engine.Events.COMPLETED, best_model_saver,
                                    {"model": self.model})

        each_model_saver = _handlers.ModelCheckpoint(
            dirname="all_models",
            filename_prefix="vocal_extractor",
            score_name=score_fn_name,
            score_function=score_function,
            n_saved=None,
            create_dir=True)
        evaluator.add_event_handler(_engine.Events.COMPLETED, each_model_saver,
                                    {"model": self.model})

        @trainer.on(_engine.Events.EPOCH_COMPLETED)
        def on_epoch_completed(engine: _engine.Engine) -> None:
            metrics = VocalExtractor.compute_metrics(val_loader, evaluator)
            string = ", ".join(f"val_{k}: {v:.4f}" for k, v in metrics.items())
            progressbar.log_message(string + "\n")

        with _tb_logger.TensorboardLogger(log_dir="tb_logs") as tb_logger:
            global_step = _tb_logger.global_step_from_engine(trainer)

            train_running_loss_log_handler = _tb_logger.OutputHandler(
                tag="training", output_transform=lambda x: {'running_loss': x})
            tb_logger.attach(trainer,
                             log_handler=train_running_loss_log_handler,
                             event_name=_engine.Events.ITERATION_COMPLETED)

            val_metrics_log_handler = _tb_logger.OutputHandler(
                tag="validation",
                metric_names=[name for name, _ in get_metrics_fn().items()],
                global_step_transform=global_step)
            tb_logger.attach(evaluator,
                             log_handler=val_metrics_log_handler,
                             event_name=_engine.Events.EPOCH_COMPLETED)

            tb_logger.attach(
                trainer,
                log_handler=_tb_logger.OptimizerParamsHandler(optimizer),
                event_name=_engine.Events.ITERATION_STARTED)

            tb_logger.attach(trainer,
                             log_handler=_tb_logger.WeightsScalarHandler(
                                 self.model),
                             event_name=_engine.Events.ITERATION_COMPLETED)
            tb_logger.attach(trainer,
                             log_handler=_tb_logger.WeightsHistHandler(
                                 self.model),
                             event_name=_engine.Events.EPOCH_COMPLETED)

            tb_logger.attach(trainer,
                             log_handler=_tb_logger.GradsScalarHandler(
                                 self.model),
                             event_name=_engine.Events.ITERATION_COMPLETED)
            tb_logger.attach(trainer,
                             log_handler=_tb_logger.GradsHistHandler(
                                 self.model),
                             event_name=_engine.Events.EPOCH_COMPLETED)

        torchsummary.summary(self.model,
                             input_size=(1, self.freq_bins, self.time_bins),
                             batch_size=train_loader.batch_size,
                             device=self.device)
        trainer.run(data=train_loader, epoch_length=batches, max_epochs=epochs)
Example #13
def training(rank, config):
    rank = idist.get_rank()
    manual_seed(config["seed"] + rank)
    device = idist.device()

    # Define output folder:
    config.output = "/tmp/output"

    model = idist.auto_model(config.model)
    optimizer = idist.auto_optim(config.optimizer)
    criterion = config.criterion

    train_set, val_set = config.train_set, config.val_set
    train_loader = idist.auto_dataloader(train_set,
                                         batch_size=config.train_batch_size)
    val_loader = idist.auto_dataloader(val_set,
                                       batch_size=config.val_batch_size)

    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        criterion,
                                        device=device)
    trainer.logger = setup_logger("Trainer")

    metrics = {"accuracy": Accuracy(), "loss": Loss(criterion)}

    train_evaluator = create_supervised_evaluator(model,
                                                  metrics=metrics,
                                                  device=device)
    train_evaluator.logger = setup_logger("Train Evaluator")
    validation_evaluator = create_supervised_evaluator(model,
                                                       metrics=metrics,
                                                       device=device)
    validation_evaluator.logger = setup_logger("Val Evaluator")

    @trainer.on(Events.EPOCH_COMPLETED(every=config.val_interval))
    def compute_metrics(engine):
        train_evaluator.run(train_loader)
        validation_evaluator.run(val_loader)

    if rank == 0:
        tb_logger = TensorboardLogger(log_dir=config.output)

        tb_logger.attach_output_handler(
            trainer,
            event_name=Events.ITERATION_COMPLETED(every=100),
            tag="training",
            output_transform=lambda loss: {"batchloss": loss},
            metric_names="all",
        )

        for tag, evaluator in [("training", train_evaluator),
                               ("validation", validation_evaluator)]:
            tb_logger.attach_output_handler(
                evaluator,
                event_name=Events.EPOCH_COMPLETED,
                tag=tag,
                metric_names=["loss", "accuracy"],
                global_step_transform=global_step_from_engine(trainer),
            )

        tb_logger.attach_opt_params_handler(
            trainer,
            event_name=Events.ITERATION_COMPLETED(every=100),
            optimizer=optimizer)

    model_checkpoint = ModelCheckpoint(
        config.output,
        n_saved=2,
        filename_prefix="best",
        score_name="accuracy",
        global_step_transform=global_step_from_engine(trainer),
    )
    validation_evaluator.add_event_handler(Events.COMPLETED, model_checkpoint,
                                           {"model": model})

    trainer.run(train_loader, max_epochs=config.num_epochs)

    if rank == 0:
        tb_logger.close()
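Since the function takes ``rank`` as its first argument and calls ``idist.get_rank()``, it is presumably launched through ``ignite.distributed.Parallel``. A minimal launch sketch; the backend choice, the ``Config`` dot-dict, and all config contents below are assumptions, not part of the example:

import torch
import torch.nn as nn
import ignite.distributed as idist
from torch.utils.data import TensorDataset

# Minimal dot-dict so that both config["seed"] and config.model work,
# as the example requires; the real config object is not shown above.
class Config(dict):
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__

def make_config():
    model = nn.Linear(10, 2)
    return Config(
        seed=42,
        model=model,
        optimizer=torch.optim.SGD(model.parameters(), lr=0.1),
        criterion=nn.CrossEntropyLoss(),
        train_set=TensorDataset(torch.randn(64, 10), torch.randint(0, 2, (64,))),
        val_set=TensorDataset(torch.randn(16, 10), torch.randint(0, 2, (16,))),
        train_batch_size=8,
        val_batch_size=8,
        val_interval=1,
        num_epochs=2,
    )

if __name__ == "__main__":
    # "gloo" keeps the sketch CPU-friendly; use "nccl" on multi-GPU nodes.
    with idist.Parallel(backend="gloo", nproc_per_node=2) as parallel:
        parallel.run(training, make_config())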