Example #1
def test_ignite_evaluator_reporting_metrics():
    try:
        from ignite.metrics import MeanSquaredError
    except ImportError:
        pytest.skip('pytorch-ignite is not installed')

    # This test verifies that both the user's manually reported metrics
    # and the ignite-calculated ones are correctly reflected in the
    # reporter observation.
    model = IgniteDummyModel()
    n_data = 10
    x = torch.randn((n_data, 2), requires_grad=True)
    y = torch.randn((n_data, 2))
    dataset = torch.utils.data.TensorDataset(x, y)
    loader = torch.utils.data.DataLoader(dataset, batch_size=3)
    evaluator = create_dummy_evaluator(model)
    # Attach metrics to the evaluator
    metric = MeanSquaredError()
    metric.attach(evaluator, 'mse')
    evaluator_ignite_ext = ppe.training.extensions.IgniteEvaluator(
        evaluator, loader, model, progress_bar=False)
    reporter = ppe.reporting.Reporter()
    with reporter:
        result = evaluator_ignite_ext()
    # Internally reported metrics
    assert result['main/x'] == 1.5
    # Ignite calculated metric
    assert result['val/mse'] == 0.0
Example #2
def _test_distrib_accumulator_device(device):

    metric_devices = [torch.device("cpu")]
    if device.type != "xla":
        metric_devices.append(idist.device())
    for metric_device in metric_devices:

        device = torch.device(device)
        mse = MeanSquaredError(device=metric_device)
        assert mse._device == metric_device
        assert mse._sum_of_squared_errors.device == metric_device, "{}:{} vs {}:{}".format(
            type(mse._sum_of_squared_errors.device),
            mse._sum_of_squared_errors.device,
            type(metric_device),
            metric_device,
        )

        y_pred = torch.tensor([[2.0], [-2.0]])
        y = torch.zeros(2)
        mse.update((y_pred, y))
        assert mse._sum_of_squared_errors.device == metric_device, "{}:{} vs {}:{}".format(
            type(mse._sum_of_squared_errors.device),
            mse._sum_of_squared_errors.device,
            type(metric_device),
            metric_device,
        )
Example #3
def _test_distrib_integration(device, tol=1e-6):
    import numpy as np
    from ignite.engine import Engine

    rank = idist.get_rank()
    n_iters = 100
    s = 10
    offset = n_iters * s

    y_true = torch.arange(0,
                          offset * idist.get_world_size(),
                          dtype=torch.float).to(device)
    y_preds = torch.ones(offset * idist.get_world_size(),
                         dtype=torch.float).to(device)

    def update(engine, i):
        return (
            y_preds[i * s + offset * rank:(i + 1) * s + offset * rank],
            y_true[i * s + offset * rank:(i + 1) * s + offset * rank],
        )

    engine = Engine(update)

    m = MeanSquaredError()
    m.attach(engine, "mse")

    data = list(range(n_iters))
    engine.run(data=data, max_epochs=1)

    assert "mse" in engine.state.metrics
    res = engine.state.metrics["mse"]

    true_res = np.mean(np.power((y_true - y_preds).cpu().numpy(), 2.0))

    assert pytest.approx(res, rel=tol) == true_res
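
Note: the distributed test above works because each rank updates the metric only with its own contiguous slice of `y_preds`/`y_true`, and ignite combines the accumulated sums across ranks. A minimal single-process sketch of that slicing (no `idist` backend; `world_size = 4` is an assumed value for illustration):

import torch

# Single-process illustration of the per-rank slicing used in the
# distributed test above (no idist / distributed backend required).
n_iters, s, world_size = 100, 10, 4
offset = n_iters * s

y_true = torch.arange(0, offset * world_size, dtype=torch.float)
y_preds = torch.ones(offset * world_size, dtype=torch.float)

# Collect every slice that each "rank" would feed into the metric.
covered = []
for rank in range(world_size):
    for i in range(n_iters):
        covered.append(torch.arange(i * s + offset * rank,
                                    (i + 1) * s + offset * rank))
covered = torch.cat(covered)

# Together the slices cover every index exactly once, so the combined
# metric equals the MSE over the full tensors.
assert torch.equal(covered.sort().values, torch.arange(offset * world_size))
print(torch.mean((y_true - y_preds) ** 2).item())
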
Example #4
def test_zero_div():
    mse = MeanSquaredError()
    with pytest.raises(
            NotComputableError,
            match=r"MeanSquaredError must have at least one example before it can be computed",
    ):
        mse.compute()
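
Note: `compute()` raises `NotComputableError` whenever no `update()` call has happened since the last `reset()`. A minimal sketch of guarding for that in application code (the NaN fallback is an arbitrary choice for illustration):

import math

import torch
from ignite.exceptions import NotComputableError
from ignite.metrics import MeanSquaredError

mse = MeanSquaredError()

def safe_mse(metric):
    # Return NaN instead of raising when no examples were accumulated.
    try:
        return metric.compute()
    except NotComputableError:
        return math.nan

print(safe_mse(mse))                                   # nan: nothing accumulated yet
mse.update((torch.tensor([[2.0]]), torch.zeros(1)))
print(safe_mse(mse))                                   # 4.0
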
Example #5
def test_accumulator_detached():
    mse = MeanSquaredError()

    y_pred = torch.tensor([[2.0], [-2.0]], requires_grad=True)
    y = torch.zeros(2)
    mse.update((y_pred, y))

    assert not mse._sum_of_squared_errors.requires_grad
Example #6
def test_compute():
    mse = MeanSquaredError()

    y_pred = torch.Tensor([[2.0], [-2.0]])
    y = torch.zeros(2)
    mse.update((y_pred, y))
    assert mse.compute() == 4.0

    mse.reset()
    y_pred = torch.Tensor([[3.0], [-3.0]])
    y = torch.zeros(2)
    mse.update((y_pred, y))
    assert mse.compute() == 9.0
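
Note: the expected values follow from how ignite's `MeanSquaredError` is defined: the summed squared error divided by the number of examples (the size of the first dimension). A quick check of the first assert by hand:

import torch

y_pred = torch.tensor([[2.0], [-2.0]])
y = torch.zeros(2)

# Sum of squared errors, divided by the number of examples (first dimension).
sse = torch.sum((y_pred - y.view_as(y_pred)) ** 2)
print((sse / y_pred.shape[0]).item())  # (4 + 4) / 2 = 4.0
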
Example #7
    def _test(metric_device):
        engine = Engine(update)

        m = MeanSquaredError(device=metric_device)
        m.attach(engine, "mse")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=1)

        assert "mse" in engine.state.metrics
        res = engine.state.metrics["mse"]

        true_res = np.mean(np.power((y_true - y_preds).cpu().numpy(), 2.0))

        assert pytest.approx(res, rel=tol) == true_res
Example #8
def _create_cvae_evaluator(network,
                           criterion,
                           device,
                           metrics=None,
                           non_blocking=False):
    from ignite.metrics import Loss

    if metrics is None:
        metrics = {}

    def loss_output_transform(output):
        return (*output[:2], {"mu": output[3], "log_var": output[4]})

    metrics.setdefault("loss",
                       Loss(criterion, output_transform=loss_output_transform))
    metrics.setdefault("mse",
                       MeanSquaredError(output_transform=lambda x: x[:2]))

    eval_step = create_cvae_eval_step(network,
                                      device,
                                      non_blocking=non_blocking)
    evaluator = Engine(eval_step)

    for metric_name, metric in metrics.items():
        metric.attach(evaluator, metric_name)
    return evaluator
Example #9
def metrics_selector(mode, loss):
    mode = mode.lower()
    if mode == "classification":
        metrics = {
            "loss": loss,
            "accuracy": Accuracy(),
            "accuracy_topk": TopKCategoricalAccuracy(),
            "precision": Precision(average=True),
            "recall": Recall(average=True)
        }
    elif mode == "multiclass-multilabel":
        metrics = {
            "loss": loss,
            "accuracy": Accuracy(),
        }
    elif mode == "regression":
        metrics = {
            "loss": loss,
            "mse": MeanSquaredError(),
            "mae": MeanAbsoluteError()
        }
    else:
        raise RuntimeError(
            "Invalid task mode, select classification, "
            "multiclass-multilabel or regression")

    return metrics
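
Note: `metrics_selector` only builds the dictionary; the caller still has to hand it to an evaluator or attach each metric to an engine. A minimal usage sketch, assuming the function above is in scope and wrapping the criterion in `ignite.metrics.Loss` for the `loss` entry (the tiny model and data are stand-ins for illustration):

import torch
import torch.nn as nn
from ignite.engine import create_supervised_evaluator
from ignite.metrics import Loss

# "loss" is expected to already be an ignite metric, e.g. Loss(criterion).
metrics = metrics_selector("regression", Loss(nn.MSELoss()))

# Tiny stand-in model and data, just to show the wiring.
model = nn.Linear(4, 1)
x, y = torch.randn(32, 4), torch.randn(32, 1)
loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x, y),
                                     batch_size=8)

evaluator = create_supervised_evaluator(model, metrics=metrics)
state = evaluator.run(loader)
print(state.metrics["mse"], state.metrics["mae"], state.metrics["loss"])
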
Example #10
def create_sr_evaluator(
    model,
    device=None,
    non_blocking=True,
    denormalize=True,
    mean=None,
):
    # transfer mean to the device and reshape it so
    # that it is broadcastable to the BCHW format
    mean = mean.to(device).reshape(1, -1, 1, 1)

    def denorm_fn(x):
        return torch.clamp(x + mean, min=0., max=1.)

    def _evaluate_model(engine, batch):
        model.eval()
        x, y = _prepare_batch(batch, device=device, non_blocking=non_blocking)
        with torch.no_grad():
            y_pred = model(x)
        if denormalize:
            y_pred, y = map(denorm_fn, [y_pred, y])
        return y_pred, y

    engine = Engine(_evaluate_model)
    MeanAbsoluteError().attach(engine, 'l1')
    MeanSquaredError().attach(engine, 'l2')
    PNSR(max_value=1.0).attach(engine, 'pnsr')

    return engine
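
Note: the `reshape(1, -1, 1, 1)` is what lets the per-channel mean broadcast against BCHW batches inside `denorm_fn`. A short sketch of that broadcasting, with an assumed 3-channel mean:

import torch

mean = torch.tensor([0.485, 0.456, 0.406])   # assumed per-channel mean
mean = mean.reshape(1, -1, 1, 1)             # shape (1, C, 1, 1)

batch = torch.rand(8, 3, 32, 32) - mean      # normalized BCHW batch
restored = torch.clamp(batch + mean, min=0., max=1.)

print(mean.shape, restored.shape)            # mean broadcasts over B, H, W
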
Example #11
def _test_distrib_accumulator_device(device):

    metric_devices = [torch.device("cpu")]
    if device.type != "xla":
        metric_devices.append(idist.device())
    for metric_device in metric_devices:

        device = torch.device(device)
        mse = MeanSquaredError(device=metric_device)

        for dev in [mse._device, mse._sum_of_squared_errors.device]:
            assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}"

        y_pred = torch.tensor([[2.0], [-2.0]])
        y = torch.zeros(2)
        mse.update((y_pred, y))

        for dev in [mse._device, mse._sum_of_squared_errors.device]:
            assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}"
Example #12
def test_create_supervised_with_metrics():
    model = Linear(1, 1)
    model.weight.data.zero_()
    model.bias.data.zero_()

    evaluator = create_supervised_evaluator(model, metrics={'mse': MeanSquaredError()})

    x = torch.FloatTensor([[1.0], [2.0]])
    y = torch.FloatTensor([[3.0], [4.0]])
    data = [(x, y)]

    state = evaluator.run(data)
    assert state.metrics['mse'] == 12.5
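
Note: the expected `12.5` follows from the zeroed model: the predictions are `[0.0, 0.0]`, the squared errors against `[3.0, 4.0]` are `9` and `16`, and `(9 + 16) / 2 = 12.5`. The same arithmetic by hand:

import torch

y_pred = torch.zeros(2, 1)                   # output of the zeroed Linear(1, 1)
y = torch.tensor([[3.0], [4.0]])
print(torch.mean((y_pred - y) ** 2).item())  # 12.5
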
Example #13
def create_vae_engines(
    model,
    optimizer,
    criterion=None,
    metrics=None,
    device=None,
    non_blocking=False,
    fig_dir=None,
    unflatten=None,
):

    device = model.device
    if criterion is None:
        criterion = get_default_autoencoder_loss()

    train_step = create_vae_train_step(model,
                                       optimizer,
                                       criterion,
                                       device=device,
                                       non_blocking=non_blocking)
    eval_step = create_vae_eval_step(model,
                                     device=device,
                                     non_blocking=non_blocking)

    if metrics is None:
        metrics = {}
    metrics.setdefault(
        "loss",
        Loss(criterion, output_transform=loss_eval_output_transform),
    )
    metrics.setdefault("mse",
                       MeanSquaredError(output_transform=lambda x: x[:2]))
    trainer = Engine(train_step)
    evaluator = create_autoencoder_evaluator(eval_step, metrics=metrics)

    save_image_callback = create_save_image_callback(fig_dir,
                                                     unflatten=unflatten)

    def _epoch_getter():
        return trainer.state.__dict__.get("epoch", None)

    evaluator.add_event_handler(
        Events.ITERATION_COMPLETED(once=1),
        save_image_callback,
        epoch=_epoch_getter,
    )

    val_log_handler, val_logger = create_log_handler(trainer)

    return trainer, evaluator, val_log_handler, val_logger
Example #14
def test_compute():

    mse = MeanSquaredError()

    def _test(y_pred, y, batch_size):
        mse.reset()
        if batch_size > 1:
            n_iters = y.shape[0] // batch_size + 1
            for i in range(n_iters):
                idx = i * batch_size
                mse.update(
                    (y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))
        else:
            mse.update((y_pred, y))

        np_y = y.numpy()
        np_y_pred = y_pred.numpy()

        np_res = np.power((np_y - np_y_pred), 2.0).sum() / np_y.shape[0]

        assert isinstance(mse.compute(), float)
        assert mse.compute() == np_res

    def get_test_cases():

        test_cases = [
            (torch.randint(0, 10,
                           size=(100, 1)), torch.randint(0, 10,
                                                         size=(100, 1)), 1),
            (torch.randint(-20, 20,
                           size=(100, 5)), torch.randint(-20,
                                                         20,
                                                         size=(100, 5)), 1),
            # updated batches
            (torch.randint(0, 10,
                           size=(100, 1)), torch.randint(0, 10,
                                                         size=(100, 1)), 16),
            (torch.randint(-20, 20,
                           size=(100, 5)), torch.randint(-20,
                                                         20,
                                                         size=(100, 5)), 16),
        ]

        return test_cases

    for _ in range(5):
        # check multiple random inputs, as exact random occurrences are rare
        test_cases = get_test_cases()
        for y_pred, y, batch_size in test_cases:
            _test(y_pred, y, batch_size)
Example #15
def do_inference(cfg,
                 model,
                 test_loader,
                 classes_list,
                 loss_fn,
                 target_set_name="test",
                 plotFlag=False):
    num_classes = len(classes_list) if classes_list is not None else 0
    device = cfg.MODEL.DEVICE

    logger = logging.getLogger("classification.inference")
    logging._warn_preinit_stderr = 0
    logger.info("Enter inferencing for {} set".format(target_set_name))

    metrics_eval = {
        "mse":
        MeanSquaredError(
            output_transform=lambda x: (x["rg_logits"], x["rg_labels"])),
    }

    evaluator = create_supervised_evaluator(model,
                                            metrics=metrics_eval,
                                            loss_fn=loss_fn,
                                            device=device)

    metrics = dict()

    @evaluator.on(Events.EPOCH_COMPLETED)
    def log_inference_results(engine):
        logger.info("Test Results")
        if engine.state.metrics.get("mse") is not None:
            mse = engine.state.metrics["mse"]
            logger.info("MSE: {:.3f}".format(mse))
            metrics["mse"] = mse

    evaluator.run(test_loader)

    return metrics
Example #16
def fit_naive_model():
    train_ds = PhotocurrentData("Spectral Responsivity Data Summary.csv",
                                params="model_params.json")
    eval_ds = PhotocurrentData("Spectral Responsivity Data Summary.csv",
                               params="model_params.json")
    _, _, y_mean, _ = train_ds.data_std()
    # criterion = MSELoss()
    # criterion = SmoothL1Loss()

    metrics = {
        "MSE": MeanSquaredError(),
    }

    train_loader = DataLoader(train_ds, shuffle=True, batch_size=2)
    val_loader = DataLoader(eval_ds, shuffle=True, batch_size=2)

    model = NaiveSpectralModel(wavelengths=train_ds.wavelengths,
                               params="model_params.json")
    optimizer = Adam(model.parameters(), lr=1e-2)
    # criterion = SmoothWeightsLoss(model, weights=1/y_mean, lambda_l1=1e-2, lambda_rows=1e-3, lambda_cols=1e-3, lambda_norm=0)
    criterion = SmoothWeightsLoss(model,
                                  lambda_l1=1e-3,
                                  lambda_rows=0,
                                  lambda_cols=0,
                                  lambda_norm_rows=0,
                                  lambda_norm_cols=0)
    trainer = get_trainer(model, train_loader, val_loader, criterion,
                          optimizer, metrics)
    trainer.run(train_loader, max_epochs=200)

    plot_weights_R_inv(model)
    plot_prediction_R_inv(model, train_ds)
    plot_test_R_inv(model,
                    "Reconstruction Data Summary - BP-5um data.csv",
                    title="BP-5\u03BCm",
                    normlizer=train_ds.normlize)
Example #17
def _attach_peaks_related(engine: Engine, prefix: str = ""):
    transform = lambda x: (x["refenrichment"].flatten(), x["predenrichment"].
                           flatten())
    MeanSquaredError(transform).attach(engine, prefix + "mse")
    NonZeroMeanSquaredError(transform).attach(engine, prefix + "non_zero_mse")
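
Note: `output_transform` is how a metric consumes engines whose step function returns a dict instead of a plain `(y_pred, y)` tuple. A minimal self-contained sketch of the same pattern using only the built-in `MeanSquaredError` (`NonZeroMeanSquaredError` above is project-specific):

import torch
from ignite.engine import Engine
from ignite.metrics import MeanSquaredError

def step(engine, batch):
    # The step returns a dict; each metric picks out what it needs.
    pred = batch["ref"] + 0.1
    return {"refenrichment": batch["ref"], "predenrichment": pred}

engine = Engine(step)
transform = lambda out: (out["predenrichment"].flatten(),
                         out["refenrichment"].flatten())
MeanSquaredError(transform).attach(engine, "mse")

data = [{"ref": torch.rand(4, 8)} for _ in range(5)]
engine.run(data, max_epochs=1)
print(engine.state.metrics["mse"])  # 0.01: constant error of 0.1, squared
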
Example #18
def train_network(
    net, train_loader, valid_loader, hparameters, device, dtype,
    loggers=[False, False, False], log_dir=None):
    """
    Network trainer using the ignite framework.

    Args:
        net (`torch.nn.Module`): the model to be trained.
        train_loader (`torch.data.DataLoader`): data loader for the training set.
        valid_loader (`torch.data.DataLoader`): data loader for the validation set.
        hparameters (dict): hyper-parameters for the training process.
        device (`torch.device`): device on which training and evaluation are performed.
        dtype (`torch.dtype`): data type for the tensors under processing.
        loggers (list of len 3): verbosity of the trainer.
        log_dir (str, optional): directory for logs (tensorboard logging is
            currently commented out).

    TODO:
        - At the moment, tensorboard logging is commented out.
        - There is a discrepancy between the training loss and the MSE metric, as the
          implementations in the PyTorch core and in ignite are not consistent.
    """

    # Define loss and optimizer
    criterion = nn.MSELoss(reduction=hparameters.get('mse_reduction', 'mean'))
    metrics = {'mse': MeanSquaredError()}

    optimizer = get_optimiser(hparameters.get('optimiser', 'adam'), net.parameters(), hparameters)

    # define training and evaluation engines
    trainer = create_supervised_trainer(net, optimizer, criterion, device, dtype)
    train_evaluator = create_supervised_evaluator(net, metrics, device, dtype)
    valid_evaluator = create_supervised_evaluator(net, metrics, device, dtype)

    # adding early stopping criterion
    def score_function(engine):
        val_loss = engine.state.metrics['mse']
        return -val_loss
    es_handler = EarlyStopping(
        patience=hparameters.get('patience', 20),
        score_function=score_function, trainer=trainer, model=net)
    # the handler is attached to an *Evaluator* (runs one epoch on validation dataset).
    valid_evaluator.add_event_handler(Events.COMPLETED, es_handler, net)

    # keep track of the training and validation loss
    tr_history = {'training_loss': [], 'validation_loss': []}
    # create_dir(log_dir)  # creating the log dir if not None
    # writer = create_summary_writer(net, train_loader, log_dir)

    if(loggers[0]):
        @trainer.on(Events.ITERATION_COMPLETED)
        def log_training_loss(trainer):
            print("Epoch {} | Batch {} | Loss: {:.2f}".format(
                trainer.state.epoch, (trainer.state.iteration - 1) % len(train_loader), trainer.state.output))
            # writer.add_scalar("training/loss", trainer.state.output, trainer.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(trainer):
        train_evaluator.run(train_loader)
        metrics = train_evaluator.state.metrics
        tr_history['training_loss'].append(metrics['mse'])
        if(loggers[1]):
            # writer.add_scalars('MSE', {"training": metrics['mse']}, trainer.state.epoch)
            print("Epoch: {} - Training loss: {:.2f} | MSE: {:.2f}"
                  .format(trainer.state.epoch, trainer.state.output, metrics['mse']))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer):
        valid_evaluator.run(valid_loader)
        metrics = valid_evaluator.state.metrics
        tr_history['validation_loss'].append(metrics['mse'])
        if(loggers[2]):
            # writer.add_scalars('MSE', {"validation": metrics['mse']}, trainer.state.epoch)
            print("Epoch: {} - Validation MSE: {:.2f}"
                  .format(trainer.state.epoch, metrics['mse']))

    trainer.run(train_loader, max_epochs=hparameters.get("num_epochs", 10000))

    # writer.close()
    return es_handler.get_best_model_after_stop(), tr_history
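
Note: the TODO about the loss/metric discrepancy comes down to the denominator: `nn.MSELoss(reduction='mean')` averages over every element, while ignite's `MeanSquaredError` divides the summed squared error by the number of examples (the first dimension), so for multi-dimensional targets they differ by the per-example size. A small sketch of where the factor comes from:

import torch
import torch.nn as nn
from ignite.metrics import MeanSquaredError

y_pred = torch.randn(8, 5)
y = torch.randn(8, 5)

# Averages over all 8 * 5 elements.
loss = nn.MSELoss(reduction='mean')(y_pred, y)

# Sums squared errors and divides by the 8 examples only.
metric = MeanSquaredError()
metric.update((y_pred, y))
mse = metric.compute()

print(loss.item(), mse, mse / loss.item())  # ratio is the per-example size (5)
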
Example #19
    train_dataset, val_dataset = random_split(
        torch_ds, [X_scaled.shape[0] - num_split, num_split])
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=128)

    # Setup
    model = Autoencoder(input_dim=X_scaled.shape[1],
                        hidden_dim=100,
                        latent_dim=3).to(device)
    optimizer = torch.optim.Adam(params=model.parameters())
    loss = nn.MSELoss()

    # Ignite me
    trainer = create_supervised_trainer(model, optimizer, loss, device=device)
    evaluator = create_supervised_evaluator(
        model, metrics={'mse': MeanSquaredError()}, device=device)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_loss(trainer):
        print("Training results - EPOCH [{}]: Avg loss: {:.3f}".format(
            trainer.state.epoch, trainer.state.output))

    @trainer.on(Events.EPOCH_STARTED)
    def validation_loss(trainer):
        if trainer.state.epoch == 1:
            evaluator.run(val_loader)
            metrics = evaluator.state.metrics
            print("Validation Results - Epoch: {}  Avg MSE: {:.2f}".format(
                trainer.state.epoch, metrics['mse']))

    @trainer.on(Events.EPOCH_COMPLETED)
Example #20
# Setup logging
log_dir = 'runs/meta_rec_mf_bias_' + str(datetime.now()).replace(' ', '_')
writer = SummaryWriter(log_dir=log_dir)

# Instantiate the model class object
model = MF(n_user, n_item, writer=writer, k=k, c_bias=c_bias, c_vector=c_vector)

# Use Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Create a supervised trainer
trainer = create_supervised_trainer(model, optimizer, model.loss)

# Use Mean Squared Error as evaluation metric
metrics = {'evaluation': MeanSquaredError()}

# Create a supervised evaluator
evaluator = create_supervised_evaluator(model, metrics=metrics)

# Load the train and test data
train_loader = Loader(train_x, train_y, batchsize=1024)
test_loader = Loader(test_x, test_y, batchsize=1024)


def log_training_loss(engine, log_interval=500):
    """
    Function to log the training loss
    """
    model.itr = engine.state.iteration  # Keep track of iterations
    if model.itr % log_interval == 0:
Example #21
def do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        classes_list,
        optimizer,
        scheduler,
        loss_fn,
        start_epoch,
):
    # 1.Load parameters from cfg
    epochs = cfg.SOLVER.MAX_EPOCHS
    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    output_dir = cfg.SOLVER.OUTPUT_DIR
    device = cfg.MODEL.DEVICE

    # 2.Recording tools setup
    # (1) Logger
    logger = logging.getLogger("classification.train")   # corresponding to logger("classification")
    logger.info("Start training")

    # (2) TensorBoard SummaryWriter
    # save progress
    writer_train = SummaryWriter(cfg.SOLVER.OUTPUT_DIR + "/summary/train/")
    writer_val = SummaryWriter(cfg.SOLVER.OUTPUT_DIR + "/summary/val")
    # save graph
    writer_graph = SummaryWriter(cfg.SOLVER.OUTPUT_DIR + "/summary/train/graph")

    inputshape = None
    try:
        data = next(iter(train_loader))
        input = data[0]
        #inputshape = (input.shape[1], input.shape[2], input.shape[3]) if len(input.shape)==4 else (input.shape[1], input.shape[2])
        inputshape = [input.shape[i] for i in range(1, len(input.shape))]
        """
        grid = torchvision.utils.make_grid(input)
        writer_graph.add_image('images', grid, 0)
        writer_graph.add_graph(model, input)
        writer_graph.flush()
        """
    except Exception as e:
        print("Failed to save model graph: {}".format(e))

    # 3.Create engine
    # metrics relevant to training
    metrics_train = {
        "avg_total_loss": RunningAverage(output_transform=lambda x: x["total_loss"]),
        "accuracy": RunningAverage(MeanSquaredError(output_transform=lambda x: (x["cf_logits"], x["cf_labels"]))),
        "mse": RunningAverage(MeanSquaredError(output_transform=lambda x: (x["rg_logits"], x["rg_labels"]))),
    }

    # add separate metrics
    lossKeys = cfg.LOSS.TYPE.split(" ")
    if "counts_regression_loss" in lossKeys:
        lossKeys.append("counts_classification_loss")

    for lossName in lossKeys:
        #"""
        if lossName == "contact_prediction_loss":
            metrics_train["AVG-" + "contact_prediction_loss"] = RunningAverage(
                output_transform=lambda x: x["losses"]["contact_prediction_loss"])
        elif lossName == "secondary_structure_prediction_loss":
            metrics_train["AVG-" + "secondary_structure_prediction_loss"] = RunningAverage(
                output_transform=lambda x: x["losses"]["secondary_structure_prediction_loss"])
        else:
            raise Exception('unexpected METRIC_LOSS_TYPE: {}'.format(cfg.LOSS.TYPE))

    # create engine with metrics attached
    trainer = create_supervised_trainer(model, optimizer, metrics_train, loss_fn, device=device, )

    # attach checkpointer & timer to the engine
    checkpointer = ModelCheckpoint(output_dir, cfg.MODEL.BACKBONE_NAME, checkpoint_period, n_saved=300, require_empty=False, start_step=start_epoch)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'model': model, 'optimizer': optimizer})

    #checkpointer_save_graph = ModelCheckpoint(output_dir, cfg.MODEL.BACKBONE_NAME, checkpoint_period, n_saved=300, require_empty=False, start_step=-1)
    #trainer.add_event_handler(Events.STARTED, checkpointer_save_graph, {'model': model, 'optimizer': optimizers[0]})

    timer = Timer(average=True)
    timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)

    # 4.Other event handlers
    @trainer.on(Events.STARTED)
    def start_training(engine):
        engine.state.epoch = start_epoch
        engine.state.iteration = engine.state.iteration + start_epoch * len(train_loader)

        logger.info("Model:{}".format(model))
        print("Input Shape: {}".format(inputshape))
        #inputshape = (cfg.DATA.TRANSFORM.CHANNEL, cfg.DATA.TRANSFORM.SIZE[0], cfg.DATA.TRANSFORM.SIZE[1])
        #logger.info("Model:{}".format(model.count_param(input_shape=inputshape)))

        #metrics = do_inference(cfg, model, val_loader, classes_list, loss_fn, plotFlag=False)

    @trainer.on(Events.EPOCH_COMPLETED)  # Note: since PyTorch 1.2, scheduler.step() should be called after optimizer.step()
    def adjust_learning_rate(engine):
        scheduler.step()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        global ITER
        ITER += 1

        if ITER % (log_period * engine.state.accumulation_steps) == 0:
            step = engine.state.iteration

            # 1.Tensorboard Summary
            # loss (vector)
            avg_losses = {}
            for lossName in lossKeys:
                avg_losses[lossName] = (float("{:.3f}".format(engine.state.metrics["AVG-" + lossName])))
                writer_train.add_scalar("Loss/" + lossName, avg_losses[lossName], step)
                writer_train.flush()

            # other scalars
            scalar_list = ["mse", "avg_total_loss"]
            for scalar in scalar_list:
                writer_train.add_scalar("Train/" + scalar, engine.state.metrics[scalar], step)
                writer_train.flush()

            # learning rate
            writer_train.add_scalar("Train/" + "LearningRate", scheduler.get_lr()[0], step)
            writer_train.flush()

            # 2.logger
            logger.info("Epoch[{}] Iteration[{}/{}] ATLoss: {:.3f}, Avg_Loss: {}, Accuracy: {:.3f}, Base Lr: {:.2e}, step: {}"
                        .format(engine.state.epoch, ITER, len(train_loader),
                                engine.state.metrics['avg_total_loss'], avg_losses,
                                engine.state.metrics['accuracy'],
                                #engine.state.metrics['mse'],
                                scheduler.get_lr()[0], step))

        if len(train_loader) == ITER:
            ITER = 0

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        logger.info('Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
                    .format(engine.state.epoch, timer.value() * timer.step_count,
                            train_loader.batch_size / timer.value()))
        logger.info('-' * 10)
        timer.reset()

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        if engine.state.epoch % eval_period == 0:
            metrics = do_inference(cfg, model, val_loader, classes_list, loss_fn, target_set_name="valid", plotFlag=False)  # no plotting

            step = engine.state.iteration

            writer_val.add_scalar("MSE", metrics['mse'], step)
            writer_val.flush()


    # 5.launch engine
    trainer.run(train_loader, max_epochs=epochs)
    writer_train.close()
    writer_val.close()
Example #22
def create_default_engines_from_steps(
    train_step,
    eval_step,
    criterion=None,
    metrics=None,
    fig_dir=None,
    unflatten=None,
):
    """
    create_default_engines_from_steps(
        train_step,
        eval_step,
        criterion=None,
        metrics=None,
        fig_dir=None,
        unflatten=None,
    )

    Parameters
    ----------
    train_step : callable
        The update function for the trainer
    eval_step : callable
        The update function for the evaluator
    criterion : nn.Loss (optional)
        Note: if criterion is not passed, then validation loss will not be
        tracked by ignite, unless passed via metrics.
    metrics : dict (optional)
    fig_dir : string (optional)
    unflatten : tuple (optional)
    
    Returns
    -------
    trainer : ignite Engine
    evaluator : ignite Engine
    val_log_handler : ignite handler
        To be used with add_evaluation and some dataloaders, in order to track
        progress on a validation set during training.
    val_logger : util.Logger
        Object containing the validation metric data from training.
    """
    if metrics is None:
        metrics = {}
    if criterion is not None:
        metrics.setdefault(
            "loss",
            Loss(criterion, output_transform=loss_eval_output_transform),
        )
    metrics.setdefault("mse",
                       MeanSquaredError(output_transform=lambda x: x[:2]))
    trainer = Engine(train_step)
    evaluator = create_autoencoder_evaluator(eval_step, metrics=metrics)

    save_image_callback = create_save_image_callback(fig_dir,
                                                     unflatten=unflatten)

    def _epoch_getter():
        return trainer.state.__dict__.get("epoch", None)

    evaluator.add_event_handler(
        Events.ITERATION_COMPLETED(once=1),
        save_image_callback,
        epoch=_epoch_getter,
    )

    val_log_handler, val_logger = create_log_handler(trainer)

    return trainer, evaluator, val_log_handler, val_logger
Example #23
def run(config):
    train_loader = get_instance(utils, 'dataloader', config, 'train')
    val_loader = get_instance(utils, 'dataloader', config, 'val')

    model = get_instance(models, 'arch', config)

    model = init_model(model, train_loader)
    model, device = ModelPrepper(model, config).out

    loss_fn = get_instance(nn, 'loss_fn', config)

    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = get_instance(torch.optim, 'optimizer', config,
                             trainable_params)

    writer = create_summary_writer(config, model, train_loader)
    batch_size = config['dataloader']['args']['batch_size']

    if config['mode'] == 'eval' or config['resume']:
        model.load_state_dict(torch.load(config['ckpt_path']))

    epoch_length = int(ceil(len(train_loader) / batch_size))
    desc = "ITERATION - loss: {:.2f}"
    pbar = tqdm(initial=0,
                leave=False,
                total=epoch_length,
                desc=desc.format(0))

    def process_batch(engine, batch):
        inputs, outputs = func(batch)
        model.train()
        model.zero_grad()
        optimizer.zero_grad()
        preds = model(inputs)
        loss = loss_fn(preds, outputs.to(device))

        a = list(model.parameters())[0].clone()

        loss.backward()
        optimizer.step()

        # check if training is happening
        b = list(model.parameters())[0].clone()
        try:
            assert not torch.allclose(a.data,
                                      b.data), 'Model not updating anymore'
        except AssertionError:
            plot_grad_flow(model.named_parameters())

        return loss.item()

    def predict_on_batch(engine, batch):
        inputs, outputs = func(batch)
        model.eval()
        with torch.no_grad():
            y_pred = model(inputs)

        return inputs, y_pred, outputs.to(device)

    trainer = Engine(process_batch)
    trainer.logger = setup_logger("trainer")
    evaluator = Engine(predict_on_batch)
    evaluator.logger = setup_logger("evaluator")

    if config['task'] == 'actionpred':
        Accuracy(output_transform=lambda x: (x[1], x[2])).attach(
            evaluator, 'val_acc')

    if config['task'] == 'gazepred':
        MeanSquaredError(output_transform=lambda x: (x[1], x[2])).attach(
            evaluator, 'val_MSE')

    RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')

    training_saver = ModelCheckpoint(config['checkpoint_dir'],
                                     filename_prefix='checkpoint_' +
                                     config['task'],
                                     n_saved=1,
                                     atomic=True,
                                     save_as_state_dict=True,
                                     create_dir=True,
                                     require_empty=False)

    trainer.add_event_handler(Events.EPOCH_COMPLETED, training_saver,
                              {'model': model})

    @trainer.on(Events.ITERATION_COMPLETED)
    def tb_log(engine):
        pbar.desc = desc.format(engine.state.output)
        pbar.update(1)
        writer.add_scalar('training/avg_loss', engine.state.metrics['loss'],
                          engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def print_trainer_logs(engine):
        pbar.refresh()

        avg_loss = engine.state.metrics['loss']
        tqdm.write('Trainer Results - Epoch {} - Avg loss: {:.2f} \n'.format(
            engine.state.epoch, avg_loss))
        viz_param(writer=writer, model=model, global_step=engine.state.epoch)

        pbar.n = pbar.last_print_n = 0

    @evaluator.on(Events.EPOCH_COMPLETED)
    def print_result(engine):
        try:
            print('Evaluator Results - Accuracy {} \n'.format(
                engine.state.metrics['val_acc']))
        except KeyError:
            print('Evaluator Results - MSE {} \n'.format(
                engine.state.metrics['val_MSE']))

    @evaluator.on(Events.ITERATION_COMPLETED)
    def viz_outputs(engine):
        visualize_outputs(writer=writer,
                          state=engine.state,
                          task=config['task'])

    if config['mode'] == 'train':
        trainer.run(train_loader,
                    max_epochs=config['epochs'],
                    epoch_length=epoch_length)

    pbar.close()

    evaluator.run(val_loader,
                  max_epochs=1,
                  epoch_length=int(ceil(len(val_loader) / batch_size)))

    writer.flush()
    writer.close()
Example #24
def train():
    """Training code.
    Most of the code is initialization and training setup; the actual training loop is
    hidden inside the ignite engine created by `create_supervised_trainer`.
    """
    import argparse

    # By using `argparse` module, you can specify parameters as command-line arguments.
    parser = argparse.ArgumentParser(description="Example of training")
    parser.add_argument(
        "--gpu",
        dest="use_gpu",
        action="store_true",
        help="GPU ID. Generally, setting 0 to use GPU, or -1 to use CPU.")
    parser.add_argument("--dataset-train",
                        type=str,
                        default="dataset/train.csv",
                        help="Training dataset.")
    parser.add_argument("--dataset-validation",
                        type=str,
                        default="dataset/validation.csv",
                        help="Validation dataset.")
    parser.add_argument("--epochs",
                        type=int,
                        default=100,
                        help="Number of training epochs.")
    parser.add_argument("--batchsize",
                        type=int,
                        default=64,
                        help="Size of a mini-batch.")
    parser.add_argument("--n-units",
                        type=int,
                        default=64,
                        help="Number of hidden units.")
    parser.add_argument("--out", default="result", help="Output directory.")
    args = parser.parse_args()

    # Setup a neural network
    in_dim = 3  # Input dimension
    out_dim = 3  # Output dimension
    model = SimpleMLP(in_dim, out_dim, args.n_units)

    # Enable GPU if specified
    if args.use_gpu:
        device = "cuda"
        model = model.to("cuda")  # Move the model to GPU memory
    else:
        device = "cpu"

    # Setup an optimizer.
    # The optimizer specifies the model to be trained and parameter updating method.
    optimizer = optim.Adam(
        model.parameters())  # Use Adam, one of the gradient descent methods

    # Load a training dataset and validation dataset.
    train_loader = torch.utils.data.DataLoader(load_dataset(
        args.dataset_train),
                                               batch_size=args.batchsize,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(load_dataset(
        args.dataset_validation),
                                             batch_size=1000,
                                             shuffle=False)

    # Setup a loss function.
    # In this example, the mean squared error is used.
    loss = nn.MSELoss()

    # Setup a trainer.
    trainer = create_supervised_trainer(model, optimizer, loss, device)

    # Setup an evaluator.
    metrics = {'accuracy': MeanSquaredError(), 'nll': Loss(loss)}
    evaluator = create_supervised_evaluator(model, metrics, device)

    # Setup a log writer
    writer = SummaryWriter(log_dir=args.out)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(trainer):
        print("Epoch[{}] Loss: {:.5f}".format(trainer.state.epoch,
                                              trainer.state.output),
              end="\r")

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(trainer):
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        print(
            "Training Results   - Epoch: {:3d}  Avg accuracy: {:.5f} Avg loss: {:.5f}"
            .format(trainer.state.epoch, metrics['accuracy'], metrics['nll']))
        writer.add_scalar("training/avg_loss", metrics['nll'],
                          trainer.state.epoch)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        print(
            "Validation Results - Epoch: {:3d}  Avg accuracy: {:.5f} Avg loss: {:.5f}"
            .format(trainer.state.epoch, metrics['accuracy'], metrics['nll']))
        writer.add_scalar("validation/avg_loss", metrics['accuracy'],
                          trainer.state.epoch)

    # Settings of model saving
    handler = ModelCheckpoint(dirname=args.out,
                              filename_prefix='sample',
                              save_interval=10,
                              n_saved=3,
                              create_dir=True,
                              require_empty=False)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, handler,
                              {'mymodel': model})

    # Start training
    trainer.run(train_loader, max_epochs=args.epochs)

    writer.close()
Example #25
def test():
    from tqdm import tqdm
    import torch.utils.data
    from scipy.stats import pearsonr
    from sklearn.metrics import mean_squared_error, r2_score

    device = 'cuda'
    true = torch.rand(100_000)
    pred = true + .3 * torch.randn_like(true)
    ds = torch.utils.data.TensorDataset(true, pred)

    print('Manual:')
    print('  Mean true:', true.mean().item())
    print('  Var true :', true.var().item())
    print('  Var pred :', pred.var().item())
    print('  Cov   :', np.cov(true, pred)[0, 1])
    print('  MSE   :', mean_squared_error(true, pred))
    print('  R     :', pearsonr(true, pred)[0])
    print('  R2    :', r2_score(true, pred))
    print()

    mean_true = Mean()
    var_true = Variance()
    var_pred = Variance()
    cov = Covariance()
    mse = MeanSquaredError()
    r = PearsonR()
    r2 = R2()

    for batch_size in [len(ds), 25_000, 1_000, 5, 1]:
        mean_true.reset()
        var_true.reset()
        var_pred.reset()
        cov.reset()
        mse.reset()
        r.reset()
        r2.reset()

        dl = torch.utils.data.DataLoader(ds,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=0)
        for true_batch, pred_batch in tqdm(dl):
            true_batch = true_batch.to(device)
            pred_batch = pred_batch.to(device)
            mean_true.update(true_batch)
            var_true.update(true_batch)
            var_pred.update(pred_batch)
            cov.update((pred_batch, true_batch))
            mse.update((pred_batch, true_batch))
            r.update((pred_batch, true_batch))
            r2.update((pred_batch, true_batch))

        print(f'Batch size {batch_size}:')
        print(
            f'  Mean true: {mean_true.compute()} ({mean_true.compute() - true.mean().item():.0E})'
        )
        print(
            f'  Var true : {var_true.compute()} ({var_true.compute() - true.var().item():.0E})'
        )
        print(
            f'  Var pred : {var_pred.compute()} ({var_pred.compute() - pred.var().item():.0E})'
        )
        print(
            f'  Cov   : {cov.compute()} ({cov.compute() - np.cov(true, pred)[0, 1]:.0E})'
        )
        print(
            f'  MSE   : {mse.compute()} ({mse.compute() - mean_squared_error(true, pred):.0E})'
        )
        print(
            f'  R     : {r.compute()} ({r.compute() - pearsonr(true, pred)[0]:.0E})'
        )
        print(
            f'  R2    : {r2.compute()} ({r2.compute() - r2_score(true, pred):.0E})'
        )
        print()
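
Note: the batch-size sweep above checks that streaming accumulation matches the one-shot results. For MSE this holds because the summed squared error and the example count are both additive across batches. A minimal sketch of the same check using ignite's built-in `MeanSquaredError` against scikit-learn (the `Mean`/`Variance`/`Covariance`/`PearsonR`/`R2` metrics in the example are project-specific):

import torch
from ignite.metrics import MeanSquaredError
from sklearn.metrics import mean_squared_error

true = torch.rand(10_000)
pred = true + 0.3 * torch.randn_like(true)
reference = mean_squared_error(true, pred)

for batch_size in [10_000, 1_000, 5, 1]:
    mse = MeanSquaredError()
    for start in range(0, len(true), batch_size):
        stop = start + batch_size
        mse.update((pred[start:stop], true[start:stop]))
    # The summed squared error and the example count are both additive,
    # so the result is independent of how the data is batched.
    print(batch_size, mse.compute(), abs(mse.compute() - reference))
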
Example #26
def test_zero_div():
    mse = MeanSquaredError()
    with pytest.raises(NotComputableError):
        mse.compute()
Example #27
                         num_workers=num_workers)

model = Model(number_of_classes=number_of_classes)

optimizer = optim.Adam(model.parameters(), lr=args.learningrate)

trainer = create_supervised_trainer(model, optimizer, criterion, device=device)

metrics = {
    "accuracy": Accuracy(),
    "MAE": MeanAbsoluteError(
        output_transform=lambda out: (torch.max(out[0], dim=1)[1], out[1])),
    "MSE": MeanSquaredError(
        output_transform=lambda out: (torch.max(out[0], dim=1)[1], out[1])),
    "loss": Loss(loss_fn=criterion),
}

evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)


@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(trainer):
    print(
        f"Training (Epoch {trainer.state.epoch}): {trainer.state.output:.3f}")


best_epoch = 0
best_val_metrics = {"MAE": np.inf}