Example #1
import os
import shutil
import time

from ignite.engine import Engine, Events
from ignite.handlers import BasicTimeProfiler  # ignite.contrib.handlers in ignite < 0.4.6


def test_write_results():
    true_event_handler_time = 0.125
    true_max_epochs = 3
    true_num_iters = 2
    test_folder = "./test_log_folder"

    if os.path.exists(test_folder):
        shutil.rmtree(test_folder)
    os.makedirs(test_folder)

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(_do_nothing_update_fn)
    profiler.attach(dummy_trainer)

    @dummy_trainer.on(Events.STARTED)
    def delay_start(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.COMPLETED)
    def delay_complete(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.EPOCH_STARTED)
    def delay_epoch_start(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.EPOCH_COMPLETED)
    def delay_epoch_complete(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.ITERATION_STARTED)
    def delay_iter_start(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.ITERATION_COMPLETED)
    def delay_iter_complete(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.GET_BATCH_STARTED)
    def delay_get_batch_started(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.GET_BATCH_COMPLETED)
    def delay_get_batch_completed(engine):
        time.sleep(true_event_handler_time)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    fp = os.path.join(test_folder, "test_log.csv")
    profiler.write_results(fp)

    assert os.path.isfile(fp)

    file_length = 0
    with open(fp) as f:
        for _ in f:
            file_length += 1

    # one row per iteration across all epochs, plus the CSV header
    assert file_length == (true_max_epochs * true_num_iters) + 1

    # cleanup test log directory
    shutil.rmtree(test_folder)
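
Note: `_do_nothing_update_fn` is a helper defined alongside the test in ignite's test module; a minimal stand-in (an assumption, not the original definition) would be:

def _do_nothing_update_fn(engine, batch):
    # no-op update step: the test only measures event-handler timing
    pass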
Example #2
import pytest
from unittest.mock import patch

from ignite.handlers import BasicTimeProfiler, HandlersTimeProfiler  # ignite.contrib.handlers in ignite < 0.4.6


def test_profilers_wrong_inputs():
    profiler = BasicTimeProfiler()
    with pytest.raises(TypeError, match=r"Argument engine should be ignite.engine.Engine"):
        profiler.attach(None)

    with pytest.raises(RuntimeError, match=r"Need pandas to write results as files"):
        with patch.dict("sys.modules", {"pandas": None}):
            profiler.write_results("")

    profiler = HandlersTimeProfiler()
    with pytest.raises(TypeError, match=r"Argument engine should be ignite.engine.Engine"):
        profiler.attach(None)

    with pytest.raises(RuntimeError, match=r"Need pandas to write results as files"):
        with patch.dict("sys.modules", {"pandas": None}):
            profiler.write_results("")
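
Patching a module's entry in sys.modules to None makes any subsequent import of it raise ImportError, which write_results surfaces as the RuntimeError matched above. A minimal demonstration of the mechanism:

from unittest.mock import patch

with patch.dict("sys.modules", {"pandas": None}):
    try:
        import pandas  # noqa: F401
    except ImportError:
        print("pandas import blocked")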
Example #3
import os

from ignite.handlers import BasicTimeProfiler

# `get_prepared_engine_for_basic_profiler` comes from ignite's test helpers.


def test_write_results_basic_profiler(dirname):
    true_event_handler_time = 0.125
    true_max_epochs = 3
    true_num_iters = 2

    profiler = BasicTimeProfiler()
    dummy_trainer = get_prepared_engine_for_basic_profiler(true_event_handler_time)
    profiler.attach(dummy_trainer)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    fp = os.path.join(dirname, "test_log.csv")
    profiler.write_results(fp)

    assert os.path.isfile(fp)

    file_length = 0
    with open(fp) as f:
        for _ in f:
            file_length += 1

    # one row per iteration across all epochs, plus the CSV header
    assert file_length == (true_max_epochs * true_num_iters) + 1
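
Here `dirname` is a pytest fixture supplying a temporary directory; the real fixture lives in ignite's test configuration, but a minimal equivalent built on pytest's built-in tmp_path would be:

import pytest

@pytest.fixture
def dirname(tmp_path):
    # fresh temporary directory per test; pytest handles cleanup
    return str(tmp_path)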
Example #4
import os
from collections import defaultdict

import torch as th
import torch.nn as nn

from ignite.contrib.handlers import ProgressBar
from ignite.engine import Engine, Events
from ignite.handlers import (BasicTimeProfiler, Checkpoint, DiskSaver,
                             EarlyStopping, TerminateOnNan)
from ignite.utils import setup_logger

# Project-local helpers assumed in scope: accuracy_segmentation,
# create_lr_scheduler, print_model_summary, _metrics, _metrics_mean,
# _evaluate, _log_metrics, _save_metrics.

def create_trainer(loader, model, opt, loss_fn, device, args):
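    """Build an ignite trainer/evaluator pair for masked 3D segmentation,
    wired with metrics, progress bars, NaN termination, early stopping,
    LR scheduling with optional warmup, checkpointing, and time profiling.
    """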

    def _update(engine, batch):
        model.train()

        x = batch['x'].to(engine.state.device, non_blocking=True)
        y = batch['y'].to(engine.state.device, non_blocking=True)
        m = batch['m'].to(engine.state.device, non_blocking=True)
        opt.zero_grad()
        y_pred = model(x)

        softmax = nn.Softmax(dim=1)  # normalize over the class channel, matching _inference
        masked_loss = softmax(y_pred)
        # masked_loss = y_pred * m
        loss = loss_fn(masked_loss, y)
        # step the optimizer only when the mask covers more than 70% of the batch
        if m.sum().item() / m.numel() > 0.7:
            loss.backward()
            opt.step()
        masked_loss = (masked_loss > 0.5).float()
        acc = accuracy_segmentation(masked_loss[:, 1, :, :, :], y[:, 1, :, :, :])

        return {
            'x': x.detach(),
            'y': y.detach(),
            'm': m.detach(),
            'y_pred': y_pred.detach(),
            'loss': loss.item(),
            'acc': acc
        }

    def _inference(engine, batch):
        model.eval()

        with th.no_grad():
            x = batch['x'].to(engine.state.device, non_blocking=True)
            y = batch['y'].to(engine.state.device, non_blocking=True)
            m = batch['m'].to(engine.state.device, non_blocking=True)

            y_pred = model(x)
            
            softmax = nn.Softmax(dim=1)
            masked_loss = softmax(y_pred)
            # masked_loss = y_pred * m
            loss = loss_fn(masked_loss, y)
            # threshold class probabilities over the full batch, as in _update
            masked_loss = (masked_loss > 0.5).float()
            acc = accuracy_segmentation(masked_loss[:, 1, :, :, :], y[:, 1, :, :, :])

        return {
            'x': x.detach(),
            'y': y.detach(),
            'm': m.detach(),
            'y_pred': y_pred.detach(),
            'loss': loss.item(),
            'acc': acc
        }


    # wandb.watch(model, log='all')

    trainer = Engine(_update)
    evaluator = Engine(_inference)

    profiler = BasicTimeProfiler()
    profiler.attach(trainer)
    logdir = args.logdir
    save_ = (not args.devrun) and (not args.nosave)

    # initialize trainer state
    trainer.state.device = device
    trainer.state.hparams = args
    trainer.state.save = save_
    trainer.state.logdir = logdir

    trainer.state.df = defaultdict(dict)
    trainer.state.metrics = dict()
    trainer.state.val_metrics = dict()
    trainer.state.best_metrics = defaultdict(list)
    trainer.state.gradnorm = defaultdict(dict)

    # initialize evaluator state
    evaluator.logger = setup_logger('evaluator')
    evaluator.state.device = device
    evaluator.state.df = defaultdict(dict)
    evaluator.state.metrics = dict()

    pbar = ProgressBar(persist=True)
    ebar = ProgressBar(persist=False)

    # attach each bar once with both running metrics; attaching the same
    # bar twice would register duplicate logging handlers
    pbar.attach(trainer, ['loss', 'acc'])
    ebar.attach(evaluator, ['loss', 'acc'])

    # model summary
    if args.model_summary:
        trainer.add_event_handler(
            Events.STARTED,
            print_model_summary, model
        )

    # terminate on nan
    trainer.add_event_handler(
        Events.ITERATION_COMPLETED,
        TerminateOnNan(lambda x: x['loss'])
    )

    # metrics
    trainer.add_event_handler(
        Events.ITERATION_COMPLETED,
        _metrics
    )

    evaluator.add_event_handler(
        Events.ITERATION_COMPLETED,
        _metrics
    )

    trainer.add_event_handler(
        Events.EPOCH_COMPLETED,
        _metrics_mean
    )

    evaluator.add_event_handler(
        Events.COMPLETED,
        _metrics_mean
    )

    trainer.add_event_handler(
        #Events.STARTED | Events.EPOCH_COMPLETED,
        Events.EPOCH_COMPLETED,
        _evaluate, evaluator, loader
    )

    # logging
    trainer.add_event_handler(
        Events.EPOCH_COMPLETED,
        _log_metrics
    )

    # early stopping on validation loss
    if args.early_stopping > 0:
        evaluator.add_event_handler(
            Events.COMPLETED,
            EarlyStopping(
                patience=args.early_stopping,
                score_function=lambda engine: -engine.state.metrics['loss'],
                trainer=trainer,
            )
        )

    # lr schedulers
    if args.epoch_length is None:
        el = len(loader['train'])
    else:
        el = args.epoch_length

    if args.lr_scheduler is not None:
        lr_sched = create_lr_scheduler(opt, args, num_steps=el)

        if args.lr_scheduler != 'plateau':
            def _sched_fun(engine):
                lr_sched.step()
        else:
            def _sched_fun(engine):
                e = engine.state.epoch
                v = engine.state.val_metrics[e]['nmse']
                lr_sched.step(v)

        if args.lr_scheduler == 'linearcycle':
            trainer.add_event_handler(Events.ITERATION_STARTED, lr_sched)
        else:
            trainer.add_event_handler(Events.EPOCH_COMPLETED, _sched_fun)

    # FIXME: warmup modifies the optimizer's base_lr, so it must be created last
    if args.lr_warmup > 0:
        wsched = create_lr_scheduler(opt, args, 'warmup', num_steps=el)
        wsts = wsched.total_steps
        # step the warmup schedule only during the first `wsts` iterations
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(event_filter=lambda _, i: i <= wsts),
            lambda _: wsched.step()
        )

    # saving
    if save_:
        to_save = {
            'model': model,
            'optimizer': opt,
            'trainer': trainer,
            'evaluator': evaluator
        }

        trainer.add_event_handler(
            Events.EPOCH_COMPLETED,
            Checkpoint(to_save, DiskSaver(logdir), n_saved=3)
        )

        # handler = Checkpoint(
        #     {'model': model},
        #     DiskSaver(logdir),
        #     n_saved = 3,
        #     filename_prefix = 'best',
        #     score_function = lambda engine: -engine.state.metrics['nmae'],
        #     score_name = 'val_nmae',
        # )

        # evaluator.add_event_handler(
        #     Events.COMPLETED,
        #     handler
        # )

        # handler = Checkpoint(
        #     {'model': model},
        #     DiskSaver(logdir),
        #     n_saved = 3,
        #     filename_prefix = 'best',
        #     score_function = lambda engine: -engine.state.metrics['nmse'],
        #     score_name = 'val_nmse',
        # )

        # evaluator.add_event_handler(
        #     Events.COMPLETED,
        #     handler
        # )

        # handler = Checkpoint(
        #     {'model': model},
        #     DiskSaver(logdir),
        #     n_saved = 3,
        #     filename_prefix = 'best',
        #     score_function = lambda engine: engine.state.metrics['R2'],
        #     score_name = 'val_R2',
        # )

        # evaluator.add_event_handler(
        #     Events.COMPLETED,
        #     handler
        # )

        trainer.add_event_handler(
            Events.EPOCH_COMPLETED,
            _save_metrics
        )

        # dump profiler timings when the run completes or is terminated early
        trainer.add_event_handler(
            Events.COMPLETED | Events.TERMINATE,
            lambda _: profiler.write_results(os.path.join(logdir, 'time.csv'))
        )

    return trainer
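
A hedged usage sketch for create_trainer; every name below (the model class, the loader factory, and the argparse fields) is an illustrative assumption, not part of the original project:

import argparse

import torch as th
import torch.nn as nn

args = argparse.Namespace(
    logdir='./runs/exp1', devrun=False, nosave=False, model_summary=False,
    early_stopping=10, epoch_length=None, lr_scheduler=None, lr_warmup=0,
)
device = th.device('cuda' if th.cuda.is_available() else 'cpu')

model = UNet3D().to(device)    # hypothetical net producing (B, C, D, H, W) outputs
opt = th.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.BCELoss()         # softmax probabilities vs. binary targets
loader = make_loaders()        # hypothetical dict of 'train'/'val' DataLoaders
                               # yielding batches with 'x', 'y', 'm' tensors

trainer = create_trainer(loader, model, opt, loss_fn, device, args)
trainer.run(loader['train'], max_epochs=50)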