Example #1
def test_event_handler_get_batch_completed():
    true_event_handler_time = 0.1
    true_max_epochs = 1
    true_num_iters = 2

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(_do_nothing_update_fn)
    profiler.attach(dummy_trainer)

    @dummy_trainer.on(Events.GET_BATCH_COMPLETED)
    def delay_get_batch_completed(engine):
        time.sleep(true_event_handler_time)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    results = profiler.get_results()
    event_results = results["event_handlers_stats"]["GET_BATCH_COMPLETED"]

    assert event_results["min/index"][0] == approx(true_event_handler_time,
                                                   abs=1e-1)
    assert event_results["max/index"][0] == approx(true_event_handler_time,
                                                   abs=1e-1)
    assert event_results["mean"] == approx(true_event_handler_time, abs=1e-1)
    assert event_results["std"] == approx(0.0, abs=1e-1)
    assert event_results["total"] == approx(true_max_epochs * true_num_iters *
                                            true_event_handler_time,
                                            abs=1e-1)
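These tests assume a couple of fixtures defined elsewhere in the module. A minimal sketch of the assumed imports and the no-op update function (the BasicTimeProfiler import path varies across ignite versions):

import time

from pytest import approx

from ignite.engine import Engine, Events
from ignite.handlers import BasicTimeProfiler  # ignite.contrib.handlers in older versions


def _do_nothing_update_fn(engine, batch):
    # Trivial process function: the engine still fires every event,
    # so only handler and dataflow times contribute to the profile.
    pass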
Example #2
def test_dataflow_timer_basic_profiler():
    true_dataflow_time_per_ele = 0.1
    true_max_epochs = 1
    true_num_iters = 2

    def dummy_data_loader(data):
        while True:
            for d in data:
                time.sleep(true_dataflow_time_per_ele)
                yield d

    dummy_data = range(true_num_iters)

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(_do_nothing_update_fn)
    profiler.attach(dummy_trainer)
    dummy_trainer.run(dummy_data_loader(dummy_data), max_epochs=true_max_epochs, epoch_length=true_num_iters)
    results = profiler.get_results()
    dataflow_results = results["dataflow_stats"]

    assert dataflow_results["min/index"][0] == approx(true_dataflow_time_per_ele, abs=1e-1)
    assert dataflow_results["max/index"][0] == approx(true_dataflow_time_per_ele, abs=1e-1)
    assert dataflow_results["mean"] == approx(true_dataflow_time_per_ele, abs=1e-1)
    assert dataflow_results["std"] == approx(0.0, abs=1e-1)
    assert dataflow_results["total"] == approx(true_num_iters * true_dataflow_time_per_ele, abs=1e-1)
Example #3
def test_event_handler_iteration_started_basic_profiler():
    true_event_handler_time = 0.1
    true_max_epochs = 1
    true_num_iters = 2

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(_do_nothing_update_fn)
    profiler.attach(dummy_trainer)

    @dummy_trainer.on(Events.ITERATION_STARTED)
    def delay_iter_start(engine):
        time.sleep(true_event_handler_time)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    results = profiler.get_results()
    event_results = results["event_handlers_stats"]["ITERATION_STARTED"]

    assert event_results["min/index"][0] == approx(true_event_handler_time,
                                                   abs=1e-1)
    assert event_results["max/index"][0] == approx(true_event_handler_time,
                                                   abs=1e-1)
    assert event_results["mean"] == approx(true_event_handler_time, abs=1e-1)
    assert event_results["std"] == approx(0.0, abs=1e-1)
    assert event_results["total"] == approx(true_max_epochs * true_num_iters *
                                            true_event_handler_time,
                                            abs=1e-1)
Example #4
def test_event_handler_total_time():
    true_event_handler_time = 0.125
    true_max_epochs = 1
    true_num_iters = 1

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(_do_nothing_update_fn)
    profiler.attach(dummy_trainer)

    @dummy_trainer.on(Events.STARTED)
    def delay_start(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.COMPLETED)
    def delay_complete(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.EPOCH_STARTED)
    def delay_epoch_start(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.EPOCH_COMPLETED)
    def delay_epoch_complete(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.ITERATION_STARTED)
    def delay_iter_start(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.ITERATION_COMPLETED)
    def delay_iter_complete(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.GET_BATCH_STARTED)
    def delay_get_batch_started(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.GET_BATCH_COMPLETED)
    def delay_get_batch_completed(engine):
        time.sleep(true_event_handler_time)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    results = profiler.get_results()
    event_results = results["event_handlers_stats"]

    assert event_results["total_time"].item() == approx(
        true_event_handler_time * 8, abs=1e-1)
Example #5
def test_event_handler_completed_basic_profiler():
    true_event_handler_time = 0.1
    true_max_epochs = 2
    true_num_iters = 2

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(_do_nothing_update_fn)
    profiler.attach(dummy_trainer)

    @dummy_trainer.on(Events.COMPLETED)
    def delay_complete(engine):
        time.sleep(true_event_handler_time)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    results = profiler.get_results()
    event_results = results["event_handlers_stats"]["COMPLETED"]

    assert event_results["total"] == approx(true_event_handler_time, abs=1e-1)
Example #6
def test_get_intermediate_results_during_run_basic_profiler(capsys):
    true_event_handler_time = 0.0645
    true_max_epochs = 2
    true_num_iters = 5

    profiler = BasicTimeProfiler()
    dummy_trainer = get_prepared_engine_for_basic_profiler(true_event_handler_time)
    profiler.attach(dummy_trainer)

    @dummy_trainer.on(Events.ITERATION_COMPLETED(every=3))
    def log_results(_):
        results = profiler.get_results()
        profiler.print_results(results)
        captured = capsys.readouterr()
        out = captured.out
        assert "BasicTimeProfiler._" not in out
        assert "nan" not in out
        assert " min/index: (0.0, " not in out, out

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
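Examples #6, #12 and #14 rely on a helper that is not shown on this page. A plausible sketch, assuming it simply delays every core engine event by a fixed amount so each event slot shows up in the profiler stats:

def get_prepared_engine_for_basic_profiler(true_event_handler_time):
    # Hypothetical reconstruction of the missing helper.
    dummy_trainer = Engine(_do_nothing_update_fn)
    for event in (
        Events.STARTED, Events.COMPLETED,
        Events.EPOCH_STARTED, Events.EPOCH_COMPLETED,
        Events.ITERATION_STARTED, Events.ITERATION_COMPLETED,
        Events.GET_BATCH_STARTED, Events.GET_BATCH_COMPLETED,
    ):
        dummy_trainer.add_event_handler(event, lambda engine: time.sleep(true_event_handler_time))
    return dummy_trainer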
Example #7
def test_processing_timer_basic_profiler():
    true_processing_time = 0.1
    true_max_epochs = 2
    true_num_iters = 2

    def train_updater(engine, batch):
        time.sleep(true_processing_time)

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(train_updater)
    profiler.attach(dummy_trainer)
    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    results = profiler.get_results()
    processing_results = results["processing_stats"]

    assert processing_results["min/index"][0] == approx(true_processing_time, abs=1e-1)
    assert processing_results["max/index"][0] == approx(true_processing_time, abs=1e-1)
    assert processing_results["mean"] == approx(true_processing_time, abs=1e-1)
    assert processing_results["std"] == approx(0.0, abs=1e-1)
    assert processing_results["total"] == approx(true_max_epochs * true_num_iters * true_processing_time, abs=1e-1)
Example #8
def test_event_handler_completed():
    true_event_handler_time = 0.1
    true_max_epochs = 2
    true_num_iters = 2

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(_do_nothing_update_fn)
    profiler.attach(dummy_trainer)

    @dummy_trainer.on(Events.COMPLETED)
    def delay_complete(engine):
        time.sleep(true_event_handler_time)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    results = profiler.get_results()
    event_results = results['event_handlers_stats']['Events_COMPLETED']

    assert event_results['min/index'][0] == approx(true_event_handler_time, abs=1e-1)
    assert event_results['max/index'][0] == approx(true_event_handler_time, abs=1e-1)
    assert event_results['mean'] == approx(true_event_handler_time, abs=1e-1)
Example #9
def test_processing_timer():
    true_processing_time = 0.1
    true_max_epochs = 2
    true_num_iters = 2

    def train_updater(engine, batch):
        time.sleep(true_processing_time)

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(train_updater)
    profiler.attach(dummy_trainer)
    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    results = profiler.get_results()
    processing_results = results['processing_stats']

    assert processing_results['min/index'][0] == approx(true_processing_time, abs=1e-1)
    assert processing_results['max/index'][0] == approx(true_processing_time, abs=1e-1)
    assert processing_results['mean'] == approx(true_processing_time, abs=1e-1)
    assert processing_results['std'] == approx(0., abs=1e-1)
    assert processing_results['total'] == approx(
        true_max_epochs * true_num_iters * true_processing_time, abs=1e-1
    )
Example #10
def test_write_results():
    true_event_handler_time = 0.125
    true_max_epochs = 3
    true_num_iters = 2
    test_folder = "./test_log_folder"

    if os.path.exists(test_folder):
        shutil.rmtree(test_folder)
    os.makedirs(test_folder)

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(_do_nothing_update_fn)
    profiler.attach(dummy_trainer)

    @dummy_trainer.on(Events.STARTED)
    def delay_start(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.COMPLETED)
    def delay_complete(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.EPOCH_STARTED)
    def delay_epoch_start(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.EPOCH_COMPLETED)
    def delay_epoch_complete(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.ITERATION_STARTED)
    def delay_iter_start(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.ITERATION_COMPLETED)
    def delay_iter_complete(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.GET_BATCH_STARTED)
    def delay_get_batch_started(engine):
        time.sleep(true_event_handler_time)

    @dummy_trainer.on(Events.GET_BATCH_COMPLETED)
    def delay_get_batch_completed(engine):
        time.sleep(true_event_handler_time)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    profiler.write_results(test_folder + "/test_log.csv")

    assert os.path.isfile(test_folder + "/test_log.csv")

    file_length = 0
    with open(test_folder + "/test_log.csv") as f:
        for _ in f:
            file_length += 1

    assert file_length == (true_max_epochs * true_num_iters) + 1

    # cleanup test log directory
    shutil.rmtree(test_folder)
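The assertion above counts raw lines: write_results produces one header row plus one row per processed iteration. An equivalent check with pandas (which write_results already depends on), hypothetically run before the cleanup:

import pandas as pd

df = pd.read_csv(test_folder + "/test_log.csv")
assert len(df) == true_max_epochs * true_num_iters  # pandas excludes the header row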
Example #11
def test_event_handler_get_batch_started():
    true_event_handler_time = 0.1
    true_max_epochs = 1
    true_num_iters = 2

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(_do_nothing_update_fn)
    profiler.attach(dummy_trainer)

    @dummy_trainer.on(Events.GET_BATCH_STARTED)
    def delay_get_batch_started(engine):
        time.sleep(true_event_handler_time)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    results = profiler.get_results()
    event_results = results['event_handlers_stats']['Events_GET_BATCH_STARTED']

    assert event_results['min/index'][0] == approx(true_event_handler_time, abs=1e-1)
    assert event_results['max/index'][0] == approx(true_event_handler_time, abs=1e-1)
    assert event_results['mean'] == approx(true_event_handler_time, abs=1e-1)
    assert event_results['std'] == approx(0., abs=1e-1)
    assert event_results['total'] == approx(
        true_max_epochs * true_num_iters * true_event_handler_time, abs=1e-1
    )
Example #12
def test_print_results_basic_profiler(capsys):
    true_max_epochs = 1
    true_num_iters = 5

    profiler = BasicTimeProfiler()
    dummy_trainer = get_prepared_engine_for_basic_profiler(true_event_handler_time=0.0125)
    profiler.attach(dummy_trainer)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    BasicTimeProfiler.print_results(profiler.get_results())

    captured = capsys.readouterr()
    out = captured.out
    assert "BasicTimeProfiler._" not in out
    assert "nan" not in out
Example #13
def test_print_results(capsys):
    true_event_handler_time = 0.0
    true_max_epochs = 1
    true_num_iters = 1

    profiler = BasicTimeProfiler()
    dummy_trainer = Engine(_do_nothing_update_fn)
    profiler.attach(dummy_trainer)

    @dummy_trainer.on(Events.GET_BATCH_COMPLETED)
    def delay_get_batch_completed(engine):
        time.sleep(true_event_handler_time)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    results = profiler.get_results()
    BasicTimeProfiler.print_results(results)

    captured = capsys.readouterr()
    out = captured.out

    assert out[0] == "\n"
Example #14
def test_write_results_basic_profiler(dirname):
    true_event_handler_time = 0.125
    true_max_epochs = 3
    true_num_iters = 2

    profiler = BasicTimeProfiler()
    dummy_trainer = get_prepared_engine_for_basic_profiler(true_event_handler_time)
    profiler.attach(dummy_trainer)

    dummy_trainer.run(range(true_num_iters), max_epochs=true_max_epochs)
    fp = os.path.join(dirname, "test_log.csv")
    profiler.write_results(fp)

    assert os.path.isfile(fp)

    file_length = 0
    with open(fp) as f:
        for _ in f:
            file_length += 1

    assert file_length == (true_max_epochs * true_num_iters) + 1
Example #15
def test_profilers_wrong_inputs():
    profiler = BasicTimeProfiler()
    with pytest.raises(TypeError, match=r"Argument engine should be ignite.engine.Engine"):
        profiler.attach(None)

    with pytest.raises(RuntimeError, match=r"Need pandas to write results as files"):
        with patch.dict("sys.modules", {"pandas": None}):
            profiler.write_results("")

    profiler = HandlersTimeProfiler()
    with pytest.raises(TypeError, match=r"Argument engine should be ignite.engine.Engine"):
        profiler.attach(None)

    with pytest.raises(RuntimeError, match=r"Need pandas to write results as files"):
        with patch.dict("sys.modules", {"pandas": None}):
            profiler.write_results("")
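For contrast with the error paths above, a minimal happy-path sketch of HandlersTimeProfiler, reusing the fixtures from the earlier examples; it reports per-handler timings instead of per-event aggregates:

from ignite.handlers import HandlersTimeProfiler

profiler = HandlersTimeProfiler()
dummy_trainer = Engine(_do_nothing_update_fn)
profiler.attach(dummy_trainer)
dummy_trainer.run(range(2), max_epochs=1)
HandlersTimeProfiler.print_results(profiler.get_results())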
Example #16
def create_trainer(loader, model, opt, loss_fn, device, args):

    def _update(engine, batch):
        model.train()

        x = batch['x'].to(engine.state.device, non_blocking=True)
        y = batch['y'].to(engine.state.device, non_blocking=True)
        m = batch['m'].to(engine.state.device, non_blocking=True)
        opt.zero_grad()
        y_pred = model(x)

        softmax = nn.Softmax(dim=1)  # dim=1 as in _inference; nn.Softmax() without dim is deprecated
        masked_loss = softmax(y_pred)
        #masked_loss = y_pred*m
        loss = loss_fn(masked_loss, y)
        if m.sum().item() / m.numel() > 0.7:
            loss.backward()
            opt.step()
        masked_loss = (masked_loss>0.5).float()
        acc = accuracy_segmentation(masked_loss[:,1,:,:,:],y[:,1,:,:,:])

        return {
            'x': x.detach(),
            'y': y.detach(),
            'm': m.detach(),
            'y_pred': y_pred.detach(),
            'loss': loss.item(),
            'acc' : acc
        }

    def _inference(engine, batch):
        model.eval()

        with th.no_grad():
            x = batch['x'].to(engine.state.device, non_blocking=True)
            y = batch['y'].to(engine.state.device, non_blocking=True)
            m = batch['m'].to(engine.state.device, non_blocking=True)

            y_pred = model(x)
            
            softmax = nn.Softmax(dim=1)
            masked_loss = softmax(y_pred)
            #masked_loss = y_pred*m
            loss = loss_fn(masked_loss, y)
            masked_loss = (masked_loss[-3:]>0.5).float()
            acc = accuracy_segmentation(masked_loss[:,1,:,:,:],y[:,1,:,:,:])

        return {
            'x': x.detach(),
            'y': y.detach(),
            'm': m.detach(),
            'y_pred': y_pred.detach(),
            'loss': loss.item(),
            'acc' : acc
        }


    #wandb.watch(model, log ='all')

    trainer = Engine(_update)
    evaluator = Engine(_inference)

    profiler = BasicTimeProfiler()
    profiler.attach(trainer)
    logdir = args.logdir
    save_ = (not args.devrun) and (not args.nosave)

    # initialize trainer state
    trainer.state.device = device
    trainer.state.hparams = args
    trainer.state.save = save_
    trainer.state.logdir = logdir

    trainer.state.df = defaultdict(dict)
    trainer.state.metrics = dict()
    trainer.state.val_metrics = dict()
    trainer.state.best_metrics = defaultdict(list)
    trainer.state.gradnorm = defaultdict(dict)

    # initialize evaluator state
    evaluator.logger = setup_logger('evaluator')
    evaluator.state.device = device
    evaluator.state.df = defaultdict(dict)
    evaluator.state.metrics = dict()

    pbar = ProgressBar(persist=True)
    ebar = ProgressBar(persist=False)

    pbar.attach(trainer, ['loss', 'acc'])
    ebar.attach(evaluator, ['loss', 'acc'])

    # model summary
    if args.model_summary:
        trainer.add_event_handler(
            Events.STARTED,
            print_model_summary, model
        )

    # terminate on nan
    trainer.add_event_handler(
        Events.ITERATION_COMPLETED,
        TerminateOnNan(lambda x: x['loss'])
    )

    # metrics
    trainer.add_event_handler(
        Events.ITERATION_COMPLETED,
        _metrics
    )

    evaluator.add_event_handler(
        Events.ITERATION_COMPLETED,
        _metrics
    )

    trainer.add_event_handler(
        Events.EPOCH_COMPLETED,
        _metrics_mean
    )

    evaluator.add_event_handler(
        Events.COMPLETED,
        _metrics_mean
    )

    trainer.add_event_handler(
        #Events.STARTED | Events.EPOCH_COMPLETED,
        Events.EPOCH_COMPLETED,
        _evaluate, evaluator, loader
    )

    # logging
    trainer.add_event_handler(
        Events.EPOCH_COMPLETED,
        _log_metrics
    )

    # early stopping
    if args.early_stopping > 0:
        es_p = args.early_stopping

        def es_s(engine):
            return -engine.state.metrics['loss']

        evaluator.add_event_handler(
            Events.COMPLETED,
            EarlyStopping(patience=es_p, score_function=es_s, trainer=trainer)
        )

    # lr schedulers
    if args.epoch_length is None:
        el = len(loader['train'])
    else:
        el = args.epoch_length

    if args.lr_scheduler is not None:
        lr_sched = create_lr_scheduler(opt, args, num_steps=el)

        if args.lr_scheduler != 'plateau':
            def _sched_fun(engine):
                lr_sched.step()
        else:
            def _sched_fun(engine):
                e = engine.state.epoch
                v = engine.state.val_metrics[e]['nmse']
                lr_sched.step(v)

        if args.lr_scheduler == 'linearcycle':
            trainer.add_event_handler(Events.ITERATION_STARTED, lr_sched)
        else:
            trainer.add_event_handler(Events.EPOCH_COMPLETED, _sched_fun)

    # FIXME: warmup is modifying opt base_lr -> must create last
    if args.lr_warmup > 0:
        wsched = create_lr_scheduler(opt, args, 'warmup', num_steps=el)
        wsts = wsched.total_steps
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(event_filter=lambda _, i: i <= wsts),
            lambda _: wsched.step()
        )

    # saving
    if save_:
        to_save = {
            'model': model,
            'optimizer': opt,
            'trainer': trainer,
            'evaluator': evaluator
        }

        trainer.add_event_handler(
            Events.EPOCH_COMPLETED,
            Checkpoint(to_save, DiskSaver(logdir), n_saved=3)
        )

        # handler = Checkpoint(
        #     {'model': model},
        #     DiskSaver(logdir),
        #     n_saved = 3,
        #     filename_prefix = 'best',
        #     score_function = lambda engine: -engine.state.metrics['nmae'],
        #     score_name = 'val_nmae',
        # )

        # evaluator.add_event_handler(
        #     Events.COMPLETED,
        #     handler
        # )

        # handler = Checkpoint(
        #     {'model': model},
        #     DiskSaver(logdir),
        #     n_saved = 3,
        #     filename_prefix = 'best',
        #     score_function = lambda engine: -engine.state.metrics['nmse'],
        #     score_name = 'val_nmse',
        # )

        # evaluator.add_event_handler(
        #     Events.COMPLETED,
        #     handler
        # )

        # handler = Checkpoint(
        #     {'model': model},
        #     DiskSaver(logdir),
        #     n_saved = 3,
        #     filename_prefix = 'best',
        #     score_function = lambda engine: engine.state.metrics['R2'],
        #     score_name = 'val_R2',
        # )

        # evaluator.add_event_handler(
        #     Events.COMPLETED,
        #     handler
        # )

        trainer.add_event_handler(
            Events.EPOCH_COMPLETED,
            _save_metrics
        )

        # timer
        trainer.add_event_handler(
            Events.COMPLETED | Events.TERMINATE,
            lambda _: profiler.write_results(logdir + '/time.csv')
        )

    return trainer
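A hypothetical call site for create_trainer. Only the args fields mirror what the function actually reads; build_model, train_loader and val_loader are stand-ins, not part of the original code:

from argparse import Namespace

args = Namespace(
    logdir='./runs/demo', devrun=False, nosave=False, model_summary=False,
    early_stopping=5, epoch_length=None, lr_scheduler=None, lr_warmup=0,
)
device = th.device('cuda' if th.cuda.is_available() else 'cpu')
model = build_model().to(device)                      # stand-in model factory
loader = {'train': train_loader, 'val': val_loader}   # stand-in DataLoaders
opt = th.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.BCELoss()                                # _update feeds it softmax outputs

trainer = create_trainer(loader, model, opt, loss_fn, device, args)
trainer.run(loader['train'], max_epochs=20)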