Пример #1
0
def test_dataloader_builder():
    """Check that collate_fn selection follows the sampler configuration."""
    # A multi-sampler config should install the custom partial collate.
    dataloader = dataloader_builder.build({"sampler": sampler_cfgs})
    multi_collate = dataloader.collate_fn
    assert isinstance(multi_collate, functools.partial)
    assert multi_collate.func == _collate_fn
    # A single-sampler config should fall back to the default collate.
    dataloader = dataloader_builder.build({"sampler": sampler_cfg})
    single_collate = dataloader.collate_fn
    # TODO
    assert single_collate == default_collate
Пример #2
0
def test_dataset_simple():
    """Iterate a built dataloader and sanity-check each emitted record."""
    dataloader = dataloader_builder.build(dataloader_cfg)
    for batch_idx, batch in enumerate(dataloader):
        # Every record must expose an image tensor and a usable path.
        assert 'img' in batch
        assert 'path' in batch
        assert isinstance(batch['img'], torch.Tensor)
        assert batch['path'] != ERROR_STRING
        # Stop early; a full pass over the dataset is unnecessary here.
        if batch_idx > 500:
            break
Пример #3
0
def test_reid_evaluation():
    """Run the evaluation pipeline end to end on the re-id config."""
    dataloader = dataloader_builder.build(reid_cfg)
    # restore
    model_cfgs = evaluation_model_builder.build(baseline_model_cfg)
    model = DataParallel(model_builder.build(model_cfgs[0]))
    # minimal stand-in for a sacred run object, evaluate() reads its config
    fake_run = {'config': {'device': torch.device('cuda')}}
    score = evaluate([dataloader], model, fake_run, "test")
    print(score)
Пример #4
0
def test_multi_dataset():
    """Build a concatenated-longest sampler over two dummy datasets and
    check that each batch pairs one sample from each dataset in order.
    """
    size1 = 70
    size2 = 100
    dummy_cfg_small = {
        "name": "dummy",
        "id": "dummy_small",
        "size": size1,
        "data_dir": "/"
    }

    dummy_cfg_large = {
        "name": "dummy",
        "id": "dummy_large",
        "size": size2,
        "data_dir": "/"
    }

    # One sequential sampler per dataset, batch size 1 each.
    sequential_cfg1 = {
        "type": "sequential",
        "dataset": dummy_cfg_small,
        "batch_size": 1,
        "drop_last": True
    }

    sequential_cfg2 = {
        "type": "sequential",
        "dataset": dummy_cfg_large,
        "batch_size": 1,
        "drop_last": True
    }

    # Concatenate samplers; iteration length follows the longest sampler.
    sampler_cfg = {
        "type": "concatenated_longest",
        "samplers": {
            "sampler1": sequential_cfg1,
            "sampler2": sequential_cfg2
        }
    }

    dataloader_cfg = {"sampler": sampler_cfg}

    dataloader = dataloader_builder.build(dataloader_cfg)

    # FIX: initialize idx so an empty dataloader no longer raises NameError
    # at the final print below.
    idx = -1
    for idx, data in enumerate(dataloader):
        # Position 0 comes from the small dataset, position 1 from the large.
        assert data['path'][0].startswith("dummy_small")
        assert data['path'][1].startswith("dummy_large")

    # FIX: use max() instead of a hand-rolled conditional expression.
    test = max(size1, size2)
    print(test, idx)
def build(cfg):
    """Build dataloaders and evaluation model configs from *cfg*.

    ``cfg`` is expected to map names to dataloader configs plus one special
    ``'model'`` entry describing the model(s) to evaluate.

    Returns:
        Tuple ``(evaluation_cfgs, model_cfgs)``.
    """
    #TODO: Make this consistent in terms of return vals
    evaluation_cfgs = dict()
    dataloaders = []
    # FIX: iterating a dict directly yields keys only, so the (name, cfg)
    # unpacking raised ValueError; use .items() and skip the 'model' entry,
    # which is not a dataloader config.
    for name, dataloader_cfg in cfg.items():
        if name == 'model':
            continue
        dataloader = dataloader_builder.build(dataloader_cfg)
        dataloaders.append(dataloader)

    # NOTE(review): dataloaders is built but never returned — confirm intent
    # (see TODO above about inconsistent return values).
    model_cfgs = evaluation_model_builder.build(cfg['model'])
    # overwrite restored values with the values supplied in cfg
    for model_cfg in model_cfgs:
        model_cfg.update(cfg['model'])
    return evaluation_cfgs, model_cfgs
Пример #6
0
def evaluate_checkpoint_on(restore_checkpoint,
                           dataset_cfg,
                           _run,
                           model_update_cfg=None):
    """Restore a checkpoint, rebuild model and dataloaders, and evaluate.

    Args:
        restore_checkpoint: Path/identifier of the checkpoint to restore.
        dataset_cfg: Config passed to the dataloader builder.
        _run: Experiment run object; ``_run.config['device_id']`` selects
            the GPUs for DataParallel.
        model_update_cfg: Optional overrides merged into the restored model
            config. Defaults to an empty dict.

    Returns:
        The result of ``evaluate`` on the restored model.
    """
    # FIX: avoid a shared mutable default argument ({} was bound once at
    # function definition time and shared across calls).
    if model_update_cfg is None:
        model_update_cfg = {}
    model_cfg, _, epoch = utils.restore_checkpoint(restore_checkpoint,
                                                   model_cfg=model_update_cfg,
                                                   map_location='cpu')
    dataloaders = dataloader_builder.build(dataset_cfg)
    model = model_builder.build(model_cfg)
    # TODO needs to be from dataset
    # None when the restored config carries no segmentation class mapping.
    model.seg_mapping = model_cfg.get('seg_class_mapping')

    model = torch.nn.DataParallel(model, device_ids=_run.config['device_id'])
    model = model.cuda()
    return evaluate(dataloaders, model, epoch, keep=True)
Пример #7
0
def run_train(dataloader_cfg, model_cfg, scheduler_cfg, optimizer_cfg,
              loss_cfg, validation_cfg, checkpoint_frequency,
              restore_checkpoint, max_epochs, _run):
    """Main training loop: build all components from configs, train for
    ``max_epochs`` epochs, periodically checkpoint and (optionally) validate,
    and return the paths of the last ``evaluate_last`` saved checkpoints.

    Args:
        dataloader_cfg: Config for the training dataloader builder.
        model_cfg: Base model config; augmented with dataset info below and
            possibly overwritten by a restored checkpoint.
        scheduler_cfg, optimizer_cfg, loss_cfg: Configs for the respective
            builders.
        validation_cfg: Config for validation dataloaders, or None to skip
            validation entirely.
        checkpoint_frequency: Save a checkpoint (and validate) every this
            many epochs.
        restore_checkpoint: Optional checkpoint to resume from; when given,
            it supplies model/optimizer configs and the starting epoch.
        max_epochs: Total number of epochs to train.
        _run: Experiment run object; device settings and the
            'training.evaluate_last' option are read from ``_run.config``.

    Returns:
        List of checkpoint paths for the last ``evaluate_last`` models.
    """

    # Lets cuDNN benchmark conv implementations and choose the fastest.
    # Only good if sizes stay the same within the main loop!
    torch.backends.cudnn.benchmark = True
    exit_handler = ExitHandler()

    device = _run.config['device']
    device_id = _run.config['device_id']

    # during training just one dataloader
    dataloader = dataloader_builder.build(dataloader_cfg)[0]

    epoch = 0
    if restore_checkpoint is not None:
        # Resuming: restored configs replace the passed-in ones and epoch
        # continues from where the checkpoint left off.
        model_cfg, optimizer_cfg, epoch = utils.restore_checkpoint(
            restore_checkpoint, model_cfg, optimizer_cfg)

    def overwrite(to_overwrite, dic):
        # In-place dict merge; returns the mutated dict for convenience.
        to_overwrite.update(dic)
        return to_overwrite

    # some models depend on dataset, for example num_joints
    model_cfg = overwrite(dataloader.dataset.info, model_cfg)
    model = model_builder.build(model_cfg)

    # The loss builder needs the model (e.g. to attach auxiliary heads).
    loss_cfg['model'] = model
    loss = loss_builder.build(loss_cfg)
    loss = loss.to(device)

    # Loss modules may carry learnable parameters; optimize them jointly.
    parameters = list(model.parameters()) + list(loss.parameters())
    optimizer = optimizer_builder.build(optimizer_cfg, parameters)

    lr_scheduler = scheduler_builder.build(scheduler_cfg, optimizer, epoch)

    if validation_cfg is None:
        validation_dataloaders = None
    else:
        validation_dataloaders = dataloader_builder.build(validation_cfg)
        # NOTE: `keep` is only bound on this branch; it is only read in the
        # validation block below, which is likewise guarded.
        keep = False

    file_logger = log.get_file_logger()
    logger = log.get_logger()

    model = torch.nn.DataParallel(model, device_ids=device_id)
    model.cuda()

    model = model.train()
    trained_models = []

    # Save an "atexit" checkpoint even if the process is interrupted.
    exit_handler.register(file_logger.save_checkpoint, model, optimizer,
                          "atexit", model_cfg)

    start_training_time = time.time()
    end = time.time()
    while epoch < max_epochs:
        epoch += 1
        lr_scheduler.step()
        logger.info("Starting Epoch %d/%d", epoch, max_epochs)
        len_batch = len(dataloader)
        acc_time = 0
        for batch_id, data in enumerate(dataloader):
            optimizer.zero_grad()
            endpoints = model(data, model.module.endpoints)
            logger.debug("datasets %s", list(data['split_info'].keys()))

            # Merge model outputs into the batch dict for the loss.
            data.update(endpoints)
            # threoretically losses could also be caluclated distributed.
            losses = loss(endpoints, data)
            loss_mean = torch.mean(losses)
            loss_mean.backward()
            optimizer.step()

            # Accumulate wall-clock time per batch for the report below.
            acc_time += time.time() - end
            end = time.time()

            report_after_batch(_run=_run,
                               logger=logger,
                               batch_id=batch_id,
                               batch_len=len_batch,
                               acc_time=acc_time,
                               loss_mean=loss_mean,
                               max_mem=torch.cuda.max_memory_allocated())

        if epoch % checkpoint_frequency == 0:
            path = file_logger.save_checkpoint(model, optimizer, epoch,
                                               model_cfg)
            trained_models.append(path)

        report_after_epoch(_run=_run, epoch=epoch, max_epoch=max_epochs)

        if validation_dataloaders is not None and \
                epoch % checkpoint_frequency == 0:
            model.eval()

            # Lets cuDNN benchmark conv implementations and choose the fastest.
            # Only good if sizes stay the same within the main loop!
            # not the case for segmentation
            torch.backends.cudnn.benchmark = False
            score = evaluate(validation_dataloaders, model, epoch, keep=keep)
            logger.info(score)
            log_score(score, _run, prefix="val_", step=epoch)
            torch.backends.cudnn.benchmark = True
            model.train()

    report_after_training(_run=_run,
                          max_epoch=max_epochs,
                          total_time=time.time() - start_training_time)
    # Final checkpoint after the last epoch (may duplicate a periodic one).
    path = file_logger.save_checkpoint(model, optimizer, epoch, model_cfg)
    if path:
        trained_models.append(path)
    file_logger.close()
    # TODO get best performing val model
    evaluate_last = _run.config['training'].get('evaluate_last', 1)
    if len(trained_models) < evaluate_last:
        logger.info("Only saved %d models (evaluate_last=%d)",
                    len(trained_models), evaluate_last)
    return trained_models[-evaluate_last:]
def test_multi_dataset_loader(num_workers, sampler):
    """Combine a PK sampler with a sequential sampler and verify that each
    batch's split_info matches the actual per-dataset batch positions."""
    num_pids_per_batch = 6
    imgs_per_pid = 10
    dummy_cfg1 = {
        "name": "dummy",
        "num_pids": 100,
        "id": "dummy1",
        "size": 500,
        "data_dir": "/"
    }

    pk_cfg = {
        "type": "pk",
        "dataset": dummy_cfg1,
        "P": num_pids_per_batch,
        "K": imgs_per_pid,
        "drop_last": True
    }

    dummy_cfg2 = {
        "name": "dummy",
        "id": "dummy2",
        "size": 750,
        "data_dir": "/"
    }

    sequential_cfg = {
        "type": "sequential",
        "dataset": dummy_cfg2,
        "batch_size": 70
    }

    sampler_cfg = {
        "type": sampler,
        "samplers": {
            "sampler1": pk_cfg,
            "sampler2": sequential_cfg
        }
    }

    dataloader_cfg = {
        "sampler": sampler_cfg,
        "num_workers": num_workers
    }

    dataloader = dataloader_builder.build(dataloader_cfg)

    start = time.time()
    for batch_data in dataloader:
        # Recover which batch positions actually belong to which dataset.
        actual_split = {"dummy1": [], "dummy2": []}
        for position, dataset_name in enumerate(batch_data['path']):
            if dataset_name not in actual_split:
                raise RuntimeError
            actual_split[dataset_name].append(position)

        # split_info must agree with the observed positions exactly.
        sampler_info = batch_data['split_info']
        for dataset, idxs in sampler_info.items():
            expected = actual_split[dataset]
            assert len(idxs) == len(expected)
            assert all(a == b for a, b in zip(idxs, expected))

        idxs1 = sampler_info.get("dummy1")
        idxs2 = sampler_info.get("dummy2")

        if idxs1:
            print(np.array(batch_data['pid'])[idxs1])
        if idxs2:
            print(np.array(batch_data['pid'])[idxs2])

    print("Took {}".format(time.time()-start))