示例#1
0
def train(cfg, network):
#     cfg.train.num_workers = 0
    if cfg.train.dataset[:4] != 'City':
        torch.multiprocessing.set_sharing_strategy('file_system')
        
    train_loader = make_data_loader(cfg, is_train=True, max_iter=cfg.ep_iter)
    val_loader = make_data_loader(cfg, is_train=False)
    # train_loader = make_data_loader(cfg, is_train=True, max_iter=100)
    
    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)
    evaluator = make_evaluator(cfg)

    begin_epoch = load_model(network, optimizer, scheduler, recorder, cfg.model_dir, resume=cfg.resume)
    # set_lr_scheduler(cfg, scheduler)

    for epoch in range(begin_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        trainer.train(epoch, train_loader, optimizer, recorder)
        scheduler.step()

        if (epoch + 1) % cfg.save_ep == 0:
            save_model(network, optimizer, scheduler, recorder, epoch, cfg.model_dir)

        if (epoch + 1) % cfg.eval_ep == 0:
            trainer.val(epoch, val_loader, evaluator, recorder)

    return network
示例#2
0
def run_evaluate():
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    import torch
    from lib.networks import make_network
    from lib.utils.net_utils import load_network

    if DEBUG:
        print(
            '-------------------------------Evaluating---------------------------------'
        )

    network = make_network(cfg).cuda()
    load_network(network, cfg.model_dir, epoch=cfg.test.epoch)
    network.eval()

    data_loader = make_data_loader(cfg, is_train=False)
    evaluator = make_evaluator(cfg)
    for batch in tqdm.tqdm(data_loader):
        inp = batch['inp'].cuda()
        with torch.no_grad():
            output = network(inp)
        evaluator.evaluate(output, batch)
    evaluator.summarize()
示例#3
0
def train(cfg, network):
    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)
    evaluator = make_evaluator(cfg)

    begin_epoch = load_model(network, optimizer, scheduler, recorder, cfg.model_dir, resume=cfg.resume)
    # set_lr_scheduler(cfg, scheduler)

    train_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)

    for epoch in range(begin_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        trainer.train(epoch, train_loader, optimizer, recorder)
        scheduler.step()

        if (epoch + 1) % cfg.save_ep == 0:
            save_model(network, optimizer, scheduler, recorder, epoch, cfg.model_dir)

        if (epoch + 1) % cfg.eval_ep == 0:
            trainer.val(epoch, val_loader, evaluator, recorder)

    return network
示例#4
0
文件: run.py 项目: zlou/neuralbody
def run_evaluate():
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    import torch
    from lib.networks import make_network
    from lib.utils import net_utils
    from lib.networks.renderer import make_renderer

    cfg.perturb = 0

    network = make_network(cfg).cuda()
    net_utils.load_network(network,
                           cfg.trained_model_dir,
                           resume=cfg.resume,
                           epoch=cfg.test.epoch)
    network.train()

    data_loader = make_data_loader(cfg, is_train=False)
    renderer = make_renderer(cfg, network)
    evaluator = make_evaluator(cfg)
    for batch in tqdm.tqdm(data_loader):
        for k in batch:
            if k != 'meta':
                batch[k] = batch[k].cuda()
        with torch.no_grad():
            output = renderer.render(batch)
        evaluator.evaluate(output, batch)
    evaluator.summarize()
示例#5
0
def run_evaluate():
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    import torch
    from lib.networks import make_network
    from lib.utils.net_utils import load_network
    from lib.train import make_trainer

    network = make_network(cfg).cuda()
    load_network(network, cfg.model_dir, epoch=cfg.test.epoch)
    trainer = make_trainer(cfg, network)
    network.eval()

    data_loader = make_data_loader(cfg, is_train=False)
    if 'Coco' in cfg.train.dataset:
        trainer.val_coco(data_loader)
    else:
        evaluator = make_evaluator(cfg)
        for batch in tqdm.tqdm(data_loader):
            inp = batch['inp'].cuda()
            with torch.no_grad():
                output = network(inp)
            evaluator.evaluate(output, batch)
        evaluator.summarize()
示例#6
0
def test(cfg, network):
    trainer = make_trainer(cfg, network)
    val_loader = make_data_loader(cfg, is_train=False)
    evaluator = make_evaluator(cfg)
    epoch = load_network(network,
                         cfg.model_dir,
                         resume=cfg.resume,
                         epoch=cfg.test.epoch)
    trainer.val(epoch, val_loader, evaluator)
示例#7
0
def train(cfg, network):
    if cfg.train.dataset[:4] != 'City':
        torch.multiprocessing.set_sharing_strategy('file_system')
    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)
    if 'Coco' not in cfg.train.dataset:
        evaluator = make_evaluator(cfg)

    begin_epoch = load_model(network,
                             optimizer,
                             scheduler,
                             recorder,
                             cfg.model_dir,
                             resume=cfg.resume)
    # set_lr_scheduler(cfg, scheduler)

    train_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)
    # train_loader = make_data_loader(cfg, is_train=True, max_iter=100)

    global_steps = None
    if cfg.neptune:
        global_steps = {
            'train_global_steps': 0,
            'valid_global_steps': 0,
        }

        neptune.init('hccccccccc/clean-pvnet')
        neptune.create_experiment(cfg.model_dir.split('/')[-1])
        neptune.append_tag('pose')

    for epoch in range(begin_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        trainer.train(epoch, train_loader, optimizer, recorder, global_steps)
        scheduler.step()

        if (epoch + 1) % cfg.save_ep == 0:
            save_model(network, optimizer, scheduler, recorder, epoch,
                       cfg.model_dir)

        if (epoch + 1) % cfg.eval_ep == 0:
            if 'Coco' in cfg.train.dataset:
                trainer.val_coco(val_loader, global_steps)
            else:
                trainer.val(epoch, val_loader, evaluator, recorder)

    if cfg.neptune:
        neptune.stop()

    return network
示例#8
0
文件: run.py 项目: zlou/neuralbody
def run_evaluate_nv():
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    from lib.utils import net_utils

    data_loader = make_data_loader(cfg, is_train=False)
    evaluator = make_evaluator(cfg)
    for batch in tqdm.tqdm(data_loader):
        for k in batch:
            if k != 'meta':
                batch[k] = batch[k].cuda()
        evaluator.evaluate(batch)
    evaluator.summarize()
示例#9
0
def train(cfg, network):
    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)
    evaluator = make_evaluator(cfg)

    begin_epoch = load_model(network,
                             optimizer,
                             scheduler,
                             recorder,
                             cfg.model_dir,
                             resume=cfg.resume)
    # begin_epoch = 0  #如果要继续训练那么请注释这一行
    # set_lr_scheduler(cfg, scheduler)
    # print("before train loader")
    train_loader = make_data_loader(cfg, is_train=True)  #到这里才读取的数据
    # print("under train loader")
    val_loader = make_data_loader(cfg, is_train=False)

    # #这里是查看train_loader的相关参数个结构
    # tmp_file = open('/home/tianhao.lu/code/Deep_snake/snake/Result/Contour/contour.log', 'w')
    # tmp_file.writelines("train_loader type:" + str(type(train_loader)) + "\n")
    # tmp_file.writelines("train_loader len:" + str(len(train_loader)) + "\n")
    # tmp_file.writelines("train_loader data:" + str(train_loader) + "\n")
    # for tmp_data in train_loader:
    #     tmp_file.writelines("one train_loader data type:" + str(type(tmp_data)) + "\n")
    #     for key in tmp_data:
    #         tmp_file.writelines("one train_loader data key:" + str(key) + "\n")
    #         tmp_file.writelines("one train_loader data len:" + str(len(tmp_data[key])) + "\n")
    #     # tmp_file.writelines("one train_loader data:" + str(tmp_data) + "\n")
    #     break
    # tmp_file.writelines(str("*************************************************************** \n"))
    # tmp_file.close()

    for epoch in range(begin_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        trainer.train(epoch, train_loader, optimizer, recorder)
        scheduler.step()  #optimizer.step()模型才会更新,scheduler.step()是用来调整lr的

        if (epoch + 1) % cfg.save_ep == 0:
            save_model(network, optimizer, scheduler, recorder, epoch,
                       cfg.model_dir)

        if (epoch + 1) % cfg.eval_ep == 0:
            trainer.val(epoch, val_loader, evaluator, recorder)

    return network
示例#10
0
def run_evaluate():
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    import torch
    import pickle
    import lzma
    from lib.networks import make_network
    from lib.utils.net_utils import load_network
    from lib.evaluators.custom.monitor import MetricMonitor
    from lib.visualizers import make_visualizer

    torch.manual_seed(0)

    monitor = MetricMonitor()
    network = make_network(cfg).cuda()
    epoch = load_network(network, cfg.model_dir, epoch=cfg.test.epoch)
    network.eval()
    print("Trainable parameters: {}".format(
        sum(p.numel() for p in network.parameters())))

    data_loader = make_data_loader(cfg, is_train=False)
    evaluator = make_evaluator(cfg)
    visualizer = make_visualizer(cfg)
    idx = 0

    if args.vis_out:
        os.mkdir(args.vis_out)

    for batch in tqdm.tqdm(data_loader):
        idx += 1
        inp = batch['inp'].cuda()
        with torch.no_grad():
            output = network(inp)
        evaluator.evaluate(output, batch)

        if args.vis_out:
            err = evaluator.data["obj_drilltip_trans_3d"][-1]
            visualizer.visualize(
                output, batch,
                os.path.join(
                    args.vis_out,
                    "tiperr{:.4f}_idx{:04d}.png".format(err.item(), idx)))
    result = evaluator.summarize()
    monitor.add('val', epoch, result)
    monitor.save_metrics("metrics.pkl")
    monitor.plot_histogram("evaluation.html", plotly=True)
示例#11
0
def train(cfg, network):
    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)
    evaluator = make_evaluator(cfg)

    begin_epoch = load_model(network,
                             optimizer,
                             scheduler,
                             recorder,
                             cfg.trained_model_dir,
                             resume=cfg.resume)
    set_lr_scheduler(cfg, scheduler)

    train_loader = make_data_loader(cfg,
                                    is_train=True,
                                    is_distributed=cfg.distributed,
                                    max_iter=cfg.ep_iter)
    val_loader = make_data_loader(cfg, is_train=False)

    for epoch in range(begin_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        if cfg.distributed:
            train_loader.batch_sampler.sampler.set_epoch(epoch)

        trainer.train(epoch, train_loader, optimizer, recorder)
        scheduler.step()

        if (epoch + 1) % cfg.save_ep == 0 and cfg.local_rank == 0:
            save_model(network, optimizer, scheduler, recorder,
                       cfg.trained_model_dir, epoch)

        if (epoch + 1) % cfg.save_latest_ep == 0 and cfg.local_rank == 0:
            save_model(network,
                       optimizer,
                       scheduler,
                       recorder,
                       cfg.trained_model_dir,
                       epoch,
                       last=True)

        if (epoch + 1) % cfg.eval_ep == 0:
            trainer.val(epoch, val_loader, evaluator, recorder)

    return network
示例#12
0
def run_evaluate():
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    import torch
    from lib.networks import make_network
    from lib.utils.net_utils import load_network

    import numpy as np
    np.random.seed(1000)
    torch.manual_seed(0)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    network = make_network(cfg).cuda()
    load_network(network, cfg.model_dir, epoch=cfg.test.epoch)
    network.eval()

    data_loader = make_data_loader(cfg, is_train=False)
    evaluator = make_evaluator(cfg)

    count = 1200
    i = 0
    for batch in tqdm.tqdm(data_loader):
        if i == count:
            break
        inp = batch['inp'].cuda()
        # save input
        #         print(batch['img_id'])
        #         import pickle
        #         with open('/mbrdi/sqnap1_colomirror/gupansh/input_cat.pkl','wb') as fp:
        #             pickle.dump(batch['inp'], fp)
        #         input()
        seg_gt = batch['mask'].cuda()
        with torch.no_grad():
            output = network(inp)
        evaluator.evaluate(output, batch)

        i += 1
    evaluator.summarize()