Example No. 1
def main():
    rank, world_size = dist_init()
    cfg.merge_from_file(args.cfg)
    if rank == 0:
        if not os.path.exists(cfg.TRAIN.LOG_DIR):
            os.makedirs(cfg.TRAIN.LOG_DIR)
        init_log('global', logging.INFO)
        if cfg.TRAIN.LOG_DIR:
            add_file_handler('global',
                             os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'),
                             logging.INFO)
        logger.info("Version Information: \n{}\n".format(commit()))
        logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    logger.info('dist init done!')
    train_dataloader = build_data_loader()
    model = get_model('BaseSiamModel').cuda().train()
    dist_model = DistModule(model)
    optimizer, lr_scheduler = build_optimizer_lr(dist_model.module,
                                                 cfg.TRAIN.START_EPOCH)
    if cfg.TRAIN.BACKBONE_PRETRAIN:
        logger.info('load backbone from {}.'.format(cfg.TRAIN.BACKBONE_PATH))
        model.backbone = load_pretrain(model.backbone, cfg.TRAIN.BACKBONE_PATH)
        logger.info('load backbone done!')
    if cfg.TRAIN.RESUME:
        logger.info('resume from {}'.format(cfg.TRAIN.RESUME_PATH))
        model, optimizer, cfg.TRAIN.START_EPOCH = restore_from(
            model, optimizer, cfg.TRAIN.RESUME_PATH)
        logger.info('resume done!')
    elif cfg.TRAIN.PRETRAIN:
        logger.info('load pretrain from {}.'.format(cfg.TRAIN.PRETRAIN_PATH))
        model = load_pretrain(model, cfg.TRAIN.PRETRAIN_PATH)
        logger.info('load pretrain done')
    dist_model = DistModule(model)
    train(train_dataloader, dist_model, optimizer, lr_scheduler)
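All of the snippets on this page share the same init_log/add_file_handler helpers from utils.log_helper. Their exact implementation is project-specific (some examples below pass extra arguments such as a save directory), so the following is only a minimal sketch consistent with how they are called here: a named logger with console output plus an optional file mirror.

import logging

def init_log(name, level=logging.INFO):
    # Create (or fetch) a named logger that prints to the console.
    logger = logging.getLogger(name)
    logger.setLevel(level)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger.addHandler(handler)
    return logger

def add_file_handler(name, log_file, level=logging.INFO):
    # Mirror the named logger's output into a file on disk.
    handler = logging.FileHandler(log_file)
    handler.setLevel(level)
    handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logging.getLogger(name).addHandler(handler)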
Example No. 2
def main():
    global args, logger, v_id
    args = parser.parse_args()
    cfg = load_config(args)

    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info(args)

    # setup model
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()
    device = torch.device('cuda' if (torch.cuda.is_available() and not args.cpu) else 'cpu')
    model = model.to(device)
    # setup dataset
    dataset = load_dataset(args.dataset)

    # VOS or VOT?
    if args.dataset in ['DAVIS', 'DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable Mask output
    else:
        vos_enable = False

    total_lost = 0  # VOT
    iou_lists = []  # VOS
    speed_list = []

    for v_id, video in enumerate(dataset.keys(), start=1):
        if args.video != '' and video != args.video:
            continue

        if vos_enable:
            iou_list, speed = track_vos(model, dataset[video],
                                        cfg['hp'] if 'hp' in cfg.keys() else None,
                                        args.mask, args.refine,
                                        args.dataset in ['DAVIS2017', 'ytb_vos'],
                                        device=device)
            iou_lists.append(iou_list)
        else:
            lost, speed = track_vot(model, dataset[video],
                                    cfg['hp'] if 'hp' in cfg.keys() else None,
                                    args.mask, args.refine, device=device)
            total_lost += lost
        speed_list.append(speed)

    # report final result
    if vos_enable:
        for thr, iou in zip(thrs, np.mean(np.concatenate(iou_lists), axis=0)):
            logger.info('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(thr, iou))
    else:
        logger.info('Total Lost: {:d}'.format(total_lost))

    logger.info('Mean Speed: {:.2f} FPS'.format(np.mean(speed_list)))
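The VOS report concatenates the per-video IoU matrices (one row per tracked object, one column per segmentation threshold) and averages over the rows before pairing each column with its threshold. A self-contained illustration of that aggregation; the threshold grid thrs is hypothetical here (in the script it is a module-level global):

import numpy as np

thrs = np.arange(0.30, 0.50, 0.05)          # hypothetical threshold grid
iou_lists = [np.random.rand(3, len(thrs)),  # video 1: 3 objects
             np.random.rand(2, len(thrs))]  # video 2: 2 objects
mean_iou = np.mean(np.concatenate(iou_lists), axis=0)  # per-threshold mIoU
for thr, iou in zip(thrs, mean_iou):
    print('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(thr, iou))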
Example No. 3
def main():
    cfg.merge_from_file(args.cfg)
    if not os.path.exists(cfg.PRUNING.FINETUNE.LOG_DIR):
        os.makedirs(cfg.PRUNING.FINETUNE.LOG_DIR)
    init_log('global', logging.INFO)
    if cfg.PRUNING.FINETUNE.LOG_DIR:
        add_file_handler(
            'global', os.path.join(cfg.PRUNING.FINETUNE.LOG_DIR, 'logs.txt'),
            logging.INFO)
    logger.info("Version Information: \n{}\n".format(commit()))
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    train_dataloader = build_data_loader()
    model = PruningSiamModel()
    # load the weights saved from the pruning run
    logger.info('load pretrain from {}.'.format(
        cfg.PRUNING.FINETUNE.PRETRAIN_PATH))
    model = load_pretrain(model, cfg.PRUNING.FINETUNE.PRETRAIN_PATH)
    logger.info('load pretrain done')
    logger.info('begin pruning the model')
    model = prune_model(model).cuda().train()
    logger.info('pruning finished!')

    optimizer, lr_scheduler = build_optimizer_lr(
        model, cfg.PRUNING.FINETUNE.START_EPOCH)
    if cfg.PRUNING.FINETUNE.RESUME:
        logger.info('resume from {}'.format(cfg.PRUNING.FINETUNE.RESUME_PATH))
        model, optimizer, cfg.PRUNING.FINETUNE.START_EPOCH = restore_from(
            model, optimizer, cfg.PRUNING.FINETUNE.RESUME_PATH)
        logger.info('resume done!')
    train(train_dataloader, model, optimizer, lr_scheduler)
Example No. 4
def main():
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()  # parse command-line arguments

    init_log('global', logging.INFO)

    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')  # instantiate a logger
    logger.info("\n" + collect_env_info())
    logger.info(args)

    cfg = load_config(args)
    logger.info("config \n{}".format(json.dumps(
        cfg, indent=4)))  # 转变成json格式的文件,缩进4格

    if args.log_dir:
        tb_writer = SummaryWriter(args.log_dir)
    else:
        tb_writer = Dummy()

    # build dataset
    train_loader, val_loader = build_data_loader(cfg)

    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(pretrain=True, anchors=cfg['anchors'])
    else:
        exit()
    logger.info(model)

    if args.pretrained:
        model = load_pretrain(model, args.pretrained)

    model = model.cuda()  # move the model to the GPU
    dist_model = torch.nn.DataParallel(
        model, list(range(torch.cuda.device_count()))).cuda()  # multi-GPU training

    if args.resume and args.start_epoch != 0:  # when resuming, restore how much of the backbone had been unfrozen by start_epoch
        model.features.unfix((args.start_epoch - 1) / args.epochs)

    optimizer, lr_scheduler = build_opt_lr(model, cfg, args,
                                           args.start_epoch)  # build the optimizer and learning-rate schedule
    # optionally resume from a checkpoint
    if args.resume:
        assert os.path.isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model, optimizer, args.start_epoch, best_acc, arch = restore_from(
            model, optimizer, args.resume)
        dist_model = torch.nn.DataParallel(
            model, list(range(torch.cuda.device_count()))).cuda()

    logger.info(lr_scheduler)

    logger.info('model prepare done')

    train(train_loader, dist_model, optimizer, lr_scheduler, args.start_epoch,
          cfg)
Example No. 5
def main():
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()

    init_log('global', logging.INFO)  # create the 'global' logger; logging.INFO is the log level

    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')  # fetch the logger initialized above
    logger.info("\n" + collect_env_info())
    logger.info(args)

    cfg = load_config(args)  # returns the merged configuration object

    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))  # json.dumps() serializes the dict to a JSON string (json.loads() is the inverse)

    if args.log_dir:
        tb_writer = SummaryWriter(args.log_dir)
    else:
        tb_writer = Dummy()

    # build dataset
    train_loader, val_loader = build_data_loader(cfg)  

    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(pretrain=True, anchors=cfg['anchors'])
    else:
        exit()
    logger.info(model)

    if args.pretrained:
        model = load_pretrain(model, args.pretrained)

    model = model.cuda()
    dist_model = torch.nn.DataParallel(model, list(range(torch.cuda.device_count()))).cuda()

    if args.resume and args.start_epoch != 0:
        model.features.unfix((args.start_epoch - 1) / args.epochs)

    optimizer, lr_scheduler = build_opt_lr(model, cfg, args, args.start_epoch)
    # optionally resume from a checkpoint
    if args.resume:
        assert os.path.isfile(args.resume), '{} is not a valid file'.format(args.resume)
        model, optimizer, args.start_epoch, best_acc, arch = restore_from(model, optimizer, args.resume)
        dist_model = torch.nn.DataParallel(model, list(range(torch.cuda.device_count()))).cuda()

    logger.info(lr_scheduler)

    logger.info('model prepare done')

    train(train_loader, dist_model, optimizer, lr_scheduler, args.start_epoch, cfg)
Example No. 6
def create_app(config_name):
    app = Flask(__name__)
    # enable cross-origin requests (credentials allowed)
    CORS(app, supports_credentials=True)
    app.config.from_object(config[config_name])
    config[config_name].init_app(app)
    ### initialize the database
    # db.init_app(app)
    # allow non-ASCII (e.g. Chinese) characters in JSON responses
    app.config['JSON_AS_ASCII'] = False

    ### initialize logging ###
    init_log()
    api.init_app(app)
    return app
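Typical use of the factory above; the 'development' config key and the run parameters are illustrative, not taken from this project:

app = create_app('development')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)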
Example No. 7
def main():
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()
    args = args_process(args)

    init_log('global', logging.INFO)

    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info("\n" + collect_env_info())
    logger.info(args)

    cfg = load_config(args)
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    # build dataset
    train_loader, val_loader = build_data_loader(cfg)

    args.img_size = int(cfg['train_datasets']['search_size'])
    args.nms_threshold = float(cfg['train_datasets']['RPN_NMS'])
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(pretrain=True,
                       opts=args,
                       anchors=train_loader.dataset.anchors)
    else:
        exit()
    logger.info(model)

    if args.pretrained:
        model = load_pretrain(model, args.pretrained)
    else:
        raise Exception("Pretrained weights must be loaded!")

    model = model.cuda()
    dist_model = torch.nn.DataParallel(model,
                                       list(range(
                                           torch.cuda.device_count()))).cuda()

    logger.info('model prepare done')

    logger = logging.getLogger('global')
    val_avg = AverageMeter()

    validation(val_loader, dist_model, cfg, val_avg)
Example No. 8
def main():
    cfg.merge_from_file(args.cfg)
    if not os.path.exists(cfg.META.LOG_DIR):
        os.makedirs(cfg.META.LOG_DIR)
    init_log("global", logging.INFO)
    if cfg.META.LOG_DIR:
        add_file_handler("global", os.path.join(cfg.META.LOG_DIR, "logs.txt"),
                         logging.INFO)
    logger.info("Version Information: \n{}\n".format(commit()))
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))
    model = MetaSiamModel().cuda()
    model = load_pretrain(model, cfg.META.PRETRAIN_PATH)
    # init meta train
    model.meta_train_init()
    # parameters to optimize
    optimizer = build_optimizer(model)
    dataloader = build_dataloader()
    meta_train(dataloader, optimizer, model)
Example No. 9
def main():
    cfg.merge_from_file(args.cfg)
    if not os.path.exists(cfg.GRAD.LOG_DIR):
        os.makedirs(cfg.GRAD.LOG_DIR)
    init_log("global", logging.INFO)
    if cfg.GRAD.LOG_DIR:
        add_file_handler("global", os.path.join(cfg.GRAD.LOG_DIR, "logs.txt"),
                         logging.INFO)
    logger.info("Version Information: \n{}\n".format(commit()))
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))
    model = get_model('GradSiamModel').cuda()
    model = load_pretrain(model, cfg.GRAD.PRETRAIN_PATH)
    # parameters to optimize
    optimizer = build_optimizer(model)
    dataloader = build_dataloader()
    if cfg.GRAD.RESUME:
        logger.info('resume from {}'.format(cfg.GRAD.RESUME_PATH))
        model, optimizer, cfg.GRAD.START_EPOCH = restore_from(
            model, optimizer, cfg.GRAD.RESUME_PATH)
        logger.info('resume done!')
    model.freeze_model()
    train(dataloader, optimizer, model)
Example No. 10
def main():
    seed_torch(123456)
    cfg.merge_from_file(args.cfg)
    init_log('global', logging.INFO)

    base_model = get_model(cfg.MODEL_ARC)
    base_model = load_pretrain(base_model, args.snapshot).cuda().eval()
    # # to test the pruned model:
    # base_model = prune_model(base_model).cuda().eval()  # refine the model

    # to test real pruning:
    # base_model = get_model(cfg.MODEL_ARC)
    # base_model = load_pretrain(base_model, cfg.PRUNING.FINETUNE.PRETRAIN_PATH)  # load the mask
    # base_model = prune_model(base_model)  # refine the model
    # base_model = load_pretrain(base_model, args.snapshot).cuda().eval()  # load the finetuned weights

    tracker = get_tracker(args.tracker, base_model)
    data_dir = os.path.join(cfg.TRACK.DATA_DIR, args.dataset)
    dataset = get_dataset(args.dataset, data_dir)
    if args.dataset in ['VOT2016', 'VOT2018']:
        vot_evaluate(dataset, tracker)
    elif args.dataset == 'GOT-10k':
        ope_evaluate(dataset, tracker)
Example No. 11
def main():
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()

    init_log('global', logging.INFO)

    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info("\n" + collect_env_info())
    logger.info(args)

    cfg = load_config(args)
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    if args.log_dir:
        tb_writer = SummaryWriter(args.log_dir)
    else:
        tb_writer = Dummy()

    # build dataset
    train_loader, val_loader = build_data_loader(cfg)
Example No. 12
def main():
    global xent_criterion, triplet_criterion, ment_criterion

    logger.info("init done")

    if os.path.exists(cfg.TRAIN.LOG_DIR):
        shutil.rmtree(cfg.TRAIN.LOG_DIR)
    os.makedirs(cfg.TRAIN.LOG_DIR)
    init_log('global', logging.INFO)
    if cfg.TRAIN.LOG_DIR:
        add_file_handler('global', os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'),
                         logging.INFO)

    dataset, train_loader, _, _ = build_data_loader()
    model = BagReID_IBN(dataset.num_train_pids, dataset.num_train_mates)
    xent_criterion = CrossEntropyLabelSmooth(dataset.num_train_pids)
    triplet_criterion = TripletLoss(margin=cfg.TRAIN.TRI_MARGIN)
    ment_criterion = CrossEntropyMate(cfg.TRAIN.MATE_LOSS_WEIGHT)
    if cfg.TRAIN.OPTIM == "sgd":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=cfg.SOLVER.LEARNING_RATE,
                                    momentum=cfg.SOLVER.MOMENTUM,
                                    weight_decay=cfg.SOLVER.WEIGHT_DECAY)
    else:
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=cfg.SOLVER.LEARNING_RATE,
                                     weight_decay=cfg.SOLVER.WEIGHT_DECAY)

    optimizers = [optimizer]
    schedulers = build_lr_schedulers(optimizers)

    if cfg.CUDA:
        model.cuda()
        if torch.cuda.device_count() > 1:
            model = DataParallel(model)

    if cfg.TRAIN.LOG_DIR:
        summary_writer = SummaryWriter(cfg.TRAIN.LOG_DIR)
    else:
        summary_writer = None

    logger.info("model prepare done")
    start_epoch = cfg.TRAIN.START_EPOCH
    # start training
    for epoch in range(start_epoch, cfg.TRAIN.NUM_EPOCHS):
        # the three criteria declared global above
        train(epoch, train_loader, model,
              (xent_criterion, triplet_criterion, ment_criterion),
              optimizers, summary_writer)
        for scheduler in schedulers:
            scheduler.step()
        # save a checkpoint only every EVAL_STEP epochs and at the final epoch
        if cfg.TRAIN.EVAL_STEP > 0 and (epoch + 1) % cfg.TRAIN.EVAL_STEP == 0 \
                or (epoch + 1) == cfg.TRAIN.NUM_EPOCHS:

            if cfg.CUDA and torch.cuda.device_count() > 1:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': epoch + 1
            },
                            is_best=False,
                            save_dir=cfg.TRAIN.SNAPSHOT_DIR,
                            filename='checkpoint_ep' + str(epoch + 1) +
                            '.pth.tar')
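save_checkpoint here receives a state dict plus is_best, save_dir and filename keywords. A minimal sketch consistent with that call signature; the project's actual helper may differ:

import os
import shutil
import torch

def save_checkpoint(state, is_best=False, save_dir='snapshots',
                    filename='checkpoint.pth.tar'):
    # Persist the training state; optionally keep a copy as the best model.
    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, filename)
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(save_dir, 'model_best.pth.tar'))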
Example No. 13
def main():
    init_log('global', logging.INFO)
    logger = logging.getLogger('global')
    global args, best_recall
    args = parser.parse_args()
    cfg = load_config(args.config)

    if args.dist:
        logger.info('dist:{}'.format(args.dist))
        dist_init(args.port, backend=args.backend)

    # build dataset
    train_loader, val_loader = build_data_loader(args.dataset, cfg)
    # if args.arch == 'resnext_101_64x4d_deform_maskrcnn':
    #     model = resnext_101_64x4d_deform_maskrcnn(cfg = cfg['shared'])
    # elif args.arch == 'FishMask':
    #     model = FishMask(cfg = cfg['shared'])
    # else:
    #     if args.arch.find('fpn'):
    #         arch = args.arch.replace('fpn', '')
    #         model = resnet_fpn.__dict__[arch](pretrained=False, cfg = cfg['shared'])
    #     else:
    model = resnet.__dict__[args.arch](pretrained=False, cfg=cfg['shared'])
    logger.info('build model done')
    logger.info(model)

    if args.pretrained:
        model = load_pretrain(model, args.pretrained)

    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(trainable_params,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        assert os.path.isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model, optimizer, args.start_epoch, best_recall, arch = restore_from(
            model, optimizer, args.resume)

    model = model.cuda()
    if args.dist:
        broadcast_params(model)

    logger.info('build dataloader done')
    if args.evaluate:
        rc = validate(val_loader, model, cfg)
        logger.info('recall=%f' % rc)
        return

    # warm up to gradually enlarge the lr
    if args.start_epoch == 0 and args.warmup_epochs > 0:
        world_size = 1
        try:
            world_size = dist.get_world_size()
        except Exception as e:
            print(e)
        rate = world_size * args.batch_size
        warmup_iter = args.warmup_epochs * len(train_loader)
        assert (warmup_iter > 1)
        gamma = rate**(1.0 / (warmup_iter - 1))
        lr_scheduler = IterExponentialLR(optimizer, gamma)
        for epoch in range(args.warmup_epochs):
            logger.info('warmup epoch %d' % (epoch))
            train(train_loader,
                  model,
                  lr_scheduler,
                  epoch + 1,
                  cfg,
                  warmup=True)
        # overwrite initial_lr with magnified lr through warmup
        for group in optimizer.param_groups:
            group['initial_lr'] = group['lr']
        logger.info('warmup for %d epochs done, start large batch training' %
                    args.warmup_epochs)

    lr_scheduler = MultiStepLR(optimizer,
                               milestones=args.step_epochs,
                               gamma=0.1,
                               last_epoch=args.start_epoch - 1)
    for epoch in range(args.start_epoch, args.epochs):
        logger.info('step_epochs:{}'.format(args.step_epochs))
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]
        # train for one epoch
        train(train_loader, model, lr_scheduler, epoch + 1, cfg)

        if (epoch + 1) % 5 == 0 or epoch + 1 == args.epochs:
            # evaluate on validation set
            recall = validate(val_loader, model, cfg)
            # remember the best recall and save a checkpoint
            is_best = recall > best_recall
            best_recall = max(recall, best_recall)
            logger.info('recall %f(%f)' % (recall, best_recall))

        if (not args.dist) or (dist.get_rank() == 0):
            if not os.path.exists(args.save_dir):
                os.makedirs(args.save_dir)
            save_path = os.path.join(args.save_dir,
                                     'checkpoint_e%d.pth' % (epoch + 1))
            torch.save(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.cpu().state_dict(),
                    'best_recall': best_recall,
                    'optimizer': optimizer.state_dict(),
                }, save_path)
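The warmup block chooses gamma so that one multiplicative step per iteration scales the learning rate by exactly rate = world_size * batch_size over the warmup window. A quick numeric check of that identity (the values here are assumed for illustration):

world_size, batch_size = 8, 32  # assumed values
warmup_iter = 1000
rate = world_size * batch_size
gamma = rate ** (1.0 / (warmup_iter - 1))
# (warmup_iter - 1) multiplicative steps grow the lr by exactly `rate`.
assert abs(gamma ** (warmup_iter - 1) - rate) < 1e-6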
Example No. 14
def main():
    global args, device, max_acc, writer

    max_acc = -1
    args = parser.parse_args()
    if args.arch == 'SharpMask':
        trainSm = True
        args.hfreq = 1
        args.gSz = args.iSz
    else:
        trainSm = False

    # Setup experiments results path
    pathsv = 'sharpmask/train' if trainSm else 'deepmask/train'
    args.rundir = join(args.rundir, pathsv)
    try:
        if not isdir(args.rundir):
            makedirs(args.rundir)
    except OSError as err:
        print(err)

    # Setup logger
    init_log('global', logging.INFO)
    add_file_handler('global', join(args.rundir, 'train.log'), logging.INFO)
    logger = logging.getLogger('global')
    logger.info('running in directory %s' % args.rundir)
    logger.info(args)
    writer = SummaryWriter(log_dir=join(args.rundir, 'tb'))

    # Get argument defaults (hashtag #thisisahack)
    parser.add_argument('--IGNORE', action='store_true')
    defaults = vars(parser.parse_args(['--IGNORE']))

    # Print all arguments, color the non-defaults
    for argument, value in sorted(vars(args).items()):
        reset = colorama.Style.RESET_ALL
        color = reset if value == defaults[argument] else colorama.Fore.MAGENTA
        logger.info('{}{}: {}{}'.format(color, argument, value, reset))

    # Setup seeds
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    # Setup device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Setup Model
    model = (models.__dict__[args.arch](args)).to(device)
    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))
    logger.info(model)

    # Setup data loader
    train_dataset = get_loader(args.dataset)(args, split='train')
    val_dataset = get_loader(args.dataset)(args, split='val')
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch,
                                   num_workers=args.workers,
                                   pin_memory=True,
                                   sampler=None)
    val_loader = data.DataLoader(val_dataset,
                                 batch_size=args.batch,
                                 num_workers=args.workers,
                                 pin_memory=True,
                                 sampler=None)

    # Setup Metrics
    criterion = nn.SoftMarginLoss().to(device)

    # Setup optimizer, lr_scheduler and loss function
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=[50, 120], gamma=0.3)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            logger.info("loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            max_acc = checkpoint['max_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            logger.warning("no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    for epoch in range(args.start_epoch, args.maxepoch):
        scheduler.step(epoch=epoch)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if epoch % 2 == 1:
            acc = validate(val_loader, model, criterion, epoch)

            is_best = acc > max_acc
            max_acc = max(acc, max_acc)
            # remember the best accuracy and save a checkpoint
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'max_acc': max_acc,
                    'optimizer': optimizer.state_dict(),
                }, is_best, args.rundir)
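The '--IGNORE' re-parse used above to recover argparse defaults can be done directly with parser.get_default, which avoids mutating the parser; an equivalent sketch:

# Equivalent to the '--IGNORE' trick: query each destination's default.
defaults = {name: parser.get_default(name) for name in vars(args)}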
Example No. 15
def main():
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()
    args = args_process(args)

    init_log('global', logging.INFO)

    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info("\n" + collect_env_info())
    logger.info(args)

    cfg = load_config(args)
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    if args.log_dir:
        tb_writer = SummaryWriter(args.log_dir)
    else:
        tb_writer = Dummy()

    # build dataset
    train_loader, val_loader = build_data_loader(cfg)

    args.img_size = int(cfg['train_datasets']['search_size'])
    args.nms_threshold = float(cfg['train_datasets']['RPN_NMS'])
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(pretrain=True,
                       opts=args,
                       anchors=train_loader.dataset.anchors)
    else:
        exit()
    logger.info(model)

    if args.pretrained:
        model = load_pretrain(model, args.pretrained)

    model = model.cuda()
    dist_model = torch.nn.DataParallel(model,
                                       list(range(
                                           torch.cuda.device_count()))).cuda()

    if args.resume and args.start_epoch != 0:
        model.features.unfix((args.start_epoch - 1) / args.epochs)

    optimizer, lr_scheduler = build_opt_lr(model, cfg, args, args.start_epoch)
    # optionally resume from a checkpoint
    if args.resume:
        assert os.path.isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model, optimizer, args.start_epoch, best_acc, arch = restore_from(
            model, optimizer, args.resume)
        dist_model = torch.nn.DataParallel(
            model, list(range(torch.cuda.device_count()))).cuda()

    logger.info(lr_scheduler)

    logger.info('model prepare done')
    global cur_lr

    if not os.path.exists(args.save_dir):  # create the save directory
        os.makedirs(args.save_dir)
    num_per_epoch = len(train_loader.dataset) // args.batch
    num_per_epoch_val = len(val_loader.dataset) // args.batch

    for epoch in range(args.start_epoch, args.epochs):
        lr_scheduler.step(epoch)
        cur_lr = lr_scheduler.get_cur_lr()
        logger = logging.getLogger('global')
        train_avg = AverageMeter()
        val_avg = AverageMeter()

        if dist_model.module.features.unfix(epoch / args.epochs):
            logger.info('unfix part model.')
            optimizer, lr_scheduler = build_opt_lr(dist_model.module, cfg,
                                                   args, epoch)

        train(train_loader, dist_model, optimizer, lr_scheduler, epoch, cfg,
              train_avg, num_per_epoch)

        if dist_model.module.features.unfix(epoch / args.epochs):
            logger.info('unfix part model.')
            optimizer, lr_scheduler = build_opt_lr(dist_model.module, cfg,
                                                   args, epoch)

        if (epoch + 1) % args.save_freq == 0:
            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': dist_model.module.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                    'anchor_cfg': cfg['anchors']
                }, False,
                os.path.join(args.save_dir, 'checkpoint_e%d.pth' % (epoch)),
                os.path.join(args.save_dir, 'best.pth'))

            validation(val_loader, dist_model, epoch, cfg, val_avg,
                       num_per_epoch_val)
Example No. 16
def main():
    # parse command-line arguments
    global args, logger, v_id
    args = parser.parse_args()
    # load the configuration: network structure, hyperparameters, etc.
    cfg = load_config(args)
    # initialize logging and mirror log output to a file on disk
    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)
    # record the run configuration in the log
    logger = logging.getLogger('global')
    logger.info(args)

    # setup model
    # build the network architecture
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))
    # load the network weights
    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model = load_pretrain(model, args.resume)
    # eval mode: fixes dropout and batch-norm behavior
    model.eval()
    # select the device
    device = torch.device('cuda' if (
        torch.cuda.is_available() and not args.cpu) else 'cpu')
    model = model.to(device)
    # setup dataset
    dataset = load_dataset(args.dataset)

    # these datasets support masks: VOS or VOT?
    if args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable Mask output
    else:
        vos_enable = False

    total_lost = 0  # VOT
    iou_lists = []  # VOS
    speed_list = []
    # iterate over the videos
    for v_id, video in enumerate(dataset.keys(), start=1):
        if args.video != '' and video != args.video:
            continue
        # if masks are enabled, call track_vos
        if vos_enable:
            # DAVIS2017 and ytb_vos enable multi-object tracking
            iou_list, speed = track_vos(
                model,
                dataset[video],
                cfg['hp'] if 'hp' in cfg.keys() else None,
                args.mask,
                args.refine,
                args.dataset in ['DAVIS2017', 'ytb_vos'],
                device=device)
            iou_lists.append(iou_list)
        # otherwise call track_vot
        else:
            lost, speed = track_vot(model,
                                    dataset[video],
                                    cfg['hp'] if 'hp' in cfg.keys() else None,
                                    args.mask,
                                    args.refine,
                                    device=device)
            total_lost += lost
        speed_list.append(speed)

    # report final result
    if vos_enable:
        for thr, iou in zip(thrs, np.mean(np.concatenate(iou_lists), axis=0)):
            logger.info('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(
                thr, iou))
    else:
        logger.info('Total Lost: {:d}'.format(total_lost))

    logger.info('Mean Speed: {:.2f} FPS'.format(np.mean(speed_list)))
Example No. 17
def train():
    init_log('global', logging.INFO)
    logger = logging.getLogger("global")
    if args.img_dim == 300:
        cfg = (FEDet_VOC_300, FEDet_COCO_300)[args.dataset == 'COCO']
    else:
        cfg = (FEDet_VOC_512, FEDet_COCO_512)[args.dataset == 'COCO']

    if args.use_dataAug:
        train_transform = SSDAugmentation(cfg['min_dim'], MEANS)
    else:
        train_transform = Augmentation(cfg['min_dim'], MEANS)

    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            logger.warning(
                "WARNING: Using default COCO dataset_root because " +
                "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        dataset = COCODetection(root=args.dataset_root,
                                image_sets=[("2017", "train")],
                                transform=train_transform,
                                target_transform=COCOAnnotationTransform(),
                                aux=args.use_aux)
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')

        args.dataset_root = VOC_ROOT
        dataset = VOCDetection(root=args.dataset_root,
                               image_sets=[('2007', 'trainval'),
                                           ('2012', 'trainval')],
                               transform=train_transform,
                               aux=args.use_aux)

    if not os.path.exists(args.save_folder):
        os.makedirs(args.save_folder)

    if args.visdom:
        import visdom
        viz = visdom.Visdom()
    if args.arch == 'FEDet':
        build_net = build_fedet(cfg, 'train', cfg['min_dim'],
                                cfg['num_classes'])
    else:
        logger.error('architecture error!')
        return
    net = build_net
    logger.info(net)
    logger.info('---------config-----------')
    logger.info(cfg)
    if args.cuda:
        net = torch.nn.DataParallel(build_net)
        cudnn.benchmark = True

    if args.resume:
        logger.info('Resuming training, loading {}...'.format(args.resume))
        build_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.pretrained_model + args.basenet)
        logger.info('Loading base network...')
        build_net.vgg.load_state_dict(vgg_weights)

    if not args.resume:
        logger.info('Initializing weights...')

        def weights_init(m):
            for key in m.state_dict():
                if key.split('.')[-1] == 'weight':
                    if 'conv' in key:
                        init.kaiming_normal_(m.state_dict()[key],
                                             mode='fan_out')
                    if 'bn' in key:
                        m.state_dict()[key][...] = 1
                elif key.split('.')[-1] == 'bias':
                    m.state_dict()[key][...] = 0

        # initialize the newly added layers' weights (kaiming for conv, constants for bn)
        build_net.extras.apply(weights_init)
        build_net.loc.apply(weights_init)
        build_net.conf.apply(weights_init)

    if args.cuda:
        net.cuda()
        cudnn.benchmark = True

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion1 = MultiBoxLoss(cfg, 0.5, True, 0, True, 3, 0.5, False,
                              args.cuda)
    criterion2 = nn.BCELoss(size_average=True).cuda()

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    ssm_loss = 0  ## SSM loss counter
    epoch = 0
    logger.info('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    logger.info('Training FEDet on: %s' % dataset.name)
    logger.info('Training set size: %d' % len(dataset))
    logger.info('Using the specified args:')
    logger.info(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot(viz, 'Iteration', 'Loss', vis_title,
                                    vis_legend)
        epoch_plot = create_vis_plot(viz, 'Epoch', 'Loss', vis_title,
                                     vis_legend)

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate_fedet
                                  if args.use_aux else detection_collate,
                                  pin_memory=True)
    start_training_time = time.time()
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if iteration != 0 and (iteration % epoch_size == 0):
            epoch += 1
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(viz, epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            ssm_loss = 0

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data
        try:
            if args.use_aux:
                images, targets, aux_targets = next(batch_iterator)
            else:
                images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            if args.use_aux:
                images, targets, aux_targets = next(batch_iterator)
            else:
                images, targets = next(batch_iterator)
        if images.size(0) < args.batch_size:
            continue
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(ann.cuda()) for ann in targets]
            if args.use_aux:
                aux_targets = Variable(aux_targets.cuda())
        else:
            images = Variable(images)
            targets = [Variable(ann) for ann in targets]
            if args.use_aux:
                aux_targets = Variable(aux_targets)
        # forward
        t0 = time.time()
        assert images.size(2) == args.img_dim and images.size(
            3) == args.img_dim
        out = net(images)
        # backprop
        optimizer.zero_grad()
        if args.use_aux:
            loss_loc, loss_cls = criterion1(out[2:], targets)
            loss_ssm1 = criterion2(out[0], aux_targets)
            loss_ssm2 = criterion2(out[1], aux_targets)
            loss = loss_loc + loss_cls + loss_ssm1.double() + loss_ssm2.double()
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss = loss_loc.item()
            conf_loss = loss_cls.item()
            ssm_loss = loss_ssm1.item() + loss_ssm2.item()
        else:
            loss_loc, loss_cls = criterion1(out, targets)
            loss = loss_loc + loss_cls
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss = loss_loc.item()
            conf_loss = loss_cls.item()
            ssm_loss = 0
        if iteration % 10 == 0:
            logger.info(
                'iter ' + repr(iteration) + '/' + str(cfg['max_iter']) +
                ' || epoch: ' + str(epoch + 1) + ' || LR: ' +
                repr(optimizer.param_groups[0]['lr']) +
                ' || total loss: %.4f || loc Loss: %.4f || conf Loss: %.4f || SSM loss: %.4f || '
                % (loss.item(), loc_loss, conf_loss, ssm_loss) +
                'timer: %.4f sec.' % (t1 - t0))

        if args.visdom:
            update_vis_plot(viz, iteration, loss_loc.item(), loss_cls.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 10000 == 0:
            logger.info('Saving state, iter: %d' % iteration)
            ckpt_path = os.path.join(
                args.save_folder, args.arch + str(args.img_dim) + '_' +
                str(args.dataset) + '_' + str(iteration) + '.pth')
            torch.save(build_net.state_dict(), ckpt_path)
    torch.save(build_net.state_dict(),
               os.path.join(args.save_folder, 'models.pth'))
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logging.info("Total training time : {} ".format(total_time_str))
Example No. 18
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import WeightedRandomSampler

from models.build_model import ModelBuilder
from datasets.fld_dataset import FLDDS, TransformBuilder
from losses.wing_loss import WingLoss, SmoothWingLoss, WiderWingLoss, NormalizedWiderWingLoss, L2Loss, EuclideanLoss, NMELoss, LaplacianLoss
from utils.log_helper import init_log
from utils.vis_utils import save_result_imgs, save_result_nmes, save_result_lmks
from utils.vis_utils import get_logger, add_scalar, get_model_graph, CsvHelper
from utils.misc import save_checkpoint, print_speed, load_model, get_checkpoints
from utils.metrics import MiscMeter, eval_NME
from utils.imutils import refine_300W_landmarks

init_log('FLD')
logger = logging.getLogger('FLD')


class FLD(object):
    def __init__(self, task_config):
        self.config = EasyDict(task_config)
        cudnn.benchmark = True
        self._build()

    def train(self):
        config = self.config.train_param
        num_kpts = config.num_kpts
        lr_scheduler = self.lr_scheduler
        train_loader = self.train_loader
        model = self.model
Example No. 19
def main():
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()

    init_log('global', logging.INFO)

    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    
    print("Init logger")

    logger = logging.getLogger('global')

    print(44)
    #logger.info("\n" + collect_env_info())
    print(99)
    logger.info(args)

    cfg = load_config(args)
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    print(2)

    if args.log_dir:
        tb_writer = SummaryWriter(args.log_dir)
    else:
        tb_writer = Dummy()

    # build dataset
    train_loader, val_loader = build_data_loader(cfg)

    print(3)

    path = "/usr4/alg504/cliao25/siammask/experiments/siammask_base/snapshot/checkpoint_e{}.pth"

    for epoch in range(1,21):

        if args.arch == 'Custom':
            from custom import Custom
            model = Custom(pretrain=True, anchors=cfg['anchors'])
        else:
            exit()

        print(4)

        if args.pretrained:
            model = load_pretrain(model, args.pretrained)

        model = model.cuda()


        #model.features.unfix((epoch - 1) / 20)
        optimizer, lr_scheduler = build_opt_lr(model, cfg, args, epoch)
        filepath = path.format(epoch)
        assert os.path.isfile(filepath)

        model, _, _, _, _ = restore_from(model, optimizer, filepath)
        #model = load_pretrain(model, filepath)
        model = torch.nn.DataParallel(model, list(range(torch.cuda.device_count()))).cuda()

        model.train()
        device = torch.device('cuda')
        model = model.to(device)

        valid(val_loader, model, cfg)

    print("Done")
Example No. 20
# tensorboard log path
board_path = cfg.meta["board_path"]
experiment_path = cfg.meta["experiment_path"]
experiment_name = cfg.meta["experiment_name"]
arch = cfg.meta["arch"]
# training hyperparameters
batch_size = cfg.train['batch_size']
epoches = cfg.train['epoches']
lr = cfg.train['lr']
# number of future frames
num_frame = cfg.model['input_num']
# print freq
print_freq = cfg.train['print_freq']

# initialize the logger
global_logger = init_log('global', level=logging.INFO)
add_file_handler("global",
                 os.path.join(os.getcwd(), 'logs',
                              '{}.log'.format(experiment_name)),
                 level=logging.DEBUG)

# log the cfg contents
cfg.log_dict()

# initialize the averager
avg = AverageMeter()

# cuda
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True
Example No. 21
from omegaconf import OmegaConf
from utils.loadConfig import load_cfg
from utils.average_meter_helper import AverageMeter
from utils.log_helper import init_log, add_file_handler, print_speed

# get method
from experiment.triplet_utils.get_loss import get_loss
from experiment.triplet_utils.get_backbone import get_backbone
from experiment.triplet_utils.get_optimizer import get_optimizer
from experiment.triplet_utils.get_dataloader import get_train_dataloader

# load model (an easier way to get the model)
from experiment.triplet_utils.load_model import load_model_test

# init logger
logger = init_log("global")


def validation(epoch, log_interval, test_dataloader, model, loss, writer,
               device):
    """Validate on test dataset.

    Current validation only covers the loss and the pos/neg distances.
    In the future we will add more metrics such as MAP5|10|50|100
    (maybe in another file.)

    Args:
        log_interval:
            Number of batches between log messages.
        test_dataloader:
            It should not be none! A Triplet dataloader to validate data.
Example No. 22
def main():
    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    params = {'penalty_k': args.penalty_k,
              'window_influence': args.window_influence,
              'lr': args.lr,
              'instance_size': args.search_region}

    num_search = len(params['penalty_k']) * len(params['window_influence']) * \
        len(params['lr']) * len(params['instance_size'])

    print(params)
    print(num_search)

    cfg = load_config(args)
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        model = models.__dict__[args.arch](anchors=cfg['anchors'])

    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()
    model = model.to(device)

    default_hp = cfg.get('hp', {})

    p = dict()

    p['network'] = model
    p['network_name'] = args.arch+'_'+args.resume.split('/')[-1].split('.')[0]
    p['dataset'] = args.dataset

    global ims, gt, image_files

    dataset_info = load_dataset(args.dataset)
    videos = list(dataset_info.keys())
    np.random.shuffle(videos)

    for video in videos:
        print(video)
        if isfile('finish.flag'):
            return

        p['video'] = video
        ims = None
        image_files = dataset_info[video]['image_files']
        gt = dataset_info[video]['gt']

        np.random.shuffle(params['penalty_k'])
        np.random.shuffle(params['window_influence'])
        np.random.shuffle(params['lr'])
        for penalty_k in params['penalty_k']:
            for window_influence in params['window_influence']:
                for lr in params['lr']:
                    for instance_size in params['instance_size']:
                        p['hp'] = default_hp.copy()
                        p['hp'].update({'penalty_k':penalty_k,
                                'window_influence':window_influence,
                                'lr':lr,
                                'instance_size': instance_size,
                                })
                        tune(p)
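The four nested hyperparameter loops can be flattened with itertools.product; an equivalent sketch of the same grid search:

from itertools import product

for penalty_k, window_influence, lr, instance_size in product(
        params['penalty_k'], params['window_influence'],
        params['lr'], params['instance_size']):
    p['hp'] = default_hp.copy()
    p['hp'].update({'penalty_k': penalty_k,
                    'window_influence': window_influence,
                    'lr': lr,
                    'instance_size': instance_size})
    tune(p)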
Example No. 23
from PIL import Image
from PIL import ImageFile
from torchvision import transforms
from tqdm import tqdm
from torch.utils.data import DataLoader
from network import net
from network import styler2
from sampler import InfiniteSamplerWrapper
from torchvision.utils import save_image

import time
import logging
from utils.log_helper import init_log
from torch.autograd import Variable
import mmcv
init_log('global', logging.INFO)
logger = logging.getLogger('global')


cudnn.benchmark = True
Image.MAX_IMAGE_PIXELS = None  # Disable DecompressionBombError
ImageFile.LOAD_TRUNCATED_IMAGES = True  # Disable OSError: image file is truncated


def adjust_learning_rate(optimizer, iteration_count):
    """Imitating the original implementation"""
    lr = args.lr / (1.0 + args.lr_decay * iteration_count)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
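The decay above is inverse-time: lr_t = lr0 / (1 + lr_decay * t). For example, with lr0 = 1e-3 and lr_decay = 5e-5 the learning rate halves at iteration 20000, since 1 + 5e-5 * 20000 = 2.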

Example No. 24
def main():
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()
    args = args_process(args)

    init_log('global', logging.INFO)

    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info("\n" + collect_env_info())
    logger.info(args)

    cfg = load_config(args)
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    if args.log_dir:
        tb_writer = SummaryWriter(args.log_dir)
    else:
        tb_writer = Dummy()

    # build dataset
    train_loader, val_loader = build_data_loader(cfg)

    args.img_size = int(cfg['train_datasets']['search_size'])
    args.nms_threshold = float(cfg['train_datasets']['RPN_NMS'])
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(pretrain=True,
                       opts=args,
                       anchors=train_loader.dataset.anchors)
    else:
        exit()
    logger.info(model)

    if args.pretrained:
        model = load_pretrain(model, args.pretrained)

    model = model.cuda()
    dist_model = torch.nn.DataParallel(model,
                                       list(range(
                                           torch.cuda.device_count()))).cuda()

    if args.resume and args.start_epoch != 0:
        model.features.unfix((args.start_epoch - 1) / args.epochs)

    optimizer, lr_scheduler = build_opt_lr(model, cfg, args, args.start_epoch)
    # optionally resume from a checkpoint
    if args.resume:
        assert os.path.isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model, optimizer, args.start_epoch, best_acc, arch = restore_from(
            model, optimizer, args.resume)
        dist_model = torch.nn.DataParallel(
            model, list(range(torch.cuda.device_count()))).cuda()

    logger.info(lr_scheduler)

    logger.info('model prepare done')

    train(train_loader, dist_model, optimizer, lr_scheduler, args.start_epoch,
          cfg)
Example No. 25
def main():
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()

    init_log('global', logging.INFO)

    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info(args)

    cfg = load_config(args)

    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))
    
    logger.info("\n" + collect_env_info())

    if args.log_dir:
        tb_writer = SummaryWriter(args.log_dir)
    else:
        tb_writer = Dummy()

    # build dataset
    train_loader, val_loader = build_data_loader(cfg)

    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(pretrain=True, anchors=cfg['anchors'])
    else:
        model = models.__dict__[args.arch](anchors=cfg['anchors'])

    logger.info(model)

    if args.pretrained:
        model = load_pretrain(model, args.pretrained)

    model = model.cuda()
    dist_model = torch.nn.DataParallel(model, list(range(torch.cuda.device_count()))).cuda()

    if args.resume and args.start_epoch != 0:
        model.features.unfix((args.start_epoch - 1) / args.epochs)

    optimizer, lr_scheduler = build_opt_lr(model, cfg, args, args.start_epoch)
    logger.info(lr_scheduler)
    # optionally resume from a checkpoint
    if args.resume:
        assert os.path.isfile(args.resume), '{} is not a valid file'.format(args.resume)
        model, optimizer, args.start_epoch, best_acc, arch = restore_from(model, optimizer, args.resume)
        dist_model = torch.nn.DataParallel(model, list(range(torch.cuda.device_count()))).cuda()
        epoch = args.start_epoch
        if dist_model.module.features.unfix(epoch/args.epochs):
            logger.info('unfix part model.')
            optimizer, lr_scheduler = build_opt_lr(dist_model.module, cfg, args, epoch)
        lr_scheduler.step(epoch)
        cur_lr = lr_scheduler.get_cur_lr()
        logger.info('epoch:{} resume lr {}'.format(epoch, cur_lr))

    logger.info('model prepare done')

    train(train_loader, dist_model, optimizer, lr_scheduler, args.start_epoch, cfg)
Example No. 26
def main():
    # init logger
    init_log('global', args.save_dir, logging.INFO)
    logger = logging.getLogger('global')
    # print arguments
    for arg in vars(args):
        logger.info("{}: {}".format(arg, getattr(args, arg)))

    # get device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # build dataloader and model
    train_loader, test_loader = build_nyu_dataloader(args.dataset_dir)
    opts = {"L": 5, "k": 12, "bn": True}
    model = D3(opts)

    # check GPU numbers and deploy parallel
    # parallel = False
    # if torch.cuda.device_count() > 1:
    #     parallel = True
    #     logger.info("Let's use {:d} GPUs!".format(torch.cuda.device_count()))
    #     # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    #     model = nn.DataParallel(model)
    model.to(device)

    logger.info("*" * 40)
    logger.info(model)
    logger.info("*" * 40)

    # optimizer settings
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # optionally resume from a checkpoint
    # if args.resume:
    #     if os.path.isfile(args.resume):
    #         model, _, args.start_epoch = restore_from(model, optimizer, args.resume)

    # set the best model
    best_model_wts = copy.deepcopy(model.state_dict())
    best_abs_rel = float('inf')  # lower absolute-relative error is better
    logger.info("Start training...")

    # epoches = args.batches // train_loader.__len__()

    for epoch in range(args.epoches):

        for g in optimizer.param_groups:
            g['lr'] = args.lr * (1 - args.lr_decay)**(epoch //
                                                      args.lr_decay_step)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        t0 = time.time()
        train_one_epoch(train_loader, model, optimizer, device, epoch)
        t1 = time.time()

        if epoch % args.test_rate == 0:
            test_abs_rel = test_one_epoch(test_loader, model, device, epoch)
            if test_abs_rel < best_abs_rel:
                best_abs_rel = test_abs_rel
                best_model_wts = copy.deepcopy(model.state_dict())

        torch.cuda.empty_cache()

        if epoch % args.test_rate == 0:
            filename = os.path.join(args.save_dir,
                                    'checkpoint_e%d.pth' % (epoch + 1))
            save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                },
                is_best=False,
                filename=filename)
            logger.info("Saved model : {}".format(filename))

        print_speed(epoch, t1 - t0, args.epoches)

        save_checkpoint(
            {
                'batch_num': epoch,
                'state_dict': best_model_wts,
                'optimizer': optimizer.state_dict()
            },
            is_best=True,
            filename=os.path.join(args.save_dir, 'model_best.pth'))

    writer.close()
Example No. 27
def main():
    logger = logging.getLogger('global')
    global criterion_xent, criterion_triplet, criterion_center
    if os.path.exists(cfg.TRAIN.LOG_DIR):
        shutil.rmtree(cfg.TRAIN.LOG_DIR)
    os.makedirs(cfg.TRAIN.LOG_DIR)
    init_log('global', logging.INFO)  # log
    add_file_handler('global', os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'),
                     logging.INFO)
    summary_writer = SummaryWriter(cfg.TRAIN.LOG_DIR)  # visualise

    dataset, train_loader, _, _ = build_data_loader()
    model = BagReID_RESNET(dataset.num_train_bags)
    criterion_xent = CrossEntropyLabelSmooth(dataset.num_train_bags,
                                             use_gpu=cfg.CUDA)
    criterion_triplet = TripletLoss(margin=cfg.TRAIN.MARGIN)
    criterion_center = CenterLoss(dataset.num_train_bags,
                                  cfg.MODEL.GLOBAL_FEATS +
                                  cfg.MODEL.PART_FEATS,
                                  use_gpu=cfg.CUDA)
    if cfg.TRAIN.OPTIM == "sgd":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=cfg.SOLVER.LEARNING_RATE,
                                    momentum=cfg.SOLVER.MOMENTUM,
                                    weight_decay=cfg.SOLVER.WEIGHT_DECAY)
    else:
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=cfg.SOLVER.LEARNING_RATE,
                                     weight_decay=cfg.SOLVER.WEIGHT_DECAY)

    center_optimizer = torch.optim.SGD(criterion_center.parameters(),
                                       lr=cfg.SOLVER.LEARNING_RATE_CENTER)

    optimizers = [optimizer, center_optimizer]
    schedulers = build_lr_schedulers(optimizers)

    if cfg.CUDA:
        model.cuda()
        if torch.cuda.device_count() > 1:
            model = torch.nn.DataParallel(model, device_ids=cfg.DEVICES)

    logger.info("model prepare done")
    # start training
    for epoch in range(cfg.TRAIN.NUM_EPOCHS):
        # pass the three criteria defined above (the original snippet
        # referenced an undefined name `criterion`)
        train(epoch, train_loader, model,
              (criterion_xent, criterion_triplet, criterion_center),
              optimizers, summary_writer)
        for scheduler in schedulers:
            scheduler.step()

        # save a checkpoint at the evaluation interval and at the final epoch
        if cfg.TRAIN.EVAL_STEP > 0 and (epoch + 1) % cfg.TRAIN.EVAL_STEP == 0 \
                or (epoch + 1) == cfg.TRAIN.NUM_EPOCHS:

            if cfg.CUDA and torch.cuda.device_count() > 1:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': epoch + 1
            },
                            is_best=False,
                            save_dir=cfg.TRAIN.SNAPSHOT_DIR,
                            filename='checkpoint_ep' + str(epoch + 1) + '.pth')
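The train() call receives all three criteria plus two optimizers, but the loss weighting is not shown in this example. A hedged sketch of how cross-entropy, triplet and center losses are commonly combined in ReID training; the call signatures and the center_weight value are assumptions, not this repository's actual code:

def combined_reid_loss(logits, feats, labels,
                       criterion_xent, criterion_triplet, criterion_center,
                       center_weight=5e-4):
    # classification + metric + center losses; the center term is usually
    # down-weighted so its gradients do not dominate the update
    loss = criterion_xent(logits, labels)
    loss = loss + criterion_triplet(feats, labels)
    loss = loss + center_weight * criterion_center(feats, labels)
    return loss

The separate center_optimizer above exists because CenterLoss keeps its own learnable class centers, which are parameters distinct from the model weights and are stepped independently.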
Example No. 28
def main():
    """
    基础网络的训练
    :return:
    """
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()
    # initialize logging
    init_log('global', logging.INFO)

    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)
    # get the global logger
    logger = logging.getLogger('global')
    logger.info("\n" + collect_env_info())
    logger.info(args)
    # load the configuration
    cfg = load_config(args)
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    if args.log_dir:
        tb_writer = SummaryWriter(args.log_dir)
    else:
        tb_writer = Dummy()

    # build the datasets
    train_loader, val_loader = build_data_loader(cfg)
    # build the training model
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(pretrain=True, anchors=cfg['anchors'])
    else:
        exit('unsupported architecture: {}'.format(args.arch))
    logger.info(model)
    # load pretrained weights
    if args.pretrained:
        model = load_pretrain(model, args.pretrained)

    # GPU variant:
    # model = model.cuda()
    # dist_model = torch.nn.DataParallel(model, list(range(torch.cuda.device_count()))).cuda()
    # wrap the model for data-parallel training
    dist_model = torch.nn.DataParallel(model)
    # when resuming mid-run, unfreeze the backbone in proportion to progress
    if args.resume and args.start_epoch != 0:
        model.features.unfix((args.start_epoch - 1) / args.epochs)
    # build the optimizer and learning-rate schedule
    optimizer, lr_scheduler = build_opt_lr(model, cfg, args, args.start_epoch)
    # optionally resume from a checkpoint
    if args.resume:
        assert os.path.isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model, optimizer, args.start_epoch, best_acc, arch = restore_from(
            model, optimizer, args.resume)
        # GPU variant:
        # dist_model = torch.nn.DataParallel(model, list(range(torch.cuda.device_count()))).cuda()
        dist_model = torch.nn.DataParallel(model)

    logger.info(lr_scheduler)

    logger.info('model prepare done')
    # train the model
    train(train_loader, dist_model, optimizer, lr_scheduler, args.start_epoch,
          cfg)
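load_pretrain is used throughout these examples without its body being shown. A hedged sketch of what such a helper typically does, namely stripping the 'module.' prefix that torch.nn.DataParallel adds and loading non-strictly; this is an illustration of the usual pattern, not the repository's actual implementation:

import torch

def load_pretrain_sketch(model, ckpt_path):
    ckpt = torch.load(ckpt_path, map_location='cpu')
    state = ckpt.get('state_dict', ckpt)  # handle plain or wrapped checkpoints
    # drop the 'module.' prefix added by torch.nn.DataParallel
    state = {(k[7:] if k.startswith('module.') else k): v
             for k, v in state.items()}
    missing, unexpected = model.load_state_dict(state, strict=False)
    if missing or unexpected:
        print('missing:', missing, 'unexpected:', unexpected)
    return model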
Example No. 29
def main():
    global args, best_acc, tb_writer, logger
    args = parser.parse_args()

    init_log('global', logging.INFO)

    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info("\n" + collect_env_info())
    logger.info(args)

    cfg = load_config(args)
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))

    if args.log_dir:
        tb_writer = SummaryWriter(args.log_dir)
    else:
        tb_writer = Dummy()

    # build dataset
    train_loader, val_loader = build_data_loader(cfg)

    if args.arch == 'Custom':
        model = Custom(anchors=cfg['anchors'])
    elif args.arch == 'Custom_Sky':
        model = Custom_Sky(anchors=cfg['anchors'])
    else:
        exit('unsupported architecture: {}'.format(args.arch))
    logger.info(model)

    if args.pretrained:
        model = load_pretrain(model, args.pretrained)

    # print(summary(model=model, input_size=(3, 511, 511), batch_size=1))
    model = model.cuda()
    dist_model = torch.nn.DataParallel(model,
                                       list(range(
                                           torch.cuda.device_count()))).cuda()

    if args.resume and args.start_epoch != 0:
        model.features.unfix((args.start_epoch - 1) / args.epochs)

    optimizer, lr_scheduler = build_opt_lr(model, cfg, args, args.start_epoch)
    # optionally resume from a checkpoint
    if args.resume:
        logger.info('resuming from {}'.format(args.resume))
        assert os.path.isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model, optimizer, args.start_epoch, best_acc, arch = restore_from(
            model, optimizer, args.resume)
        dist_model = torch.nn.DataParallel(
            model, list(range(torch.cuda.device_count()))).cuda()

    logger.info(lr_scheduler)

    logger.info('model prepare done')

    train(train_loader, dist_model, optimizer, lr_scheduler, args.start_epoch,
          cfg)
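model.features.unfix((args.start_epoch - 1) / args.epochs) appears in the last two examples: it progressively unfreezes backbone stages as training progress (a value in [0, 1]) advances. A hedged sketch of the idea, in which the stage layout and the threshold schedule are assumptions chosen purely for illustration:

import torch.nn as nn

def unfix_sketch(stages, progress, schedule=(0.0, 0.1, 0.2, 0.3)):
    # enable gradients for the first n stages, where n grows as training
    # progress in [0, 1] passes each threshold in the schedule
    n = sum(progress >= s for s in schedule)
    for i, stage in enumerate(stages):
        for p in stage.parameters():
            p.requires_grad = i < n

stages = [nn.Linear(4, 4) for _ in range(4)]
unfix_sketch(stages, progress=0.15)  # first two stages become trainable
print([p.requires_grad for s in stages for p in s.parameters()])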
Example No. 30
def main(args):
    cfg_from_file(args.config)
    cfg.save_name = args.save_name
    cfg.save_path = args.save_path
    cfg.resume_file = args.resume_file
    cfg.config = args.config
    cfg.batch_size = args.batch_size
    cfg.num_workers = args.num_workers
    save_path = join(args.save_path, args.save_name)
    if not exists(save_path):
        makedirs(save_path)
    resume_file = args.resume_file
    init_log('global', logging.INFO)
    add_file_handler('global', os.path.join(save_path, 'logs.txt'),
                     logging.INFO)
    logger.info("Version Information: \n{}\n".format(commit()))
    logger.info("config \n{}".format(json.dumps(cfg, indent=4)))
    start_epoch = 0

    model = ModelBuilder().cuda()
    if cfg.backbone.pretrained:
        load_pretrain(model.backbone,
                      join('pretrained_net', cfg.backbone.pretrained))

    train_dataset = Datasets()
    val_dataset = Datasets(is_train=False)

    # shuffle=False: the dataset reorders itself via dataset.shuffle() below
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=args.num_workers,
                                               pin_memory=False,
                                               drop_last=True)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers,
                                             pin_memory=False,
                                             drop_last=True)

    if resume_file:
        if isfile(resume_file):
            logger.info("=> loading checkpoint '{}'".format(resume_file))
            model, start_epoch = restore_from(model, resume_file)
            start_epoch = start_epoch + 1
            for i in range(start_epoch):
                train_loader.dataset.shuffle()
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                resume_file, start_epoch - 1))
        else:
            logger.info("=> no checkpoint found at '{}'".format(resume_file))

    ngpus = torch.cuda.device_count()
    is_dataparallel = False
    if ngpus > 1:
        model = torch.nn.DataParallel(model, list(range(ngpus))).cuda()
        is_dataparallel = True

    if is_dataparallel:
        optimizer, lr_scheduler = build_opt_lr(model.module, start_epoch)
    else:
        optimizer, lr_scheduler = build_opt_lr(model, start_epoch)

    logger.info(lr_scheduler)
    logger.info("model prepare done")

    if args.log:
        writer = SummaryWriter(comment=args.save_name)

    for epoch in range(start_epoch, cfg.train.epoch):
        train_loader.dataset.shuffle()
        # rebuild the optimizer and scheduler when hitting a backbone unfix
        # step or when backbone pretraining ends
        if (epoch == np.array(cfg.backbone.unfix_steps)
            ).sum() > 0 or epoch == cfg.train.pretrain_epoch:
            if is_dataparallel:
                optimizer, lr_scheduler = build_opt_lr(model.module, epoch)
            else:
                optimizer, lr_scheduler = build_opt_lr(model, epoch)
        lr_scheduler.step(epoch)
        record_dict_train = train(train_loader, model, optimizer, epoch)
        record_dict_val = validate(val_loader, model, epoch)
        message = 'Train Epoch: [{0}]\t'.format(epoch)
        for k, v in record_dict_train.items():
            message += '{name:s} {loss:.4f}\t'.format(name=k, loss=v)
        logger.info(message)
        message = 'Val Epoch: [{0}]\t'.format(epoch)
        for k, v in record_dict_val.items():
            message += '{name:s} {loss:.4f}\t'.format(name=k, loss=v)
        logger.info(message)

        if args.log:
            for k, v in record_dict_train.items():
                writer.add_scalar('train/' + k, v, epoch)
            for k, v in record_dict_val.items():
                writer.add_scalar('val/' + k, v, epoch)
        if is_dataparallel:
            save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': model.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'cfg': cfg
                }, epoch, save_path)
        else:
            save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'cfg': cfg
                }, epoch, save_path)
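On resume, the loop above calls train_loader.dataset.shuffle() once per completed epoch before training continues. If the dataset shuffles from a fixed starting seed, this replays the exact index order an uninterrupted run would have produced. A minimal sketch of that property; the ShuffledIndices class is a hypothetical stand-in, not the repository's Datasets class:

import random

class ShuffledIndices:
    # hypothetical stand-in for a dataset with a seeded shuffle() method
    def __init__(self, n, seed=0):
        self.indices = list(range(n))
        self.rng = random.Random(seed)

    def shuffle(self):
        self.rng.shuffle(self.indices)

fresh, resumed = ShuffledIndices(8), ShuffledIndices(8)
for _ in range(3):            # uninterrupted run: epochs 0..2
    fresh.shuffle()
for _ in range(3):            # resumed run replays the 3 completed shuffles
    resumed.shuffle()
print(fresh.indices == resumed.indices)  # True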