# Example 1
def main():
    """Training entry point (aggregation variant with staged optimizers).

    Builds the dataset and detector from an mmdetection-style config given
    on the command line, then runs a hand-rolled training loop that:

    * trains only the ``agg`` sub-module for the first 10 epochs, then
      switches to an optimizer over all model parameters;
    * live-plots the individual loss terms to a visdom server
      (env ``fuse_c``);
    * saves a checkpoint at the start of every epoch (so ``epoch_0.pth``
      holds the pre-training / loaded weights).

    All configuration comes from ``parse_args()`` / the config file; the
    function takes no parameters and returns ``None``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus

    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * cfg.gpus / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)
    datasets = [build_dataset(cfg.data.train)]
    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)
    # FIX: this block was duplicated verbatim in the original, loading the
    # same checkpoint twice; a single load is sufficient and identical.
    if cfg.load_from:
        checkpoint = load_checkpoint(model, cfg.load_from, map_location='cpu')
        model.CLASSES = datasets[0].CLASSES
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                          config=cfg.text,
                                          CLASSES=datasets[0].CLASSES)

    # Non-distributed, non-shuffled loader: the loop below relies on
    # consecutive frames of the same video arriving together.
    data_loader = build_dataloader(datasets[0],
                                   imgs_per_gpu=cfg.data.imgs_per_gpu,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   num_gpus=cfg.gpus,
                                   dist=False,
                                   shuffle=False)
    # put model on gpus
    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
    model.train()
    if hasattr(model, 'module'):
        model_load = model.module
    # Two optimizers: one over every parameter, one restricted to the
    # aggregation sub-module; which one steps depends on the epoch below.
    optimizer_all = obj_from_dict(cfg.optimizer, torch.optim,
                                  dict(params=model_load.parameters()))
    optimizer = obj_from_dict(cfg.optimizer, torch.optim,
                              dict(params=model_load.agg.parameters()))
    check_video = None
    start_epoch = 0
    meta = None
    epoch = start_epoch
    # visdom windows, one per plotted loss term
    vis = visdom.Visdom(env='fuse_c')
    loss_cls_window = vis.line(X=torch.zeros((1, )).cpu(),
                               Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches',
                                         ylabel='Loss of classification',
                                         title='Loss of classification ',
                                         legend=['Loss of classification']))

    loss_init_window = vis.line(X=torch.zeros((1, )).cpu(),
                                Y=torch.zeros((1)).cpu(),
                                opts=dict(xlabel='minibatches',
                                          ylabel='Loss of init reppoint',
                                          title='Loss of init reppoint',
                                          legend=['Loss of init reppoint']))
    loss_refine_window = vis.line(X=torch.zeros((1, )).cpu(),
                                  Y=torch.zeros((1)).cpu(),
                                  opts=dict(xlabel='minibatches',
                                            ylabel='Loss of refine reppoint',
                                            title='Loss of refine reppoint',
                                            legend=['Loss of refine reppoint'
                                                    ]))
    loss_total_window = vis.line(X=torch.zeros((1, )).cpu(),
                                 Y=torch.zeros((1)).cpu(),
                                 opts=dict(xlabel='minibatches',
                                           ylabel='Loss all',
                                           title='Loss all',
                                           legend=['Loss all']))
    loss_trans_window = vis.line(X=torch.zeros((1, )).cpu(),
                                 Y=torch.zeros((1)).cpu(),
                                 opts=dict(xlabel='minibatches',
                                           ylabel='Loss trans',
                                           title='Loss trans',
                                           legend=['Loss trans']))
    training_sample = 0
    for e in range(cfg.total_epochs):
        i = 0
        # Checkpoint at the START of each epoch; epoch_0.pth therefore
        # contains the initial (possibly just-loaded) weights.
        if epoch % 1 == 0:
            if meta is None:
                meta = dict(epoch=epoch + 1, iter=i)
            else:
                meta.update(epoch=epoch + 1, iter=i)
            checkpoint = {
                'meta': meta,
                'state_dict': weights_to_cpu(model.state_dict())
            }
            if optimizer_all is not None:
                checkpoint['optimizer'] = optimizer_all.state_dict()
            # makedirs+exist_ok: robust against a missing parent directory
            # and against concurrent creation (os.mkdir would raise).
            os.makedirs(cfg.work_dir, exist_ok=True)
            filename = os.path.join(cfg.work_dir, 'epoch_{}.pth'.format(epoch))
            torch.save(checkpoint, filename)
        for i, data in enumerate(data_loader):
            optimizer.zero_grad()
            optimizer_all.zero_grad()
            # identifiers derived from the file path: .../<video_id>/<frame>.<ext>
            reference_id = (data['img_meta'].data[0][0]['filename'].split('/')
                            [-1]).split('.')[0]
            video_id = data['img_meta'].data[0][0]['filename'].split('/')[-2]
            print('start image:', data['img_meta'].data[0][0]['filename'])
            print('end image:', data['img_meta'].data[-1][-1]['filename'])
            # If a sampled clip crosses a video boundary, replace the frames
            # belonging to the next video with the first frame of the batch
            # entry so every entry stays within a single video.
            for m in range(len(data['img_meta'].data)):
                start_name = data['img_meta'].data[m][0]['filename'].split(
                    '/')[-2]
                for n in range(len(data['img_meta'].data[m])):
                    check_name = data['img_meta'].data[m][n]['filename'].split(
                        '/')[-2]
                    if start_name != check_name:
                        print('end of video')
                        data['img_meta'].data[m][n] = data['img_meta'].data[m][
                            0]
                        data['gt_bboxes'].data[m][n] = data['gt_bboxes'].data[
                            m][0]
                        data['gt_labels'].data[m][n] = data['gt_labels'].data[
                            m][0]
                        data['img'].data[m][n] = data['img'].data[m][0]

            losses = model(return_loss=True, **data)
            if isinstance(losses, list):
                # One loss dict per head (agg / refer / support / ...):
                # parse each and keep the per-head logs for printing below.
                loss_all = []
                log = []
                for p in range(len(losses)):
                    loss, log_var = parse_losses(losses[p])
                    loss_all.append(loss)
                    log.append(log_var)
            else:
                losses, log_vars = parse_losses(losses)
            if isinstance(losses, list):
                # weighted combination of the four per-head losses,
                # normalised by the sum of the weights (1 + 3 * 0.5 = 2.5)
                losses = loss_all[0] + 0.5 * loss_all[1] + 0.5 * loss_all[
                    2] + 0.5 * loss_all[3]
                losses = losses / 2.5

            losses.backward()
            # warm-up schedule: first 10 epochs train only the agg module
            if epoch < 10:
                optimizer.step()
            else:
                optimizer_all.step()
            # NOTE(review): `log` is only defined on the list branch above;
            # if the model ever returns a single loss dict this raises
            # NameError. The model used here apparently always returns a
            # list — confirm before reusing with another detector.
            log_vars = log[0]

            vis.line(X=torch.ones(1).cpu() * training_sample,
                     Y=(log_vars['loss_cls']) * torch.ones(1).cpu(),
                     win=loss_cls_window,
                     update='append')
            vis.line(X=torch.ones(1).cpu() * training_sample,
                     Y=(log_vars['loss_pts_init']) * torch.ones(1).cpu(),
                     win=loss_init_window,
                     update='append')
            vis.line(X=torch.ones(1).cpu() * training_sample,
                     Y=(log_vars['loss_pts_refine']) * torch.ones(1).cpu(),
                     win=loss_refine_window,
                     update='append')
            vis.line(X=torch.ones(1).cpu() * training_sample,
                     Y=(losses).item() * torch.ones(1).cpu(),
                     win=loss_total_window,
                     update='append')

            # per-head console logging: agg / refer / support
            print('agg')
            print('epoch:',epoch,'index:',i,'video_id:',video_id,'reference_id:',reference_id, \
              'loss_cls:',log_vars['loss_cls'],'loss_init_box:',log_vars['loss_pts_init'], \
               'loss_refine_box:',log_vars['loss_pts_refine'])
            log_vars = log[1]
            print('refer')
            print('epoch:',epoch,'index:',i,'video_id:',video_id,'reference_id:',reference_id, \
              'loss_cls:',log_vars['loss_cls'],'loss_init_box:',log_vars['loss_pts_init'], \
               'loss_refine_box:',log_vars['loss_pts_refine'])
            log_vars = log[2]
            print('support')
            print('epoch:',epoch,'index:',i,'video_id:',video_id,'reference_id:',reference_id, \
              'loss_cls:',log_vars['loss_cls'],'loss_init_box:',log_vars['loss_pts_init'], \
               'loss_refine_box:',log_vars['loss_pts_refine'])
            training_sample += 1
        epoch += 1
# Example 2
def main():
    """Training entry point (plain single-optimizer variant).

    Builds the dataset and detector from an mmdetection-style config given
    on the command line and runs a simple training loop over one optimizer,
    logging the loss terms to stdout and saving checkpoints every 1000
    iterations and at the end of every epoch.

    All configuration comes from ``parse_args()`` / the config file; the
    function takes no parameters and returns ``None``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus

    # linear LR scaling rule (https://arxiv.org/abs/1706.02677); note this
    # variant applies it unconditionally (no --autoscale_lr flag check).
    cfg.optimizer['lr'] = cfg.optimizer['lr'] * cfg.gpus / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)
    datasets = [build_dataset(cfg.data.train)]
    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)
    # FIX: this block was duplicated verbatim in the original, loading the
    # same checkpoint twice; a single load is sufficient and identical.
    if cfg.load_from:
        checkpoint = load_checkpoint(model, cfg.load_from, map_location='cpu')
        model.CLASSES = datasets[0].CLASSES
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                          config=cfg.text,
                                          CLASSES=datasets[0].CLASSES)

    # Non-distributed, non-shuffled loader (frame order matters for video).
    data_loader = build_dataloader(datasets[0],
                                   imgs_per_gpu=cfg.data.imgs_per_gpu,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   num_gpus=cfg.gpus,
                                   dist=False,
                                   shuffle=False)
    # put model on gpus
    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
    model.train()
    if hasattr(model, 'module'):
        model_load = model.module
    optimizer = obj_from_dict(cfg.optimizer, torch.optim,
                              dict(params=model_load.parameters()))

    check_video = None
    start_epoch = 0
    meta = None
    epoch = start_epoch
    for e in range(cfg.total_epochs):
        i = 0
        # idiomatic len() instead of calling __len__() directly
        print(len(data_loader))
        for i, data in enumerate(data_loader):
            # identifiers derived from the file path: .../<video_id>/<frame>.<ext>
            reference_id = (data['img_meta'].data[0][0]['filename'].split('/')
                            [-1]).split('.')[0]
            video_id = data['img_meta'].data[0][0]['filename'].split('/')[-2]
            losses = model(return_loss=True, **data)
            losses, log_vars = parse_losses(losses)
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            print('epoch:',epoch,'i:',i,'video_id:',video_id,'reference_id:',reference_id, \
              'loss_cls:',log_vars['loss_cls'],'loss_init_box:',log_vars['loss_pts_init'], \
               'loss_refine_box:',log_vars['loss_pts_refine'])
            # periodic intra-epoch checkpoint (also fires at i == 0)
            if i % 1000 == 0:
                if meta is None:
                    meta = dict(epoch=epoch + 1, iter=i)
                else:
                    meta.update(epoch=epoch + 1, iter=i)
                checkpoint = {
                    'meta': meta,
                    'state_dict': weights_to_cpu(model.state_dict())
                }

                if optimizer is not None:
                    checkpoint['optimizer'] = optimizer.state_dict()
                # makedirs+exist_ok: robust against a missing parent dir
                # and concurrent creation (os.mkdir would raise).
                os.makedirs(cfg.work_dir, exist_ok=True)
                filename = os.path.join(cfg.work_dir,
                                        'epoch_{}_{}.pth'.format(epoch, i))
                torch.save(checkpoint, filename)

        # end-of-epoch checkpoint
        if epoch % 1 == 0:
            if meta is None:
                meta = dict(epoch=epoch + 1, iter=i)
            else:
                meta.update(epoch=epoch + 1, iter=i)
            checkpoint = {
                'meta': meta,
                'state_dict': weights_to_cpu(model.state_dict())
            }

            if optimizer is not None:
                checkpoint['optimizer'] = optimizer.state_dict()
            os.makedirs(cfg.work_dir, exist_ok=True)
            filename = os.path.join(cfg.work_dir, 'epoch_{}.pth'.format(epoch))
            torch.save(checkpoint, filename)

        epoch += 1