Example #1
def train(args):
    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    shutil.copy(args.config_file, cfg.OUTPUT_DIR)

    num_gpus = torch.cuda.device_count()

    logger = setup_logger('reid_baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info(args)
    logger.info('Running with config:\n{}'.format(cfg))

    train_dl, val_dl, num_query, num_classes = make_dataloader(cfg, num_gpus)

    model = build_model(cfg, num_classes)
    # print(model)
    loss_func = make_loss(cfg, num_classes)

    trainer = BaseTrainer(cfg, model, train_dl, val_dl, loss_func, num_query,
                          num_gpus)

    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
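
Note: the loop above (repeated in Examples #6, #7, #11 and #21) only relies on the trainer exposing .epochs, .train_dl, .step(batch), .handle_new_batch() and .handle_new_epoch(). The class below is a minimal illustrative sketch of such an interface, not the project's actual BaseTrainer.

# Minimal sketch of the trainer interface used above (illustrative only,
# not the project's BaseTrainer).
import logging

import torch


class MinimalTrainer:
    def __init__(self, model, train_dl, loss_func, epochs=10, lr=3e-4):
        self.model = model
        self.train_dl = train_dl
        self.loss_func = loss_func
        self.epochs = epochs
        self.optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        self.batch_idx = 0
        self.last_loss = 0.0

    def step(self, batch):
        # One optimization step on an (inputs, targets) batch.
        inputs, targets = batch
        self.optimizer.zero_grad()
        loss = self.loss_func(self.model(inputs), targets)
        loss.backward()
        self.optimizer.step()
        self.last_loss = loss.item()

    def handle_new_batch(self):
        # Per-batch bookkeeping (step counters, periodic logging, ...).
        self.batch_idx += 1
        if self.batch_idx % 100 == 0:
            logging.info('batch %d, loss %.4f', self.batch_idx, self.last_loss)

    def handle_new_epoch(self):
        # Per-epoch bookkeeping (checkpointing, validation, LR schedule, ...).
        self.batch_idx = 0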
Example #2
def test(cfg, args):
    # train_dataset = dataset.HandGraph(cfg.DATASET.ROOT,
    # cfg.DATASET.TRAIN_SET,
    # 'png')
    # train_dataset.visualize_data()
    train_dataset = make_dataloader(cfg, is_train=True).dataset
    train_dataset.visualize_data()
Example #3
def dataset():
    vocab = Vocab('data/vocab', 50000)
    dataloader = make_dataloader('data/conv_dev.jsonl', 32, vocab, 200, False,
                                 False)
    for batch in dataloader:
        for key in batch:
            if key != 'id':
                print(key, batch[key].size())
        break
Example #4
def run(args):
    gpuids = tuple(map(int, args.gpus.split(",")))

    nnet = TasNet()
    trainer = SiSnrTrainer(nnet,
                           gpuid=gpuids,
                           checkpoint=args.checkpoint,
                           resume=args.resume,
                           **trainer_conf)

    train_loader = make_dataloader(
        train=True,
        #data_kwargs=train_data,
        batch_size=args.batch_size,
        chunk_size=chunk_size,
        num_workers=args.num_workers,
    )  #online=True, cone=False)
    dev_loader = make_dataloader(
        train=False,
        #data_kwargs=dev_data,
        batch_size=args.batch_size,
        chunk_size=chunk_size,
        num_workers=args.num_workers,
    )  #online=True, cone=False)

    #dataset = ConeData(dev_data['data_path'], num_spks)
    dataset = OnlineSimulationDataset(vctk_audio, ms_snsd, 48,
                                      simulation_config_test, truncator,
                                      "./test_cache", 50)
    fusion_list = []
    mix_list = []
    ref_list = []
    for i in range(len(dataset)):
        sample = dataset[i]
        fusion_list.append(Prep(sample))
        mix_list.append(sample[0])
        ref_list.append(sample[3])

    trainer.run(train_loader,
                dev_loader,
                num_epochs=args.epochs,
                fusion_list=fusion_list,
                mix_list=mix_list,
                ref_list=ref_list)
Example #5
def run(args):
    gpuids = tuple(map(int, args.gpus.split(",")))

    nnet = ConvTasNet(**nnet_conf)
    trainer = SiSnrTrainer(nnet,
                           gpuid=gpuids,
                           checkpoint=args.checkpoint,
                           resume=args.resume,
                           **trainer_conf)

    for conf, fname in zip([nnet_conf, trainer_conf],
                           ["mdl.json", "trainer.json"]):
        dump_json(conf, args.checkpoint, fname)

    train_loader = make_dataloader(shuffle=True,
                                   data_kwargs=train_data,
                                   batch_size=args.batch_size,
                                   chunk_size=chunk_size)
    dev_loader = make_dataloader(shuffle=False,
                                 data_kwargs=dev_data,
                                 batch_size=args.batch_size,
                                 chunk_size=chunk_size)

    trainer.run(train_loader, dev_loader, num_epochs=args.epochs)
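
The dump_json call above just persists the model and trainer configs next to the checkpoints; a helper with that behaviour could be as small as the sketch below (its exact signature here is an assumption, not the project's actual implementation).

import json
import os


def dump_json(obj, checkpoint_dir, fname):
    # Sketch: write a config dict as JSON under the checkpoint directory.
    os.makedirs(checkpoint_dir, exist_ok=True)
    with open(os.path.join(checkpoint_dir, fname), "w") as f:
        json.dump(obj, f, indent=2, sort_keys=True)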
Example #6
def main():
    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    num_gpus = torch.cuda.device_count()
    logger = setup_logger('reid_baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info('Running with config:\n{}'.format(cfg))
    train_dl, val_dl, num_query, num_classes = make_dataloader(cfg, num_gpus)
    model = build_model(cfg, num_classes)
    loss = make_loss(cfg, num_classes)
    trainer = BaseTrainer(cfg, model, train_dl, val_dl, loss, num_query,
                          num_gpus)
    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
Example #7
def main():
    parser = argparse.ArgumentParser(description="Baseline Training")
    parser.add_argument("--config_file", default="", help="path to config file", type=str)
    parser.add_argument("opts", help="Modify config options using the command-line", default=None,
                        nargs=argparse.REMAINDER)
    args = parser.parse_args()
    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    num_gpus = 0
    device = torch.device("cpu")
    if cfg.MODEL.DEVICE == 'cuda' and torch.cuda.is_available():
        num_gpus = len(cfg.MODEL.DEVICE_IDS)-1
        device_ids = cfg.MODEL.DEVICE_IDS.strip("d")
        print(device_ids)
        device = torch.device("cuda:{0}".format(device_ids))

    logger = setup_logger('baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info('Running with config:\n{}'.format(cfg))


    train_dl, val_dl = make_dataloader(cfg, num_gpus)

    model = build_model(cfg)

    loss = make_loss(cfg, device)

    trainer = BaseTrainer(cfg, model, train_dl, val_dl,
                          loss, num_gpus, device)

    logger.info(type(model))
    logger.info(loss)
    logger.info(trainer)
    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
Example #8
def predict_proba(self, x):
    '''
    x (list(sample(dict)))
        sample (dict): keys are 's1', 's2', 'label'
    '''
    dataloader = make_dataloader(x, 64, self.vocab, self.max_len, False,
                                 self.use_cuda)
    preds = []
    for batch in dataloader:
        keys = ('s1', 's1_len', 's1_mask', 's2', 's2_len')
        outputs = self.model(*get_vars(batch,
                                       *keys,
                                       use_cuda=self.use_cuda))  # (B*3)
        if self.activate:
            outputs = F.softmax(outputs, 1)
        else:
            outputs = F.log_softmax(outputs, 1)
        preds.extend(outputs.cpu().data.tolist())
    return np.asarray(preds)
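
Since predict_proba returns an (N, 3) array of (log-)probabilities, a caller would typically reduce it to class labels with an argmax. A small usage sketch follows; clf and samples are placeholder names, not from the original project.

import numpy as np

# clf is an instance of the class defining predict_proba above;
# samples is a list of {'s1': ..., 's2': ..., 'label': ...} dicts.
proba = clf.predict_proba(samples)   # shape (N, 3)
labels = np.argmax(proba, axis=1)    # predicted class index per sample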
Example #9
def train():
    trainloader, testloader = make_dataloader()
    # build model
    model = BasicModel()
    # loss func
    loss_func = nn.CrossEntropyLoss()
    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    # configuration
    epochs = 10

    # training
    for epoch in range(epochs):
        model.train()
        pbar = tqdm(trainloader)
        for image, label in pbar:
            # forward
            output = model(image)
            # compute loss
            loss = loss_func(output, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # compute batch accuracy
            predicts = torch.argmax(output, dim=-1)
            accu = torch.sum(predicts == label).float() / image.size(0)
            pbar.set_description('Epoch:[{:02d}]-Loss:{:.3f}-Accu:{:.3f}'\
                                 .format(epoch+1,loss.item(),accu.item()))
        # testing
        model.eval()
        with torch.no_grad():
            corrects = 0
            total_nums = 0
            for image, label in tqdm(testloader):
                output = model(image)
                predicts = torch.argmax(output, dim=-1)
                corrects += (predicts == label).sum()
                total_nums += label.size(0)
            test_accu = corrects.float() / total_nums
            print('Epoch:[{:02d}]-Test_Accu:{:.3f}'.format(
                epoch + 1, test_accu.item()))
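
Example #9 is the only fully self-contained snippet: make_dataloader() is called with no arguments and is expected to return (trainloader, testloader) yielding (image, label) batches. A minimal sketch that satisfies this contract, assuming torchvision MNIST as the data source (an assumption, not the original project's loader), might look like:

from torch.utils.data import DataLoader
from torchvision import datasets, transforms


def make_dataloader(batch_size=64, num_workers=2, root='./data'):
    # Sketch: (trainloader, testloader) over MNIST, matching the usage in Example #9.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    train_set = datasets.MNIST(root, train=True, download=True, transform=transform)
    test_set = datasets.MNIST(root, train=False, download=True, transform=transform)
    trainloader = DataLoader(train_set, batch_size=batch_size, shuffle=True,
                             num_workers=num_workers)
    testloader = DataLoader(test_set, batch_size=batch_size, shuffle=False,
                            num_workers=num_workers)
    return trainloader, testloader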
Example #10
def build_loader(vocab, hps):
    mode = hps.mode.replace('hypo', '')
    if mode == 'train':
        single_pass = False
        bsize = {'train': hps.batch_size, 'val': hps.batch_size}
    elif mode == 'val':
        single_pass = True
        bsize = {'val': hps.batch_size}
    elif mode == 'test':
        single_pass = True
        bsize = {'test': hps.batch_size}
    else:
        raise ValueError('Unknown mode: %s' % hps.mode)

    loader = {}
    args = (vocab, hps.max_steps, single_pass, hps.use_cuda)
    for key in bsize:
        dpath = path.join(hps.data_path, getattr(hps, key + '_data'))
        loader[key] = make_dataloader(dpath, bsize[key], *args)

    return loader
Example #11
def main():
    parser = argparse.ArgumentParser(description="ReID Baseline Training")
    parser.add_argument("--config_file",
                        default="",
                        help="path to config file",
                        type=str)
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)
    args = parser.parse_args()
    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    num_gpus = torch.cuda.device_count()
    logger = setup_logger('reid_baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info('Running with config:\n{}'.format(cfg))
    train_dl, val_dl, num_query, num_classes = make_dataloader(cfg, num_gpus)
    model = build_model(cfg, num_classes)
    loss = make_loss(cfg, num_classes)
    trainer = SGDTrainer(cfg, model, train_dl, val_dl, loss, num_query,
                         num_gpus)
    logger.info('train transform: \n{}'.format(train_dl.dataset.transform))
    logger.info('valid transform: \n{}'.format(val_dl.dataset.transform))
    logger.info(type(model))
    logger.info(loss)
    logger.info(trainer)
    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
Example #12
def main(conf):
    '''
    train_set = TACDataset(conf["data"]["train_json"], conf["data"]["segment"], train=True)
    val_set = TACDataset(conf["data"]["dev_json"], conf["data"]["segment"], train=False)

    train_loader = DataLoader(
        train_set,
        shuffle=True,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    val_loader = DataLoader(
        val_set,
        shuffle=False,
        batch_size=conf["training"]["batch_size"],
        num_workers=conf["training"]["num_workers"],
        drop_last=True,
    )
    '''

    train_loader = make_dataloader(train=True,
                                   batch_size=conf["training"]["batch_size"],
                                   chunk_size=conf["data"]["chunk"],
                                   num_workers=conf["training"]["num_workers"])
    val_loader = make_dataloader(train=False,
                                 batch_size=conf["training"]["batch_size"],
                                 chunk_size=conf["data"]["chunk"],
                                 num_workers=conf["training"]["num_workers"])
    #Prep(train_loader)
    #Prep(val_loader)
    #for data in train_loader:
    #print(type(data[0]))

    model = TasNet()
    # model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    # params = sum([np.prod(p.size()) for p in model_parameters])
    # print(params)
    # exit()
    optimizer = make_optimizer(model.parameters(), **conf["optim"])
    # Define scheduler
    if conf["training"]["half_lr"]:
        scheduler = ReduceLROnPlateau(optimizer=optimizer,
                                      factor=0.5,
                                      patience=conf["training"]["patience"])
    else:
        scheduler = None
    # Just after instantiating, save the args. Easy loading in the future.
    exp_dir = conf["main_args"]["exp_dir"]
    os.makedirs(exp_dir, exist_ok=True)
    conf_path = os.path.join(exp_dir, "conf.yml")
    with open(conf_path, "w") as outfile:
        yaml.safe_dump(conf, outfile)

    # Define Loss function.
    loss_func = MSELoss()
    system = AngleSystem(
        model=model,
        loss_func=loss_func,
        optimizer=optimizer,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=conf,
    )

    # Define callbacks
    callbacks = []
    checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
    checkpoint = ModelCheckpoint(
        checkpoint_dir,
        monitor="val_loss",
        mode="min",
        save_top_k=conf["training"]["save_top_k"],
        verbose=True,
    )
    callbacks.append(checkpoint)
    if conf["training"]["early_stop"]:
        callbacks.append(
            EarlyStopping(monitor="val_loss",
                          mode="min",
                          patience=conf["training"]["patience"],
                          verbose=True))

    # Don't ask GPU if they are not available.
    gpus = [-1]
    trainer = pl.Trainer(
        max_epochs=conf["training"]["epochs"],
        callbacks=callbacks,
        default_root_dir=exp_dir,
        #gpus=gpus,
        distributed_backend="ddp",
        gradient_clip_val=conf["training"]["gradient_clipping"],
    )
    trainer.fit(system)

    best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
    with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
        json.dump(best_k, f, indent=0)

    state_dict = torch.load(checkpoint.best_model_path)
    system.load_state_dict(state_dict=state_dict["state_dict"])
    system.cpu()

    #to_save = system.model.serialize()
    #to_save.update(train_set.get_infos())
    torch.save(system.model.state_dict(),
               os.path.join(exp_dir, "best_model.ckpt"))
Example #13
def main_worker(gpus, ngpus_per_node, args, final_output_dir, tb_log_dir):
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    #os.environ['CUDA_VISIBLE_DEVICES']=gpus

    # Parallel setting
    print("Use GPU: {} for training".format(gpus))

    update_config(cfg, args)

    #test(cfg, args)

    # logger setting
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # model initialization
    model = eval(cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=True)

    # load pretrained model before DDP initialization
    checkpoint_file = os.path.join(final_output_dir, 'model_best.pth.tar')

    if cfg.AUTO_RESUME:
        if os.path.exists(checkpoint_file):
            checkpoint = torch.load(checkpoint_file, map_location='cpu')
            state_dict = checkpoint['state_dict']

            for key in list(state_dict.keys()):
                new_key = key.replace("module.", "")
                state_dict[new_key] = state_dict.pop(key)
            model.load_state_dict(state_dict)
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_file, checkpoint['epoch']))

    elif cfg.MODEL.HRNET_PRETRAINED:
        logger.info("=> loading a pretrained model '{}'".format(
            cfg.MODEL.PRETRAINED))
        checkpoint = torch.load(cfg.MODEL.HRNET_PRETRAINED, map_location='cpu')

        state_dict = checkpoint['state_dict']
        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)

        model.load_state_dict(state_dict)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    # copy configuration file
    shutil.copy2(args.cfg, final_output_dir)

    # calculate GFLOPS
    dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]))

    logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    #ops, params = get_model_complexity_info(
    #    model, (3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0]),
    #    as_strings=True, print_per_layer_stat=True, verbose=True)
    # FP16 SETTING
    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print(
                "Warning:  if --fp16 is not used, static_loss_scale will be ignored."
            )

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not cfg.DISTRIBUTED:
        print(
            'Warning: Sync BatchNorm is only supported in distributed training.'
        )

    # Distributed Computing
    master = True
    if cfg.DISTRIBUTED:  # This block is not available
        args.local_rank += int(gpus[0])
        print('This process is using GPU', args.local_rank)
        device = args.local_rank
        master = device == int(gpus[0])
        dist.init_process_group(backend='nccl')
        if cfg.MODEL.SYNC_BN:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if gpus is not None:
            torch.cuda.set_device(device)
            model.cuda(device)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # workers = int(workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[device],
                output_device=device,
                find_unused_parameters=True)
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    else:  # implement this block
        gpu_ids = [int(g) for g in gpus.split(',')]
        device = gpu_ids[0]
        print('This process is using GPU', str(device))
        model = torch.nn.DataParallel(model, gpu_ids).cuda(device)

    # Prepare loss functions
    criterion = {}
    if cfg.LOSS.WITH_HEATMAP_LOSS:
        criterion['heatmap_loss'] = HeatmapLoss().cuda()
    if cfg.LOSS.WITH_POSE2D_LOSS:
        criterion['pose2d_loss'] = JointsMSELoss().cuda()
    if cfg.LOSS.WITH_BONE_LOSS:
        criterion['bone_loss'] = BoneLengthLoss().cuda()
    if cfg.LOSS.WITH_JOINTANGLE_LOSS:
        criterion['jointangle_loss'] = JointAngleLoss().cuda()

    best_perf = 1e9
    best_model = False
    last_epoch = -1

    # optimizer must be initialized after model initialization
    optimizer = get_optimizer(cfg, model)

    if cfg.FP16.ENABLED:
        optimizer = FP16_Optimizer(
            optimizer,
            static_loss_scale=cfg.FP16.STATIC_LOSS_SCALE,
            dynamic_loss_scale=cfg.FP16.DYNAMIC_LOSS_SCALE,
            verbose=False)

    begin_epoch = cfg.TRAIN.BEGIN_EPOCH

    if not cfg.AUTO_RESUME and cfg.MODEL.HRNET_PRETRAINED:
        optimizer.load_state_dict(checkpoint['optimizer'])

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['loss']
        optimizer.load_state_dict(checkpoint['optimizer'])

        if 'train_global_steps' in checkpoint.keys() and \
        'valid_global_steps' in checkpoint.keys():
            writer_dict['train_global_steps'] = checkpoint[
                'train_global_steps']
            writer_dict['valid_global_steps'] = checkpoint[
                'valid_global_steps']

    if cfg.FP16.ENABLED:
        logger.info("=> Using FP16 mode")
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer.optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=begin_epoch)
    elif cfg.TRAIN.LR_SCHEDULE == 'warmup':
        from utils.utils import get_linear_schedule_with_warmup
        lr_scheduler = get_linear_schedule_with_warmup(
            optimizer=optimizer,
            num_warmup_steps=cfg.TRAIN.WARMUP_EPOCHS,
            num_training_steps=cfg.TRAIN.END_EPOCH - cfg.TRAIN.BEGIN_EPOCH,
            last_epoch=begin_epoch)
    elif cfg.TRAIN.LR_SCHEDULE == 'multi_step':
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=begin_epoch)
    else:
        print('Unknown learning rate schedule!')
        exit()

    # Data loading code
    train_loader_dict = make_dataloader(cfg,
                                        is_train=True,
                                        distributed=cfg.DISTRIBUTED)
    valid_loader_dict = make_dataloader(cfg,
                                        is_train=False,
                                        distributed=cfg.DISTRIBUTED)

    for i, (dataset_name,
            train_loader) in enumerate(train_loader_dict.items()):
        logger.info(
            'Training Loader {}/{}:\n'.format(i + 1, len(train_loader_dict)) +
            str(train_loader.dataset))
    for i, (dataset_name,
            valid_loader) in enumerate(valid_loader_dict.items()):
        logger.info('Validation Loader {}/{}:\n'.format(
            i + 1, len(valid_loader_dict)) + str(valid_loader.dataset))

    #writer_dict['writer'].add_graph(model, (dump_input, ))
    """
    Start training
    """
    start_time = time.time()

    with torch.autograd.set_detect_anomaly(True):
        for epoch in range(begin_epoch + 1, cfg.TRAIN.END_EPOCH + 1):
            epoch_start_time = time.time()
            # shuffle datasets with the same random seed
            if cfg.DISTRIBUTED:
                for data_loader in train_loader_dict.values():
                    data_loader.sampler.set_epoch(epoch)
            # train for one epoch
            # get_last_lr() returns a list
            logger.info('Start training [{}/{}] lr: {:.4e}'.format(
                epoch, cfg.TRAIN.END_EPOCH - cfg.TRAIN.BEGIN_EPOCH,
                lr_scheduler.get_last_lr()[0]))
            train(cfg,
                  args,
                  master,
                  train_loader_dict,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  final_output_dir,
                  tb_log_dir,
                  writer_dict,
                  logger,
                  fp16=cfg.FP16.ENABLED,
                  device=device)

            # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`.
            lr_scheduler.step()

            # evaluate on validation set

            if not cfg.WITHOUT_EVAL:
                logger.info('Start evaluating [{}/{}]'.format(
                    epoch, cfg.TRAIN.END_EPOCH - 1))
                with torch.no_grad():
                    recorder = validate(cfg,
                                        args,
                                        master,
                                        valid_loader_dict,
                                        model,
                                        criterion,
                                        final_output_dir,
                                        tb_log_dir,
                                        writer_dict,
                                        logger,
                                        device=device)

                val_total_loss = recorder.avg_total_loss

                best_model = False
                if val_total_loss < best_perf:
                    logger.info(
                        'This epoch yielded a better model with total loss {:.4f} < {:.4f}.'
                        .format(val_total_loss, best_perf))
                    best_perf = val_total_loss
                    best_model = True

            else:
                val_total_loss = 0
                best_model = True

            if master:
                logger.info(
                    '=> saving checkpoint to {}'.format(final_output_dir))
                save_checkpoint(
                    {
                        'epoch': epoch,
                        'model': cfg.EXP_NAME + '.' + cfg.MODEL.NAME,
                        'state_dict': model.state_dict(),
                        'loss': val_total_loss,
                        'optimizer': optimizer.state_dict(),
                        'train_global_steps':
                        writer_dict['train_global_steps'],
                        'valid_global_steps': writer_dict['valid_global_steps']
                    }, best_model, final_output_dir)

            print('\nEpoch {} spent {:.2f} hours\n'.format(
                epoch, (time.time() - epoch_start_time) / 3600))

            #if epoch == 3:break
    if master:
        final_model_state_file = os.path.join(
            final_output_dir, 'final_state{}.pth.tar'.format(gpus))
        logger.info(
            '=> saving final model state to {}'.format(final_model_state_file))
        torch.save(model.state_dict(), final_model_state_file)
        writer_dict['writer'].close()

        print(
            '\n[Training Accomplished] {} epochs spent {:.2f} hours\n'.format(
                cfg.TRAIN.END_EPOCH - begin_epoch + 1,
                (time.time() - start_time) / 3600))
Example #14
def main_worker(gpus, ngpus_per_node, args, final_output_dir, tb_log_dir):
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    #os.environ['CUDA_VISIBLE_DEVICES']=gpus

    # if len(gpus) == 1:
    #     gpus = int(gpus)

    update_config(cfg, args)

    #test(cfg, args)

    # logger setting
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # model initialization
    model = {
        "ransac": RANSACTriangulationNet,
        "alg": AlgebraicTriangulationNet,
        "vol": VolumetricTriangulationNet,
        "vol_CPM": VolumetricTriangulationNet_CPM,
        "FTL": FTLMultiviewNet
    }[cfg.MODEL.NAME](cfg)

    discriminator = Discriminator(cfg)

    # load pretrained model before DDP initialization
    if cfg.AUTO_RESUME:
        checkpoint_file = os.path.join(final_output_dir, 'model_best.pth.tar')
        if os.path.exists(checkpoint_file):
            checkpoint = torch.load(checkpoint_file,
                                    map_location=torch.device('cpu'))
            state_dict = checkpoint['state_dict']
            D_state_dict = checkpoint['D_state_dict']

            for key in list(state_dict.keys()):
                new_key = key.replace("module.", "")
                state_dict[new_key] = state_dict.pop(key)
            for key in list(D_state_dict.keys()):
                new_key = key.replace("module.", "")
                D_state_dict[new_key] = D_state_dict.pop(key)

            model.load_state_dict(state_dict)
            discriminator.load_state_dict(D_state_dict)
            logger.info("=> Loading checkpoint '{}' (epoch {})".format(
                checkpoint_file, checkpoint['epoch']))
        else:
            print('[Warning] Checkpoint file not found! Wrong path: {}'.format(
                checkpoint_file))

    elif cfg.MODEL.HRNET_PRETRAINED:
        logger.info("=> loading a pretrained model '{}'".format(
            cfg.MODEL.PRETRAINED))
        checkpoint = torch.load(cfg.MODEL.HRNET_PRETRAINED)

        state_dict = checkpoint['state_dict']
        for key in list(state_dict.keys()):
            new_key = key.replace("module.", "")
            state_dict[new_key] = state_dict.pop(key)

        model.load_state_dict(state_dict)

    # initialize an optimizer
    # optimizer must be initialized after model initialization
    if cfg.MODEL.TRIANGULATION_MODEL_NAME == "vol":
        optimizer = torch.optim.Adam([{
            'params': model.backbone.parameters(),
            'initial_lr': cfg.TRAIN.LR
        }, {
            'params':
            model.process_features.parameters(),
            'initial_lr':
            cfg.TRAIN.PROCESS_FEATURE_LR
            if hasattr(cfg.TRAIN, "PROCESS_FEATURE_LR") else cfg.TRAIN.LR
        }, {
            'params':
            model.volume_net.parameters(),
            'initial_lr':
            cfg.TRAIN.VOLUME_NET_LR
            if hasattr(cfg.TRAIN, "VOLUME_NET_LR") else cfg.TRAIN.LR
        }],
                                     lr=cfg.TRAIN.LR)
    else:
        optimizer = torch.optim.Adam(
            [{
                'params': filter(lambda p: p.requires_grad,
                                 model.parameters()),
                'initial_lr': cfg.TRAIN.LR
            }],
            lr=cfg.TRAIN.LR)

    D_optimizer = torch.optim.RMSprop([{
        'params':
        filter(lambda p: p.requires_grad, discriminator.parameters()),
        'initial_lr':
        cfg.TRAIN.LR
    }],
                                      lr=cfg.TRAIN.LR)

    # copy model file
    this_dir = os.path.dirname(__file__)
    shutil.copy2(os.path.join(this_dir, '../lib/models', 'triangulation.py'),
                 final_output_dir)
    # copy configuration file
    shutil.copy2(args.cfg, final_output_dir)

    # calculate GFLOPS
    # dump_input = torch.rand(
    #     (1, 4, 3, cfg.MODEL.IMAGE_SIZE[0], cfg.MODEL.IMAGE_SIZE[0])
    # )

    # logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    # FP16 SETTING
    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print(
                "Warning:  if --fp16 is not used, static_loss_scale will be ignored."
            )

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if cfg.MODEL.SYNC_BN and not cfg.DISTRIBUTED:
        print(
            'Warning: Sync BatchNorm is only supported in distributed training.'
        )

    if cfg.FP16.ENABLED:
        optimizer = FP16_Optimizer(
            optimizer,
            static_loss_scale=cfg.FP16.STATIC_LOSS_SCALE,
            dynamic_loss_scale=cfg.FP16.DYNAMIC_LOSS_SCALE,
            verbose=False)

    # Distributed Computing
    master = True
    if cfg.DISTRIBUTED:  # This block is not available
        args.local_rank += int(gpus[0])
        print('This process is using GPU', args.local_rank)
        device = args.local_rank
        master = device == int(gpus[0])
        dist.init_process_group(backend='nccl')
        if cfg.MODEL.SYNC_BN:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if gpus is not None:
            torch.cuda.set_device(device)
            model.cuda(device)
            discriminator.cuda(device)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # workers = int(workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[device],
                output_device=device,
                find_unused_parameters=True)
            discriminator = torch.nn.parallel.DistributedDataParallel(
                discriminator,
                device_ids=[device],
                output_device=device,
                find_unused_parameters=True)
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    else:  # implement this block
        gpu_ids = [int(g) for g in gpus.split(',')]
        device = gpu_ids[0]
        print('This process is using GPU', str(device))
        model = torch.nn.DataParallel(model, gpu_ids).cuda(device)
        discriminator = torch.nn.DataParallel(discriminator,
                                              gpu_ids).cuda(device)

    # Prepare loss functions
    criterion = {}
    if cfg.LOSS.WITH_HEATMAP_LOSS:
        criterion['heatmap_loss'] = HeatmapLoss().cuda(device)
    if cfg.LOSS.WITH_POSE2D_LOSS:
        criterion['pose2d_loss'] = JointsMSELoss().cuda(device)
    if cfg.LOSS.WITH_POSE3D_LOSS:
        criterion['pose3d_loss'] = Joints3DMSELoss().cuda(device)
    if cfg.LOSS.WITH_VOLUMETRIC_CE_LOSS:
        criterion['volumetric_ce_loss'] = VolumetricCELoss().cuda(device)
    if cfg.LOSS.WITH_BONE_LOSS:
        criterion['bone_loss'] = BoneLengthLoss().cuda(device)
    if cfg.LOSS.WITH_TIME_CONSISTENCY_LOSS:
        criterion['time_consistency_loss'] = Joints3DMSELoss().cuda(device)
    if cfg.LOSS.WITH_KCS_LOSS:
        criterion['KCS_loss'] = None
    if cfg.LOSS.WITH_JOINTANGLE_LOSS:
        criterion['jointangle_loss'] = JointAngleLoss().cuda(device)

    best_perf = 1e9
    best_model = False
    last_epoch = -1

    # load history
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH

    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        begin_epoch = checkpoint['epoch'] + 1
        best_perf = checkpoint['loss']
        optimizer.load_state_dict(checkpoint['optimizer'])
        D_optimizer.load_state_dict(checkpoint['D_optimizer'])

        if 'train_global_steps' in checkpoint.keys() and \
        'valid_global_steps' in checkpoint.keys():
            writer_dict['train_global_steps'] = checkpoint[
                'train_global_steps']
            writer_dict['valid_global_steps'] = checkpoint[
                'valid_global_steps']

    # Floating point 16 mode
    if cfg.FP16.ENABLED:
        logger.info("=> Using FP16 mode")
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer.optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=begin_epoch)
    else:
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            cfg.TRAIN.LR_STEP,
            cfg.TRAIN.LR_FACTOR,
            last_epoch=begin_epoch)

    # Data loading code
    train_loader_dict = make_dataloader(cfg,
                                        is_train=True,
                                        distributed=cfg.DISTRIBUTED)
    valid_loader_dict = make_dataloader(cfg,
                                        is_train=False,
                                        distributed=cfg.DISTRIBUTED)

    for i, (dataset_name,
            train_loader) in enumerate(train_loader_dict.items()):
        logger.info(
            'Training Loader {}/{}:\n'.format(i + 1, len(train_loader_dict)) +
            str(train_loader.dataset))
    for i, (dataset_name,
            valid_loader) in enumerate(valid_loader_dict.items()):
        logger.info('Validation Loader {}/{}:\n'.format(
            i + 1, len(valid_loader_dict)) + str(valid_loader.dataset))

    #writer_dict['writer'].add_graph(model, (dump_input, ))
    """
    Start training
    """
    start_time = time.time()

    with torch.autograd.set_detect_anomaly(True):
        for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
            epoch_start_time = time.time()
            # shuffle datasets with the same random seed
            if cfg.DISTRIBUTED:
                for data_loader in train_loader_dict.values():
                    data_loader.sampler.set_epoch(epoch)
            # train for one epoch
            logger.info('Start training [{}/{}]'.format(
                epoch, cfg.TRAIN.END_EPOCH - 1))
            train(epoch,
                  cfg,
                  args,
                  master,
                  train_loader_dict, [model, discriminator],
                  criterion, [optimizer, D_optimizer],
                  final_output_dir,
                  tb_log_dir,
                  writer_dict,
                  logger,
                  device,
                  fp16=cfg.FP16.ENABLED)

            # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`.
            lr_scheduler.step()

            # evaluate on validation set
            if not cfg.WITHOUT_EVAL:
                logger.info('Start evaluating [{}/{}]'.format(
                    epoch, cfg.TRAIN.END_EPOCH - 1))
                with torch.no_grad():
                    recorder = validate(cfg, args, master, valid_loader_dict,
                                        [model, discriminator], criterion,
                                        final_output_dir, tb_log_dir,
                                        writer_dict, logger, device)

                val_total_loss = recorder.avg_total_loss

                if val_total_loss < best_perf:
                    logger.info(
                        'This epoch yielded a better model with total loss {:.4f} < {:.4f}.'
                        .format(val_total_loss, best_perf))
                    best_perf = val_total_loss
                    best_model = True
                else:
                    best_model = False

            else:
                val_total_loss = 0
                best_model = True

            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint(
                {
                    'epoch': epoch,
                    'model': cfg.EXP_NAME + '.' + cfg.MODEL.NAME,
                    'state_dict': model.state_dict(),
                    'D_state_dict': discriminator.state_dict(),
                    'loss': val_total_loss,
                    'optimizer': optimizer.state_dict(),
                    'D_optimizer': D_optimizer.state_dict(),
                    'train_global_steps': writer_dict['train_global_steps'],
                    'valid_global_steps': writer_dict['valid_global_steps']
                }, best_model, final_output_dir)

            print('\nEpoch {} spent {:.2f} hours\n'.format(
                epoch, (time.time() - epoch_start_time) / 3600))

            #if epoch == 3:break
    if master:
        final_model_state_file = os.path.join(
            final_output_dir, 'final_state{}.pth.tar'.format(gpus))
        logger.info(
            '=> saving final model state to {}'.format(final_model_state_file))
        torch.save(model.state_dict(), final_model_state_file)
        writer_dict['writer'].close()

        print(
            '\n[Training Accomplished] {} epochs spent {:.2f} hours\n'.format(
                cfg.TRAIN.END_EPOCH - begin_epoch + 1,
                (time.time() - start_time) / 3600))
Example #15
    dev_data = read_infile(args.dev_file)

    tokenizer = AutoTokenizer.from_pretrained(args.model_name, return_token_type_ids=True)
    model = AutoModel.from_pretrained(args.model_name)

    train_dataset = make_dataset(tokenizer, train_data, pos_label=args.pos_label, 
                                 answer_field=args.answer_field, 
                                 first_key=args.first_sentence,
                                 second_key=args.second_sentence,
                                 device="cuda:0")
    dev_dataset = make_dataset(tokenizer, dev_data, pos_label=args.pos_label, 
                               answer_field=args.answer_field, 
                               first_key=args.first_sentence,
                               second_key=args.second_sentence,
                               device="cuda:0")
    train_dataloader = make_dataloader(train_dataset, batch_size=args.train_batch_size)
    dev_dataloader = make_dataloader(dev_dataset, batch_size=args.dev_batch_size, shuffle=False)

    if args.batch_size is None:
        args.batch_size = args.train_batch_size
    if args.batch_size % args.train_batch_size != 0:
        raise ValueError("GPU batch size should divide batch size per update.")
    batches_per_update = args.batch_size // args.train_batch_size
    bert_classifier = BertClassifier(model, state_key="pooler_output", 
                                     lr=args.lr, accumulate_gradients=batches_per_update).to("cuda:0")

    best_score, best_weights = 0.0, None

    if args.load_file:
        bert_classifier.load_state_dict(torch.load(args.load_file))
    if args.train:
Example #16
def test(args):
    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger = setup_logger('reid_baseline.eval', cfg.OUTPUT_DIR, 0, train=False)

    logger.info('Running with config:\n{}'.format(cfg))

    _, val_dl, num_query, num_classes = make_dataloader(cfg)

    model = build_model(cfg, num_classes)
    if cfg.TEST.MULTI_GPU:
        model = nn.DataParallel(model)
        model = convert_model(model)
        logger.info('Use multi gpu to inference')
    para_dict = torch.load(cfg.TEST.WEIGHT)
    model.load_state_dict(para_dict)
    model.cuda()
    model.eval()

    feats, pids, camids, paths = [], [], [], []
    with torch.no_grad():
        for batch in tqdm(val_dl, total=len(val_dl), leave=False):
            data, pid, camid, path = batch
            paths.extend(list(path))
            data = data.cuda()
            feat = model(data).detach().cpu()
            feats.append(feat)
            pids.append(pid)
            camids.append(camid)
    feats = torch.cat(feats, dim=0)
    pids = torch.cat(pids, dim=0)
    camids = torch.cat(camids, dim=0)

    query_feat = feats[:num_query]
    query_pid = pids[:num_query]
    query_camid = camids[:num_query]
    query_path = np.array(paths[:num_query])

    gallery_feat = feats[num_query:]
    gallery_pid = pids[num_query:]
    gallery_camid = camids[num_query:]
    gallery_path = np.array(paths[num_query:])

    distmat = euclidean_dist(query_feat, gallery_feat)

    cmc, mAP, all_AP = eval_func(distmat.numpy(),
                                 query_pid.numpy(),
                                 gallery_pid.numpy(),
                                 query_camid.numpy(),
                                 gallery_camid.numpy(),
                                 use_cython=True)

    if cfg.TEST.VIS:
        worst_q = np.argsort(all_AP)[:cfg.TEST.VIS_Q_NUM]
        qid = query_pid[worst_q]
        q_im = query_path[worst_q]

        ind = np.argsort(distmat, axis=1)
        gid = gallery_pid[ind[worst_q]][..., :cfg.TEST.VIS_G_NUM]
        g_im = gallery_path[ind[worst_q]][..., :cfg.TEST.VIS_G_NUM]

        for idx in range(cfg.TEST.VIS_Q_NUM):
            sid = qid[idx] == gid[idx]
            im = rank_list_to_im(range(len(g_im[idx])), sid, q_im[idx],
                                 g_im[idx])

            im.save(
                osp.join(cfg.OUTPUT_DIR,
                         'worst_query_{}.jpg'.format(str(idx).zfill(2))))

    logger.info('Validation Result:')
    for r in cfg.TEST.CMC:
        logger.info('CMC Rank-{}: {:.2%}'.format(r, cmc[r - 1]))
    logger.info('mAP: {:.2%}'.format(mAP))
    logger.info('-' * 20)

    if not cfg.TEST.RERANK:
        return

    distmat = re_rank(query_feat, gallery_feat)
    cmc, mAP, all_AP = eval_func(distmat,
                                 query_pid.numpy(),
                                 gallery_pid.numpy(),
                                 query_camid.numpy(),
                                 gallery_camid.numpy(),
                                 use_cython=True)

    logger.info('ReRanking Result:')
    for r in cfg.TEST.CMC:
        logger.info('CMC Rank-{}: {:.2%}'.format(r, cmc[r - 1]))
    logger.info('mAP: {:.2%}'.format(mAP))
    logger.info('-' * 20)
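
The evaluation above hinges on a pairwise euclidean_dist between query and gallery features. A common implementation for (M, D) and (N, D) float tensors is sketched below; this is an assumption about what the helper computes, not the project's exact code.

import torch


def euclidean_dist(x, y):
    # Pairwise Euclidean distances between rows of x (M, D) and y (N, D).
    m, n = x.size(0), y.size(0)
    xx = x.pow(2).sum(dim=1, keepdim=True).expand(m, n)
    yy = y.pow(2).sum(dim=1, keepdim=True).expand(n, m).t()
    dist = xx + yy - 2 * x @ y.t()
    return dist.clamp(min=1e-12).sqrt()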
Example #17
def get_train_dataloader():
    update_config(cfg)
    train_loader, sampler = make_dataloader(cfg,
                                            is_train=True,
                                            distributed=True)
    return train_loader, sampler
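
When the loader is built with distributed=True, the returned sampler is what enables per-epoch reshuffling across processes. Assuming it is a torch.utils.data.distributed.DistributedSampler (an assumption about this project), a typical consumption pattern is:

train_loader, sampler = get_train_dataloader()
num_epochs = 10  # illustrative value

for epoch in range(num_epochs):
    # Reseed the sampler so each epoch gets a different shuffle that is
    # still consistent across all distributed processes.
    sampler.set_epoch(epoch)
    for batch in train_loader:
        pass  # forward/backward as usual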
Example #18
def main():
    args = get_args()
    # create teacher
    model_path = './pose_higher_hrnet_w32_512_2.pth'
    pre_train_model = PoseHigherResolutionNet(cfg)
    dev = 'cuda' if torch.cuda.is_available() else 'cpu'
    # load pretrain
    pre_train_model.load_state_dict(torch.load(model_path, torch.device(dev)))

    # freeze teacher
    for param in pre_train_model.parameters():
        param.requires_grad = False

    # student = PoseHigherResolutionNet(new_cfg)
    student_cfg = get_student_cfg(cfg, args.student_file)
    student_cfg.LOG_DIR = args.log
    student = PoseHigherResolutionNet(student_cfg)
    student = torch.nn.DataParallel(student)

    # Set up logger
    logger, final_output_dir, tb_log_dir = create_logger(
        student_cfg, 'simple_model', 'train')

    final_output_dir = student_cfg.LOG_DIR

    if torch.cuda.is_available():
        # cudnn related setting
        cudnn.benchmark = student_cfg.CUDNN.BENCHMARK
        torch.backends.cudnn.deterministic = student_cfg.CUDNN.DETERMINISTIC
        torch.backends.cudnn.enabled = student_cfg.CUDNN.ENABLED

    train_loader = make_dataloader(student_cfg, True, False)
    # iteration = 1

    loss_factory = MultiLossFactory(student_cfg).cuda()

    logger.info(train_loader.dataset)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    best_perf = -1
    best_model = False
    last_epoch = -1

    optimizer = optim.Adam(student.parameters(), lr=student_cfg.TRAIN.LR)
    begin_epoch = student_cfg.TRAIN.BEGIN_EPOCH

    end_epoch = student_cfg.TRAIN.END_EPOCH

    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth.tar')

    if student_cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        student.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        student_cfg.TRAIN.LR_STEP,
        student_cfg.TRAIN.LR_FACTOR,
        last_epoch=last_epoch)

    pre_train_model.to(dev)
    student.to(dev)
    for epoch in range(begin_epoch, end_epoch):
        start = time.time()
        do_train(student_cfg, student, train_loader, loss_factory, optimizer,
                 epoch, final_output_dir, writer_dict, pre_train_model, dev)
        print('epoch', epoch, ':', round((time.time() - start) / 60, 2),
              'minutes')
        # In PyTorch 1.1.0 and later, you should call `lr_scheduler.step()` after `optimizer.step()`.
        lr_scheduler.step()

        perf_indicator = epoch
        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': student_cfg.MODEL.NAME,
                'state_dict': student.state_dict(),
                'best_state_dict': student.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)
    final_model_state_file = os.path.join(
        final_output_dir,
        'final_state{}.pth.tar'.format(torch.cuda.get_device_name()))
    logger.info(
        'saving final model state to {}'.format(final_model_state_file))
    torch.save(student.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
Example #19
from loss import make_loss
from processor import do_train
from solver import make_optimizer, WarmupMultiStepLR
from utils.logger import setup_logger

if __name__ == '__main__':
    Cfg = Configuration()
    log_dir = Cfg.DATALOADER.LOG_DIR
    logger = setup_logger('{}'.format(Cfg.PROJECT_NAME), log_dir)
    logger.info("Running with config:\n{}".format(Cfg.PROJECT_NAME))

    os.environ['CUDA_VISIBLE_DEVICES'] = Cfg.DEVICE_ID
    cudnn.benchmark = True
    # This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware.

    train_loader, val_loader = make_dataloader(Cfg)
    model = make_model(Cfg)

    optimizer = make_optimizer(Cfg, model)
    scheduler = WarmupMultiStepLR(Cfg, optimizer)
    loss_func = make_loss(Cfg)
    do_train(
        Cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,  # modify for using self trained model
        loss_func,
    )
Example #20
def start(data_file_name,
          num_noise_words,
          vec_dim,
          num_epochs,
          batch_size,
          lr,
          model_ver='dm',
          context_size=0,
          vec_combine_method='sum',
          save_all=True,
          generate_plot=True,
          max_generated_batches=5,
          num_workers=1):
    """Trains a new model. The latest checkpoint and the best performing
    model are saved in the *models* directory.

    Parameters
    ----------
    data_file_name: str
        Name of a file in the *data* directory.

    model_ver: str, one of ('dm', 'dmspline', 'dbow'), default='dm'
        Version of the model as proposed by Q. V. Le et al., Distributed
        Representations of Sentences and Documents. 'dbow' stands for
        Distributed Bag Of Words, 'dm' stands for Distributed Memory.

    vec_combine_method: str, one of ('sum', 'concat'), default='sum'
        Method for combining paragraph and word vectors when model_ver='dm'.
        Currently only the 'sum' operation is implemented.

    context_size: int, default=0
        Half the size of a neighbourhood of target words when model_ver='dm'
        (i.e. how many words left and right are regarded as context). When
        model_ver='dm' context_size has to be greater than 0, when
        model_ver='dbow' context_size has to be 0.

    num_noise_words: int
        Number of noise words to sample from the noise distribution.

    vec_dim: int
        Dimensionality of vectors to be learned (for paragraphs and words).

    num_epochs: int
        Number of iterations to train the model (i.e. number
        of times every example is seen during training).

    batch_size: int
        Number of examples per single gradient update.

    lr: float
        Learning rate of the Adam optimizer.

    save_all: bool, default=True
        Indicates whether a checkpoint is saved after each epoch.
        If false, only the best performing model is saved.

    generate_plot: bool, default=True
        Indicates whether a diagnostic plot displaying loss value over
        epochs is generated after each epoch.

    max_generated_batches: int, default=5
        Maximum number of pre-generated batches.

    num_workers: int, default=1
        Number of batch generator jobs to run in parallel. If value is set
        to -1 number of machine cores are used.
    """
    if model_ver not in ('dm', 'dmspline', 'dbow'):
        raise ValueError("Invalid version of the model")

    model_ver_is_dbow = model_ver == 'dbow'
    model_ver_is_dm = model_ver == 'dm'
    model_ver_is_dmspline = model_ver == 'dmspline'

    if model_ver_is_dbow and context_size != 0:
        raise ValueError("Context size has to be zero when using dbow")
    if not model_ver_is_dbow:
        if vec_combine_method not in ('sum', 'concat'):
            raise ValueError("Invalid method for combining paragraph and word "
                             "vectors when using dm")
        if context_size <= 0:
            raise ValueError("Context size must be positive when using dm")

    # dataset = load_dataset(data_file_name, model_ver)
    # nce_data = NCEData(
    #     dataset,
    #     batch_size,
    #     context_size,
    #     num_noise_words,
    #     max_generated_batches,
    #     num_workers,
    #     model_ver)
    # nce_data.start()

    print('Loading data and making data loader ...')
    doc_ids, context_ids, target_noise_ids, word_to_ind_dict = load_and_cache_data(
        data_file_root=data_file_name,
        num_context_words=context_size,
        num_noise_words=num_noise_words)
    dataloader = make_dataloader((doc_ids, context_ids, target_noise_ids),
                                 batch_size)

    all_doc_ids = set(doc_ids.tolist())

    print('num unique doc ids:', len(all_doc_ids))

    try:
        _run(dataloader, data_file_name, all_doc_ids,
             word_to_ind_dict, context_size, num_noise_words, vec_dim,
             num_epochs, batch_size, lr, model_ver, vec_combine_method,
             save_all, generate_plot, model_ver_is_dbow, model_ver_is_dm)
    except KeyboardInterrupt:
        pass  # the NCEData pipeline above is commented out, so there is nothing to stop
Example #21
def main():
    parser = argparse.ArgumentParser(description="ReID Baseline Training")
    parser.add_argument("--config_file", default="", help="path to config file", type=str)
    parser.add_argument("opts", help="Modify config options using the command-line", default=None,nargs=argparse.REMAINDER)
    args = parser.parse_args()
    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    num_gpus = torch.cuda.device_count()
    logger = setup_logger('reid_baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info('Running with config:\n{}'.format(cfg))
    if cfg.INPUT.SEPNORM.USE:
        train_dl, val_dl, num_query, num_classes = make_sepnorm_dataloader(
            cfg, num_gpus)
    elif cfg.DATASETS.EXEMPLAR.USE:
        train_dl, val_dl, num_query, num_classes, exemplar_dl = make_dataloader(
            cfg, num_gpus)
    else:
        train_dl, val_dl, num_query, num_classes = make_dataloader(cfg, num_gpus)

    model = build_model(cfg, num_classes)
    loss = make_loss(cfg, num_classes)
    if cfg.SOLVER.CENTER_LOSS.USE:
        trainer = CenterTrainer(cfg, model, train_dl, val_dl,
                                loss, num_query, num_gpus)
    else:
        if cfg.SOLVER.MIXUP.USE:
            trainer = NegMixupTrainer(cfg, model, train_dl, val_dl,
                                      loss, num_query, num_gpus)
        elif cfg.DATASETS.EXEMPLAR.USE:
            if cfg.DATASETS.EXEMPLAR.MEMORY.USE:
                trainer = ExemplarMemoryTrainer(cfg, model, train_dl, val_dl,
                                                exemplar_dl, loss, num_query,
                                                num_gpus)
            else:
                trainer = UIRLTrainer(cfg, model, train_dl, val_dl, exemplar_dl,
                                      loss, num_query, num_gpus)
        elif cfg.DATASETS.HIST_LABEL.USE:
            trainer = HistLabelTrainer(cfg, model, train_dl, val_dl,
                                       loss, num_query, num_gpus)
        else:
            trainer = BaseTrainer(cfg, model, train_dl, val_dl,
                                  loss, num_query, num_gpus)
    if cfg.INPUT.SEPNORM.USE:
        logger.info('train transform0: \n{}'.format(train_dl.dataset.transform0))
        logger.info('train transform1: \n{}'.format(train_dl.dataset.transform1))

        logger.info('valid transform0: \n{}'.format(val_dl.dataset.transform0))
        logger.info('valid transform1: \n{}'.format(val_dl.dataset.transform1))

    else:
        logger.info('train transform: \n{}'.format(train_dl.dataset.transform))
        logger.info('valid transform: \n{}'.format(val_dl.dataset.transform))
    logger.info(type(model))
    logger.info(loss)
    logger.info(trainer)
    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
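The epoch loop above delegates all bookkeeping to the trainer's hooks. A minimal sketch of that hook interface, purely for illustration; the method bodies are assumptions and not the repository's BaseTrainer.

class MinimalTrainer:
    # Hypothetical sketch of the step / handle_new_batch / handle_new_epoch
    # pattern driven by the loop above.
    def __init__(self, epochs, log_period=50):
        self.epochs = epochs
        self.log_period = log_period
        self.batch_cnt = 0
        self.current_epoch = 0

    def step(self, batch):
        # forward pass, loss, backward pass and optimizer update go here
        pass

    def handle_new_batch(self):
        self.batch_cnt += 1
        if self.batch_cnt % self.log_period == 0:
            print('epoch {} batch {}'.format(self.current_epoch, self.batch_cnt))

    def handle_new_epoch(self):
        # per-epoch work (evaluation, checkpointing) typically happens here
        self.batch_cnt = 0
        self.current_epoch += 1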
Example #22
def main_worker(
        gpu, ngpus_per_node, args, final_output_dir, tb_log_dir
):
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if cfg.MULTIPROCESSING_DISTRIBUTED:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            # compute this process's global rank from the node index and local GPU index
            args.rank = args.rank * ngpus_per_node + gpu
        print('Init process group: dist_url: {}, world_size: {}, rank: {}'.
              format(args.dist_url, args.world_size, args.rank))
        dist.init_process_group(
            backend=cfg.DIST_BACKEND,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank
        )

    update_config(cfg, args)

    # setup logger
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=True
    )

    # copy model file
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
            final_output_dir
        )

    # visualize results with TensorBoard
    writer_dict = {
        'writer': SummaryWriter(logdir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }


    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu]
            )
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = torch.nn.DataParallel(model).cuda()

    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        dump_input = torch.rand(
            (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE)
        ).cuda()
        #writer_dict['writer'].add_graph(model, (dump_input, ))
        logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    # define loss function (criterion) and optimizer
    loss_factory = MultiLossFactory(cfg).cuda()

    # Data loading code
    train_loader = make_dataloader(
        cfg, is_train=True, distributed=args.distributed
    )
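main_worker is written to run once per GPU. A minimal sketch of how it could be launched with torch.multiprocessing.spawn, assuming the surrounding script provides args, final_output_dir and tb_log_dir; this launch code is an assumption, not part of the example above.

import torch
import torch.multiprocessing as mp

def launch(args, final_output_dir, tb_log_dir):
    ngpus_per_node = torch.cuda.device_count()
    if args.distributed:
        # one worker process per local GPU; world_size counts GPUs over all nodes
        args.world_size = ngpus_per_node * args.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node,
                 args=(ngpus_per_node, args, final_output_dir, tb_log_dir))
    else:
        main_worker(args.gpu, ngpus_per_node, args, final_output_dir, tb_log_dir)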
Example #23
def main_worker(
        gpu, ngpus_per_node, args, final_output_dir, tb_log_dir
):
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    if cfg.FP16.ENABLED:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if cfg.FP16.STATIC_LOSS_SCALE != 1.0:
        if not cfg.FP16.ENABLED:
            print("Warning:  if --fp16 is not used, static_loss_scale will be ignored.")

    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if cfg.MULTIPROCESSING_DISTRIBUTED:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        print('Init process group: dist_url: {}, world_size: {}, rank: {}'.
              format(args.dist_url, args.world_size, args.rank))
        dist.init_process_group(
            backend=cfg.DIST_BACKEND,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank
        )

    update_config(cfg, args)

    # setup logger
    logger, _ = setup_logger(final_output_dir, args.rank, 'train')

    model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=True
    )

    # copy model file
    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        this_dir = os.path.dirname(__file__)
        shutil.copy2(
            os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
            final_output_dir
        )

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    if not cfg.MULTIPROCESSING_DISTRIBUTED or (
            cfg.MULTIPROCESSING_DISTRIBUTED
            and args.rank % ngpus_per_node == 0
    ):
        dump_input = torch.rand(
            (1, 3, cfg.DATASET.INPUT_SIZE, cfg.DATASET.INPUT_SIZE)
        )
        writer_dict['writer'].add_graph(model, (dump_input, ))
        # logger.info(get_model_summary(model, dump_input, verbose=cfg.VERBOSE))

    if cfg.FP16.ENABLED:
        model = network_to_half(model)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            # args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu]
            )
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    loss_factory = MultiLossFactory(cfg).cuda()

    # Data loading code
    train_loader = make_dataloader(
        cfg, is_train=True, distributed=args.distributed
    )
    logger.info(train_loader.dataset)

    best_perf = -1
    best_model = False
    last_epoch = -1
    optimizer = get_optimizer(cfg, model)

    if cfg.FP16.ENABLED:
        optimizer = FP16_Optimizer(
            optimizer,
            static_loss_scale=cfg.FP16.STATIC_LOSS_SCALE,
            dynamic_loss_scale=cfg.FP16.DYNAMIC_LOSS_SCALE
        )

    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(
        final_output_dir, 'checkpoint.pth.tar')
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        last_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])

        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    if cfg.FP16.ENABLED:
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer.optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
            last_epoch=last_epoch
        )
    else:
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
            last_epoch=last_epoch
        )

    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        lr_scheduler.step()

        # train one epoch
        do_train(cfg, model, train_loader, loss_factory, optimizer, epoch,
                 final_output_dir, tb_log_dir, writer_dict, fp16=cfg.FP16.ENABLED)

        perf_indicator = epoch
        if perf_indicator >= best_perf:
            best_perf = perf_indicator
            best_model = True
        else:
            best_model = False

        if not cfg.MULTIPROCESSING_DISTRIBUTED or (
                cfg.MULTIPROCESSING_DISTRIBUTED
                and args.rank == 0
        ):
            logger.info('=> saving checkpoint to {}'.format(final_output_dir))
            save_checkpoint({
                'epoch': epoch + 1,
                'model': cfg.MODEL.NAME,
                'state_dict': model.state_dict(),
                'best_state_dict': model.module.state_dict(),
                'perf': perf_indicator,
                'optimizer': optimizer.state_dict(),
            }, best_model, final_output_dir)

    final_model_state_file = os.path.join(
        final_output_dir, 'final_state{}.pth.tar'.format(gpu)
    )

    logger.info('saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
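save_checkpoint is not defined in this snippet. A minimal sketch of the usual convention, assuming it stores the latest state and keeps a separate copy of the best model; the file names here are assumptions.

import os
import shutil
import torch

def save_checkpoint(states, is_best, output_dir, filename='checkpoint.pth.tar'):
    # Hypothetical sketch: write the latest checkpoint, and copy it to
    # model_best.pth.tar when the current epoch is the best so far.
    torch.save(states, os.path.join(output_dir, filename))
    if is_best:
        shutil.copyfile(os.path.join(output_dir, filename),
                        os.path.join(output_dir, 'model_best.pth.tar'))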
Example #24
    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # print(cfg)
    dict_args = {}
    dict_args.update(vars(args))
    print(pprint.pformat(dict_args))

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    num_gpus = torch.cuda.device_count()
    train_dl, val_dl, num_query, num_classes = make_dataloader(cfg, num_gpus)

    print("==> build model..")
    model = build_model(cfg, num_classes)
    print(model)
    print("==> load params..")
    param_dict = torch.load(cfg.TEST.WEIGHT)
    model = torch.nn.DataParallel(model)
    if cfg.SOLVER.SYNCBN:
        print("convert_model to syncbn")
        model = convert_model(model)
    param_dict = {k.replace('module.', ''): v for k, v in param_dict.items()}
    print('unloaded_param:')
    print([
        k for k, v in model.state_dict().items()