예제 #1
0
opt = main()

####################### Output path, logger, device and random seed configuration #################

# Reuse the saved model directory when testing; otherwise derive a fresh
# experiment path from the hyperparameters.
exp_path = opt.read_model_path if opt.testing else hyperparam_pseudo_method(
    opt)
if not os.path.exists(exp_path):
    os.makedirs(exp_path)

logger = set_logger(exp_path, testing=opt.testing)
logger.info("Parameters: " + str(json.dumps(vars(opt), indent=4)))
logger.info("Experiment path: %s" % (exp_path))
# Two separate devices: deviceId[0] drives the semantic-parsing model,
# deviceId[1] the question-generation model.
sp_device, qg_device = set_torch_device(opt.deviceId[0]), set_torch_device(
    opt.deviceId[1])
set_random_seed(opt.seed, device='cuda')

################################ Vocab and Data Reader ###########################

# A 'copy__' token in a pretrained model path marks a copy-mechanism variant;
# each vocabulary is built to match its model.
sp_copy, qg_copy = 'copy__' in opt.read_sp_model_path, 'copy__' in opt.read_qg_model_path
sp_vocab, qg_vocab = Vocab(opt.dataset, task='semantic_parsing',
                           copy=sp_copy), Vocab(opt.dataset,
                                                task='question_generation',
                                                copy=qg_copy)
logger.info("Semantic Parsing model vocabulary ...")
logger.info("Vocab size for input natural language sentence is: %s" %
            (len(sp_vocab.word2id)))
logger.info("Vocab size for output logical form is: %s" %
            (len(sp_vocab.lf2id)))

logger.info("Question Generation model vocabulary ...")
예제 #2
0
def main():
    """Train a re-identification model end to end.

    Sets up devices and data loaders, builds the model, optionally loads
    pretrained weights (running a validation pass to sanity-check them),
    then runs the train / periodic-eval / checkpoint loop.

    All configuration comes from the module-level ``args`` namespace.
    """
    # NOTE(review): earlier observation (translated) — the repo-provided
    # model.pth.tar-9 scored much better on validation than a locally
    # trained tar-9 (mAP 15.1%, Rank-1 23.3%); cause unknown. Override
    # args.load_weights (e.g. '/model/caohw9/track3_model/model.pth.tar-9')
    # to evaluate a specific checkpoint.

    global args
    print(args)

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    sys.stdout = Logger(osp.join(args.save_dir, "log.txt"))
    if use_gpu:
        print('Currently using GPU {}'.format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')

    # Data loaders: trainloader feeds training; testloader_dict holds the
    # 'query' and 'test' (gallery) loaders used for validation.
    print('Initializing image data manager')
    dm = ImageDataManager(use_gpu, **trainset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()
    print('successfully initialized loaders!')

    # args.arch defaults to 'resnet101'.
    print('Initializing model: {}'.format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent', 'htri'},
                              pretrained=not args.no_pretrained,
                              use_gpu=use_gpu)
    print('Model size: {:.3f} M'.format(count_num_param(model)))

    # Load pretrained weights BEFORE DataParallel wrapping so the
    # state-dict keys line up with the bare model.
    weights_loaded = args.load_weights and check_isfile(args.load_weights)
    if weights_loaded:
        load_pretrained_weights(model, args.load_weights)

    # Wrap for (multi-)GPU training exactly once, for both code paths.
    model = nn.DataParallel(model).cuda() if use_gpu else model

    if weights_loaded:
        # Sanity-check the loaded checkpoint on the validation split.
        print('=> Validation')
        # NOTE(review): args.test_set presumably names the validation set —
        # confirm against the data manager.
        print('Evaluating {} ...'.format(args.test_set))
        queryloader = testloader_dict['query']
        galleryloader = testloader_dict['test']
        rank1 = test(model, queryloader, galleryloader, use_gpu)

    # Losses, optimizer and learning-rate schedule.
    criterion_xent = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                      use_gpu=use_gpu,
                                      label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)
    optimizer = init_optimizer(model, **optimizer_kwargs(args))
    scheduler = init_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    # Resume an interrupted run: restores the epoch counter (and optimizer
    # state) from the checkpoint.
    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(
            args.resume, model, optimizer=optimizer)

    time_start = time.time()
    print('=> Start training')

    for epoch in range(args.start_epoch, args.max_epoch):
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu)

        scheduler.step()  # advance the learning-rate schedule

        # Validate + checkpoint when past args.start_eval and on the
        # args.eval_freq cadence, or unconditionally on the final epoch.
        # Precedence is (A and B and C) or D, as in the original.
        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print('=> Validation')
            print('Evaluating {} ...'.format(args.test_set))
            queryloader = testloader_dict['query']
            galleryloader = testloader_dict['test']
            rank1 = test(model, queryloader, galleryloader, use_gpu)

            save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'rank1': rank1,
                    'epoch': epoch + 1,
                    'arch': args.arch,  # default 'resnet101'
                    # Optimizer state dict: internal state plus hyperparams
                    # (lr, momentum, weight_decay, ...).
                    'optimizer': optimizer.state_dict(),
                },
                args.save_dir)

    # Report total wall-clock training time.
    elapsed = round(time.time() - time_start)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print('Elapsed {}'.format(elapsed))
예제 #3
0
        assert opt.read_model_path
    return opt

opt = main()

####################### Output path, logger, device and random seed configuration #################

# Reuse the saved model directory when testing; otherwise derive a fresh
# experiment path from the hyperparameters.
exp_path = opt.read_model_path if opt.testing else hyperparam_lm(opt)
if not os.path.exists(exp_path):
    os.makedirs(exp_path)

logger = set_logger(exp_path, testing=opt.testing)
logger.info("Parameters: " + str(json.dumps(vars(opt), indent=4)))
logger.info("Experiment path: %s" % (exp_path))
opt.device = set_torch_device(opt.deviceId)
set_random_seed(opt.seed, device=opt.device.type)

################################ Vocab and Data Reader ###########################

# Language-model vocabulary: the 'question' side uses the natural-language
# word map (word2id); any other side uses the logical-form map (lf2id).
lm_vocab = Vocab(opt.dataset, task='language_model')
if opt.side == 'question':
    word2id = lm_vocab.word2id
    logger.info("Vocab size for natural language sentence is: %s" % (len(word2id)))
else:
    word2id = lm_vocab.lf2id
    logger.info("Vocab size for logical form is: %s" % (len(word2id)))

logger.info("Read dataset %s starts at %s" % (opt.dataset, time.asctime(time.localtime(time.time()))))
Example.set_domain(opt.dataset)
if not opt.testing:
    train_dataset, dev_dataset = Example.load_dataset(choice='train')
예제 #4
0
# Tail of the argument definitions (parser is created above this chunk).
parser.add_argument('--deviceId',
                    type=int,
                    default=-1,
                    help='train model on ith gpu. -1:cpu, 0:auto_select')
parser.add_argument('--seed', type=int, default=999)
args = parser.parse_args()
# Testing mode requires a saved model to load.
assert (not args.testing) or args.read_model_path
if args.testing:
    exp_path = args.read_model_path
else:
    exp_path = set_hyperparam_path(args)
if not os.path.exists(exp_path):
    os.makedirs(exp_path)
logger = set_logger(exp_path, testing=args.testing)
# set_torch_device here returns (device, resolved_id); args.deviceId is
# updated with the id actually selected.
device, args.deviceId = set_torch_device(args.deviceId)
set_random_seed(args.seed, device=device)

logger.info("Parameters:" + str(json.dumps(vars(args), indent=4)))
logger.info("Experiment path: %s" % (exp_path))
logger.info("Read dataset starts at %s" %
            (time.asctime(time.localtime(time.time()))))

start_time = time.time()
Example.set_tokenizer(args.bert)  # set bert tokenizer
if not args.testing:
    train_loader = DataLoader(QNLIDataset('train'),
                              batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=collate_fn_labeled)
    # NOTE(review): this DataLoader call is truncated in this chunk; the
    # closing arguments continue beyond the visible source.
    dev_loader = DataLoader(QNLIDataset('dev'),
                            batch_size=args.batch_size,
예제 #5
0
def main():
    """Train a re-identification model.

    Configures devices and logging, builds the data loaders, model,
    criteria, optimizer and LR scheduler from the module-level ``args``,
    then runs the training loop with periodic validation + checkpointing.
    """
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    on_gpu = torch.cuda.is_available() and not args.use_cpu
    sys.stdout = Logger(osp.join(args.save_dir, "log.txt"))
    if on_gpu:
        print('Currently using GPU {}'.format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')

    print('Initializing image data manager')
    data_manager = ImageDataManager(on_gpu, **trainset_kwargs(args))
    train_loader, eval_loaders = data_manager.return_dataloaders()

    print('Initializing model: {}'.format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=data_manager.num_train_pids,
                              loss={'xent', 'htri'},
                              pretrained=not args.no_pretrained,
                              use_gpu=on_gpu)
    print('Model size: {:.3f} M'.format(count_num_param(model)))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    if on_gpu:
        model = nn.DataParallel(model).cuda()

    # Losses, optimizer and learning-rate schedule.
    xent = CrossEntropyLoss(num_classes=data_manager.num_train_pids,
                            use_gpu=on_gpu,
                            label_smooth=args.label_smooth)
    htri = TripletLoss(margin=args.margin)
    optimizer = init_optimizer(model, **optimizer_kwargs(args))
    scheduler = init_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    # Resume an interrupted run from its checkpoint, if given.
    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(args.resume,
                                                  model,
                                                  optimizer=optimizer)

    time_start = time.time()
    print('=> Start training')

    for epoch in range(args.start_epoch, args.max_epoch):
        train(epoch, model, xent, htri, optimizer, train_loader, on_gpu)

        scheduler.step()

        # Validate on the eval_freq cadence once past start_eval, and
        # always at the last epoch.
        done = epoch + 1
        on_cadence = (done > args.start_eval and args.eval_freq > 0
                      and done % args.eval_freq == 0)
        if on_cadence or done == args.max_epoch:
            print('=> Validation')

            print('Evaluating {} ...'.format(args.test_set))
            rank1 = test(model, eval_loaders['query'], eval_loaders['test'],
                         on_gpu)

            save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'rank1': rank1,
                    'epoch': done,
                    'arch': args.arch,
                    'optimizer': optimizer.state_dict(),
                }, args.save_dir)

    seconds = round(time.time() - time_start)
    print('Elapsed {}'.format(str(datetime.timedelta(seconds=seconds))))