Example #1
class AttrResNet503D(nn.Module):
    def __init__(self,
                 attr_lens,
                 sample_width,
                 sample_height,
                 sample_duration,
                 model_type="tp",
                 **kwargs):
        super(AttrResNet503D, self).__init__()
        resnet50 = resnet3d.resnet50(sample_width=sample_width,
                                     sample_height=sample_height,
                                     sample_duration=sample_duration)
        # Use every layer of the 3D ResNet-50 except the last two children
        # (typically the final pooling and fc layers) as the feature backbone.
        self.base = nn.Sequential(*list(resnet50.children())[:-2])
        self.feature_dim = 512
        self.idrelated_classifier = MultiLabelLinearAttributeModule(
            self.feature_dim, attr_lens[0])
        self.idunrelated_classifier = MultiLabelLinearAttributeModule(
            self.feature_dim, attr_lens[1])
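
A minimal instantiation sketch (not from the original source), assuming the class above and its dependencies (resnet3d, MultiLabelLinearAttributeModule) are importable; attr_lens is taken to be a pair of lists giving the class counts for the id-related and id-unrelated attribute heads, and all values below are hypothetical:

import torch

attr_lens = [[2, 5], [3, 4, 9]]  # hypothetical: class counts per attribute head
model = AttrResNet503D(attr_lens,
                       sample_width=112,
                       sample_height=112,
                       sample_duration=16)
# Assumed input layout for the 3D backbone:
# (batch, channels, frames, height, width)
clip = torch.randn(2, 3, 16, 112, 112)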
Example #2
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_dataset(name=args.dataset)

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    trainloader = DataLoader(
        VideoDataset(dataset.train,
                     seq_len=args.seq_len,
                     sample='random',
                     transform=transform_train),
        sampler=RandomIdentitySampler(dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    queryloader = DataLoader(
        VideoDataset(dataset.query,
                     seq_len=args.seq_len,
                     sample='dense',
                     transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        VideoDataset(dataset.gallery,
                     seq_len=args.seq_len,
                     sample='dense',
                     transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    if args.arch == 'resnet503d':
        model = resnet3d.resnet50(num_classes=dataset.num_train_pids,
                                  sample_width=args.width,
                                  sample_height=args.height,
                                  sample_duration=args.seq_len)
        if not os.path.exists(args.pretrained_model):
            raise IOError("Can't find pretrained model: {}".format(
                args.pretrained_model))
        print("Loading checkpoint from '{}'".format(args.pretrained_model))
        checkpoint = torch.load(args.pretrained_model)
        state_dict = {}
        for key in checkpoint['state_dict']:
            if 'fc' in key: continue
            state_dict[key.partition("module.")
                       [2]] = checkpoint['state_dict'][key]
        model.load_state_dict(state_dict, strict=False)
    else:
        model = models.init_model(name=args.arch,
                                  num_classes=dataset.num_train_pids,
                                  loss={'xent', 'htri'})
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion_xent = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=args.margin)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.stepsize,
                                        gamma=args.gamma)
    start_epoch = args.start_epoch

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, args.pool, use_gpu)
        return

    start_time = time.time()
    best_rank1 = -np.inf
    if args.arch == 'resnet503d':
        torch.backends.cudnn.benchmark = False
    for epoch in range(start_epoch, args.max_epoch):
        print("==> Epoch {}/{}".format(epoch + 1, args.max_epoch))

        train(model, criterion_xent, criterion_htri, optimizer, trainloader,
              use_gpu)

        if args.stepsize > 0: scheduler.step()

        if (args.eval_step > 0 and (epoch + 1) % args.eval_step == 0) or (
                epoch + 1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, args.pool, use_gpu)
            is_best = rank1 > best_rank1
            if is_best: best_rank1 = rank1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
Example #3
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_dataset(name=args.dataset)

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    queryloader = DataLoader(
        VideoDataset(dataset.query,
                     seq_len=args.seq_len,
                     sample='dense',
                     transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        VideoDataset(dataset.gallery,
                     seq_len=args.seq_len,
                     sample='dense',
                     transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )
    if args.arch == 'resnet503d':
        cudnn.benchmark = False

    print("Initializing model: {}".format(args.arch))
    if args.arch == 'resnet503d':
        model = resnet3d.resnet50(num_classes=dataset.num_train_pids,
                                  sample_width=args.width,
                                  sample_height=args.height,
                                  sample_duration=args.seq_len)
        if not os.path.exists(args.best_model):
            raise IOError("Can't find best model: {}".format(args.best_model))
        print("Loading checkpoint from '{}'".format(args.best_model))
        checkpoint = torch.load(args.best_model)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
    else:
        model = models.init_model(name=args.arch,
                                  num_classes=dataset.num_train_pids,
                                  loss={'xent', 'htri'})
        if not os.path.exists(args.best_model):
            raise IOError("Can't find best model: {}".format(args.best_model))
        print("Loading checkpoint from '{}'".format(args.best_model))
        checkpoint = torch.load(args.best_model)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, args.pool, use_gpu)
        # distmat = test(model, queryloader, galleryloader, args.pool, use_gpu)  # can't do this with the RNN model, or it runs out of memory
        # if args.vis_ranked_res:
        #     visualize_ranked_results(
        #         distmat, dataset,
        #         save_dir=osp.join(args.save_dir, 'ranked_results'),
        #         topk=20,
        #     )

        return
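
Both branches above load with strict=False, which silently skips mismatched keys. In recent PyTorch versions load_state_dict returns the mismatches, so a sketch like the following (not in the original) can make the skipped keys visible:

result = model.load_state_dict(checkpoint['state_dict'], strict=False)
print("missing keys:", result.missing_keys)        # parameters the model expected but the checkpoint lacked
print("unexpected keys:", result.unexpected_keys)  # checkpoint entries the model has no slot for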
Example #4
def testseq(dataset_name, use_gpu):

    dataset_root = './video2img/track1_sct_img_test_big/'
    dataset = Graph_data_manager.AICityTrack2(root=dataset_root)

    width = 224
    height = 224
    transform_train = T.Compose([
        T.Random2DTranslation(height, width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
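    # Note: transform_train above is defined but never used in testseq;
    # only transform_test below feeds the query loader.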

    transform_test = T.Compose([
        T.Resize((height, width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False
    seq_len = 4
    num_instance = 4
    train_batch = 32
    test_batch = 1

    queryloader = DataLoader(
        VideoDataset(dataset.query,
                     seq_len=seq_len,
                     sample='dense',
                     transform=transform_test),
        batch_size=test_batch,
        shuffle=False,
        num_workers=4,
        pin_memory=pin_memory,
        drop_last=False,
    )

    arch = "resnet50ta"
    pretrained_model = "./log/track12_ta224_checkpoint_ep500.pth.tar"

    start_epoch = 0
    print("Initializing model: {}".format(arch))
    dataset.num_train_pids = 517
    if arch == 'resnet503d':
        model = resnet3d.resnet50(num_classes=dataset.num_train_pids,
                                  sample_width=width,
                                  sample_height=height,
                                  sample_duration=seq_len)
        if not os.path.exists(pretrained_model):
            raise IOError(
                "Can't find pretrained model: {}".format(pretrained_model))
        print("Loading checkpoint from '{}'".format(pretrained_model))
        checkpoint = torch.load(pretrained_model)
        state_dict = {}
        for key in checkpoint['state_dict']:
            if 'fc' in key: continue
            state_dict[key.partition("module.")
                       [2]] = checkpoint['state_dict'][key]
        model.load_state_dict(state_dict, strict=False)
    else:
        model = models.init_model(name=arch,
                                  num_classes=dataset.num_train_pids,
                                  loss={'xent', 'htri'})
        # Load the pretrained weights only when the checkpoint file exists.
        if os.path.exists(pretrained_model):
            checkpoint = torch.load(pretrained_model)
            model.load_state_dict(checkpoint['state_dict'])
            start_epoch = checkpoint['epoch'] + 1
            print("Loaded checkpoint from '{}'".format(pretrained_model))
            print("- start_epoch: {}\n- rank1: {}".format(
                start_epoch, checkpoint['rank1']))

    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion_xent = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=0.3)

    lr = 0.0003
    gamma = 0.1
    stepsize = 200
    weight_decay = 5e-04

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    if stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=stepsize,
                                        gamma=gamma)

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    test(model, queryloader, 'avg', use_gpu, dataset, -1, meta_data_tab=None)
Example #5
def resnet50(config):
    return resnet3d.resnet50(sample_input_D=128,
                             sample_input_H=128,
                             sample_input_W=128,
                             num_seg_classes=config.num_classes,
                             shortcut_type='B')
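
A usage sketch for this wrapper (assumptions: config only needs a num_classes attribute, and resnet3d is importable; the values are hypothetical):

from types import SimpleNamespace

config = SimpleNamespace(num_classes=3)  # hypothetical config object
model = resnet50(config)  # 3D ResNet-50 over 128x128x128 inputs with type-B shortcuts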
Example #6
def main():
    torch.manual_seed(args.seed)  # seed the CPU RNG so results are reproducible
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices  # select which GPUs this process may use
    use_gpu = torch.cuda.is_available()  # True if the current environment supports CUDA, False otherwise
    if args.use_cpu:
        use_gpu = False

    if not args.evaluate:  # not evaluating means training, so write the training log; otherwise write the test log
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))  # 打印所有参数

    if use_gpu:  # if using the GPU, report which devices were selected
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True  # enabling this at startup gives a small training speedup at no extra cost
        torch.cuda.manual_seed_all(args.seed)  # seed the GPU RNGs so results are reproducible
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_dataset(name=args.dataset)  # initialize the dataset, loaded from data_manager.py

    # import transforms as T.
    # T.Compose chains several transforms together.
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),  # with some probability, first enlarge the image to (1 + 1/8) of the target size, then take a random crop
        T.RandomHorizontalFlip(),  # randomly flip the given PIL image horizontally with probability 0.5
        T.ToTensor(),  # convert a ``PIL Image`` or ``numpy.ndarray`` to a tensor
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # normalize the tensor image with mean and standard deviation:
        # input[channel] = (input[channel] - mean[channel]) / std[channel]
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),  # resize the input PIL image to the given size
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # With pin_memory=True, tensors are allocated in page-locked (pinned) host
    # memory, which makes copying them to GPU memory faster.
    pin_memory = True if use_gpu else False

    # DataLoader combines a dataset and a sampler and provides a single- or
    # multi-process iterator over the dataset.
    trainloader = DataLoader(
        # VideoDataset: dataset for video-based person re-id
        # (training split, sequence length, sampling method: random, with data augmentation)
        VideoDataset(dataset.train, seq_len=args.seq_len, sample='random', transform=transform_train),
        # Randomly sample N identities, then K instances per identity, so the batch size is N * K.
        sampler=RandomIdentitySampler(dataset.train, num_instances=args.num_instances),
        batch_size=args.train_batch,  # training batch size
        num_workers=args.workers,  # number of worker processes
        pin_memory=pin_memory,
        drop_last=True,
    )  # drop_last=True discards the final incomplete batch when the dataset size is not divisible by the batch size

    queryloader = DataLoader(
        VideoDataset(dataset.query, seq_len=args.seq_len, sample='dense', transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,  # set to True to have the data reshuffled every epoch (default: False)
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,  # with False, the last batch is simply smaller when the dataset size is not divisible by the batch size
    )

    galleryloader = DataLoader(
        VideoDataset(dataset.gallery, seq_len=args.seq_len, sample='dense', transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))  # 模型的初始化

    if args.arch == 'resnet503d':
        model = resnet3d.resnet50(num_classes=dataset.num_train_pids, sample_width=args.width,
                                  sample_height=args.height, sample_duration=args.seq_len)
        # raise an error if the pretrained model does not exist
        if not os.path.exists(args.pretrained_model):
            raise IOError("Can't find pretrained model: {}".format(args.pretrained_model))
        # load the pretrained model
        print("Loading checkpoint from '{}'".format(args.pretrained_model))
        checkpoint = torch.load(args.pretrained_model)
        state_dict = {}  # state dict: parameters loaded from the checkpoint file
        for key in checkpoint['state_dict']:
            if 'fc' in key:
                continue
            state_dict[key.partition("module.")[2]] = checkpoint['state_dict'][key]
        model.load_state_dict(state_dict, strict=False)
    else:
        model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids, loss={'xent', 'htri'})
    print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0))

    # Loss functions: xent is the softmax cross-entropy loss, htri is the triplet loss.
    criterion_xent = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=args.margin)
    # optimizer: Adam
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # stepsize: decay the learning rate in steps (> 0 means enabled)
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
        # StepLR sets each parameter group's learning rate to the initial lr decayed by gamma every step_size epochs.
    start_epoch = args.start_epoch  # manual epoch number (useful on restarts)

    if use_gpu:
        model = nn.DataParallel(model).cuda()  # multi-GPU training
        # DataParallel is a class in torch.nn; it takes the module to replicate
        # across GPUs and splits the input batch among them.

    if args.evaluate:  # evaluate is not useful as written; code should be added here to load a saved checkpoint before testing
        print("Evaluate only")  # run evaluation
        test(model, queryloader, galleryloader, args.pool, use_gpu)
        return

    start_time = time.time()  # start time
    best_rank1 = -np.inf  # initialize to negative infinity
    if args.arch == 'resnet503d':  # if the model is resnet503d,
        torch.backends.cudnn.benchmark = False

    for epoch in range(start_epoch, args.max_epoch):  # train from the starting epoch to the maximum epoch
        print("==> Epoch {}/{}".format(epoch+1, args.max_epoch))
        
        train(model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu)
        
        if args.stepsize > 0:
            scheduler.step()

        # Evaluate whenever eval_step > 0 and (epoch + 1) is divisible by eval_step, or when (epoch + 1) equals max_epoch.
        if (args.eval_step > 0 and (epoch+1) % args.eval_step == 0) or (epoch+1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, args.pool, use_gpu)
            is_best = rank1 > best_rank1  # True if this rank-1 beats the best so far
            if is_best:
                best_rank1 = rank1

            if use_gpu:
                state_dict = model.module.state_dict()
                # state_dict() returns a dictionary containing the module's whole state, including parameters and buffers.
            else:
                state_dict = model.state_dict()
            # save the checkpoint file
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch+1) + '.pth.tar'))
    # elapsed time
    elapsed = round(time.time() - start_time)  # round() returns the float rounded to the nearest integer
    elapsed = str(datetime.timedelta(seconds=elapsed))  # a timedelta represents the difference between two times
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))