示例#1
0
    def __init__(self, net_path=None, **kargs):
        """Set up the SiamFC tracker: config, device, model, optimizer, LR schedule."""
        super(TrackerSiamFC, self).__init__(name='SiamFC',
                                            is_deterministic=True)
        self.cfg = parse_args(**kargs)
        self.para = param()

        # Select the compute device: first GPU when CUDA is available,
        # otherwise fall back to the CPU.
        self.cuda = torch.cuda.is_available()
        if self.cuda:
            self.device = torch.device('cuda:0')
        else:
            self.device = torch.device('cpu')

        # Build the embedding network and move it onto the chosen device.
        # NOTE(review): loading pre-trained weights from ``net_path`` is
        # currently disabled, so the parameter is accepted but unused.
        # if net_path is not None:
        #     self.net.load_state_dict(torch.load(
        #     net_path, map_location=lambda storage, loc: storage))
        self.net = SiamFC()
        self.net = self.net.to(self.device)

        # SGD with hyper-parameters taken from the parsed config
        # (initial_lr = 0.01, weight_decay = 0.0005, momentum = 0.9).
        self.optimizer = optim.SGD(self.net.parameters(),
                                   lr=self.cfg.initial_lr,
                                   weight_decay=self.cfg.weight_decay,
                                   momentum=self.cfg.momentum)

        # Exponential learning-rate decay per scheduler step
        # (lr_decay = 0.8685113737513527 in the config).
        self.lr_scheduler = ExponentialLR(self.optimizer,
                                          gamma=self.cfg.lr_decay)
示例#2
0
 def __init__(self, **kargs):
     """Build the AlexNet-style embedding backbone used by SiamFC."""
     super(SiamFC, self).__init__()
     # About Conv2d's ``groups`` argument: with in=8, out=12, kernel=3 and
     # groups=1 there are 12 filters, each of size 3*3*8, one per output
     # channel. With groups=4 each group maps 2 input channels to 3 output
     # channels, so there are still 12 filters but each is only 3*3*2.
     # Example: Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), groups=2)
     # gives self.feature[4].weight.data.shape == torch.Size([256, 48, 5, 5]).
     self.cuda = torch.cuda.is_available()
     self.cfg = parse_args(**kargs)
     self.device = torch.device('cuda:0' if self.cuda else 'cpu')
     self.para = param()

     layers = [
         # conv1 (weights are [96, 3, 11, 11])
         nn.Conv2d(3, 96, 11, 2),
         nn.BatchNorm2d(96, eps=1e-6, momentum=0.05),
         nn.ReLU(inplace=True),
         nn.MaxPool2d(3, 2),
         # conv2
         nn.Conv2d(96, 256, 5, 1, groups=2),
         nn.BatchNorm2d(256, eps=1e-6, momentum=0.05),
         nn.ReLU(inplace=True),
         nn.MaxPool2d(3, 2),
         # conv3
         nn.Conv2d(256, 384, 3, 1),
         nn.BatchNorm2d(384, eps=1e-6, momentum=0.05),
         nn.ReLU(inplace=True),
         # conv4
         nn.Conv2d(384, 384, 3, 1, groups=2),
         nn.BatchNorm2d(384, eps=1e-6, momentum=0.05),
         nn.ReLU(inplace=True),
         # conv5
         nn.Conv2d(384, 256, 3, 1, groups=2),
     ]
     self.feature = nn.Sequential(*layers)
     # Weight init runs before ``deconv`` is created, so ``deconv`` keeps
     # its default PyTorch initialization (order preserved deliberately).
     self._initialize_weights()
     # 1x1 conv that projects the stacked features of the prior frames
     # back down to 256 channels.
     self.deconv = nn.Conv2d(256 * self.para.prior_frames_num, 256, 1, 1)
示例#3
0
 def __init__(self, seq_dataset, **kargs):
     """Wrap a sequence dataset into exemplar/instance training pairs."""
     super(Pairwise, self).__init__()
     self.cfg = self.parse_args(**kargs)
     self.para = param()
     # ``seq_dataset`` is a got10k-style sequence dataset.
     self.seq_dataset = seq_dataset
     # Shuffle the sequences once; ``indices`` holds the permutation.
     self.indices = np.random.permutation(len(seq_dataset))

     # Crop sizes derived from the config (instance_sz = 255,
     # exemplar_sz = 127): first a center crop to 255 - 8, then a
     # random crop to 255 - 2*8.
     center_sz = self.cfg.instance_sz - 8
     random_sz = self.cfg.instance_sz - 2 * 8

     # Exemplar (z) pipeline: random scale in [0.95, 1.05), the two
     # crops above, then a final center crop down to 127 x 127.
     self.transform_z = Compose([
         RandomStretch(max_stretch=0.05),
         CenterCrop(center_sz),
         RandomCrop(random_sz),
         CenterCrop(self.cfg.exemplar_sz),
         ToTensor()
     ])
     # Instance (x) pipeline: identical, minus the final exemplar crop.
     self.transform_x = Compose([
         RandomStretch(max_stretch=0.05),
         CenterCrop(center_sz),
         RandomCrop(random_sz),
         ToTensor()
     ])
示例#4
0
# 将file_path中的预训练模型读入net
def load_pre_train_mode(net, file_path):
    # 读取预训练的网络
    pretrained_dict = torch.load(file_path)
    my_model_dict = net.state_dict()
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items() if k in my_model_dict
    }
    my_model_dict.update(pretrained_dict)
    net.load_state_dict(my_model_dict)


if __name__ == '__main__':
    # setup dataset
    para = param()
    name = 'GOT-10k'
    assert name in ['VID', 'GOT-10k']
    if name == 'GOT-10k':
        root_dir = '/home/fanfu/data/GOT-10k'
        seq_dataset = GOT10k(root_dir, subset='train')
    elif name == 'VID':
        root_dir = '/home/fanfu/data/ILSVRC2015/Data/VID/'
        seq_dataset = ImageNetVID(root_dir, subset=('train', 'val'))
    pair_dataset = Pairwise(seq_dataset)

    # setup data loader
    cuda = torch.cuda.is_available()
    loader = DataLoader(pair_dataset,
                        batch_size=8,
                        shuffle=True,