def __init__(self, net_path=None, **kargs):
    super(TrackerSiamFC, self).__init__(
        name='SiamFC', is_deterministic=True)
    self.cfg = parse_args(**kargs)
    self.para = param()

    # setup GPU device if available
    self.cuda = torch.cuda.is_available()
    self.device = torch.device('cuda:0' if self.cuda else 'cpu')

    # setup model
    self.net = SiamFC()
    # if net_path is not None:
    #     self.net.load_state_dict(torch.load(
    #         net_path, map_location=lambda storage, loc: storage))
    self.net = self.net.to(self.device)

    # setup optimizer
    self.optimizer = optim.SGD(
        self.net.parameters(),
        lr=self.cfg.initial_lr,              # self.cfg.initial_lr = 0.01
        weight_decay=self.cfg.weight_decay,  # self.cfg.weight_decay = 0.0005
        momentum=self.cfg.momentum)          # self.cfg.momentum = 0.9

    # setup lr scheduler
    # self.cfg.lr_decay = 0.8685113737513527
    self.lr_scheduler = ExponentialLR(
        self.optimizer, gamma=self.cfg.lr_decay)
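# Sketch (not repo code) of how the ExponentialLR schedule above behaves over
# training. Note that gamma = 0.8685113737513527 equals (1e-5 / 1e-2) ** (1 / 49),
# i.e. the learning rate decays geometrically from 0.01 to about 1e-5 across
# 50 epochs. The toy model and the epoch count are placeholders.
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR

toy_net = nn.Linear(4, 2)  # stand-in for SiamFC
opt = optim.SGD(toy_net.parameters(), lr=0.01,
                weight_decay=0.0005, momentum=0.9)
sched = ExponentialLR(opt, gamma=0.8685113737513527)

for epoch in range(50):
    # ... run one training epoch, calling opt.step() per batch ...
    sched.step()  # lr *= gamma once per epoch

print(opt.param_groups[0]['lr'])  # ~8.7e-6 after 50 decays (exactly 1e-5 after 49)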
def __init__(self, **kargs):
    super(SiamFC, self).__init__()
    # A note on the `groups` parameter of Conv2d:
    # suppose the input has 8 channels, the output has 12, and the kernel size is 3.
    # With groups=1 there are 12 filters in total, each with 3*3*8 parameters,
    # and each filter produces one output channel.
    # With groups=4, each group takes 2 input channels and produces 3 output
    # channels; there are still 12 filters in total, but each one is of size
    # 3*3*2 rather than 3*3*8.
    # Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), groups=2)
    # self.feature[4].weight.data.shape = torch.Size([256, 48, 5, 5])

    # setup GPU device if available
    self.cuda = torch.cuda.is_available()
    self.cfg = parse_args(**kargs)
    self.device = torch.device('cuda:0' if self.cuda else 'cpu')
    self.para = param()
    self.feature = nn.Sequential(
        # conv1
        nn.Conv2d(3, 96, 11, 2),  # weight shape: [96, 3, 11, 11]
        nn.BatchNorm2d(96, eps=1e-6, momentum=0.05),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(3, 2),
        # conv2
        nn.Conv2d(96, 256, 5, 1, groups=2),
        nn.BatchNorm2d(256, eps=1e-6, momentum=0.05),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(3, 2),
        # conv3
        nn.Conv2d(256, 384, 3, 1),
        nn.BatchNorm2d(384, eps=1e-6, momentum=0.05),
        nn.ReLU(inplace=True),
        # conv4
        nn.Conv2d(384, 384, 3, 1, groups=2),
        nn.BatchNorm2d(384, eps=1e-6, momentum=0.05),
        nn.ReLU(inplace=True),
        # conv5
        nn.Conv2d(384, 256, 3, 1, groups=2))
    self._initialize_weights()
    # 1x1 conv that maps the stacked features of the prior frames back to 256 channels
    self.deconv = nn.Conv2d(256 * self.para.prior_frames_num, 256, 1, 1)
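# Standalone check (plain torch, no repo code needed) confirming the grouped-
# convolution weight shapes described in the comments above: the weight tensor
# of Conv2d is [out_channels, in_channels // groups, kH, kW].
import torch.nn as nn

conv_g1 = nn.Conv2d(8, 12, 3, groups=1)
print(conv_g1.weight.shape)  # torch.Size([12, 8, 3, 3]) -> 12 filters of 3*3*8

conv_g4 = nn.Conv2d(8, 12, 3, groups=4)
print(conv_g4.weight.shape)  # torch.Size([12, 2, 3, 3]) -> 12 filters of 3*3*2

# conv2 of the SiamFC backbone: 96 -> 256 with groups=2,
# so each filter only sees 96 / 2 = 48 input channels
conv2 = nn.Conv2d(96, 256, kernel_size=5, stride=1, groups=2)
print(conv2.weight.shape)  # torch.Size([256, 48, 5, 5])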
def __init__(self, seq_dataset, **kargs):
    super(Pairwise, self).__init__()
    self.cfg = self.parse_args(**kargs)
    self.para = param()
    self.seq_dataset = seq_dataset  # a got10k dataset object
    # shuffle seq_dataset; the shuffled order is stored in self.indices
    self.indices = np.random.permutation(len(seq_dataset))
    # augmentation for exemplar and instance images
    self.transform_z = Compose([
        # random scaling of the image by a factor in [0.95, 1.05)
        RandomStretch(max_stretch=0.05),
        # cfg.instance_sz = 255; center crop of size 255 - 8
        CenterCrop(self.cfg.instance_sz - 8),
        # random crop of size 255 - 2*8
        RandomCrop(self.cfg.instance_sz - 2 * 8),
        # cfg.exemplar_sz = 127; final center crop to 127 x 127
        CenterCrop(self.cfg.exemplar_sz),
        ToTensor()
    ])
    self.transform_x = Compose([
        RandomStretch(max_stretch=0.05),
        CenterCrop(self.cfg.instance_sz - 8),
        RandomCrop(self.cfg.instance_sz - 2 * 8),
        ToTensor()
    ])
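# Size trace for transform_z, a sketch using torchvision transforms as stand-ins
# for the repo's custom ops (RandomStretch is omitted here, so only the crop
# geometry is demonstrated, not the full augmentation): 255 -> 247 -> 239 -> 127.
from PIL import Image
from torchvision import transforms

instance_sz, exemplar_sz = 255, 127
img = Image.new('RGB', (instance_sz, instance_sz))  # dummy instance-sized image

pipeline = transforms.Compose([
    transforms.CenterCrop(instance_sz - 8),      # 255 -> 247
    transforms.RandomCrop(instance_sz - 2 * 8),  # 247 -> 239 (random offset)
    transforms.CenterCrop(exemplar_sz),          # 239 -> 127 exemplar patch
    transforms.ToTensor()])

z = pipeline(img)
print(z.shape)  # torch.Size([3, 127, 127])
# transform_x omits the final CenterCrop, so the instance patch stays 239 x 239,
# keeping the larger search region around the target.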
# load the pretrained model stored at file_path into net
def load_pre_train_mode(net, file_path):
    # read the pretrained network weights
    pretrained_dict = torch.load(file_path)
    my_model_dict = net.state_dict()
    # keep only the entries whose keys also exist in the current model
    pretrained_dict = {
        k: v for k, v in pretrained_dict.items() if k in my_model_dict
    }
    my_model_dict.update(pretrained_dict)
    net.load_state_dict(my_model_dict)


if __name__ == '__main__':
    # setup dataset
    para = param()
    name = 'GOT-10k'
    assert name in ['VID', 'GOT-10k']
    if name == 'GOT-10k':
        root_dir = '/home/fanfu/data/GOT-10k'
        seq_dataset = GOT10k(root_dir, subset='train')
    elif name == 'VID':
        root_dir = '/home/fanfu/data/ILSVRC2015/Data/VID/'
        seq_dataset = ImageNetVID(root_dir, subset=('train', 'val'))
    pair_dataset = Pairwise(seq_dataset)

    # setup data loader
    cuda = torch.cuda.is_available()
    loader = DataLoader(pair_dataset,
                        batch_size=8,
                        shuffle=True,
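# Side note, a hedged usage sketch of load_pre_train_mode above; the checkpoint
# path is hypothetical. Checkpoint keys missing from the model are dropped by
# the dict comprehension, and model keys absent from the checkpoint (e.g. the
# new `deconv` layer) keep their freshly initialized values.
net = SiamFC()
load_pre_train_mode(net, 'pretrained/siamfc_base.pth')  # hypothetical path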