def steepest_descent_learn_filter_resnet18_newiou(
    filter_size=1,
    optim_iter=3,
    optim_init_step=1.0,
    optim_init_reg=0.01,
    output_activation=None,
    classification_layer='layer3',
    backbone_pretrained=False,
    clf_feat_blocks=1,
    clf_feat_norm=True,
    init_filter_norm=False,
    final_conv=False,
    out_feature_dim=256,
    init_gauss_sigma=1.0,
    num_dist_bins=5,
    bin_displacement=1.0,
    test_loss=None,
    mask_init_factor=4.0,
    iou_input_dim=(256, 256),
    iou_inter_dim=(256, 256),
    jitter_sigma_factor=None,
    train_backbone=True,
):
    """Assemble an ``OptimTracker`` on top of a ResNet-18 backbone.

    The network is made of three parts, created in this order: the ResNet-18
    backbone, a ``LinearFilter`` classifier (residual-basic-block feature
    extractor + ``FilterInitializerLinear`` + ``SteepestDescentLearn``), and
    an ``AtomIoUNet`` bounding box regressor fed from 'layer2' and 'layer3'.

    Args:
        filter_size: Forwarded to the initializer, optimizer and classifier;
            also used to compute the feature normalisation scale.
        optim_iter: ``num_iter`` of the filter optimizer.
        optim_init_step: ``init_step_length`` of the filter optimizer.
        optim_init_reg: ``init_filter_reg`` of the filter optimizer.
        output_activation: Forwarded to ``LinearFilter``.
        classification_layer: Backbone layer name handed to ``OptimTracker``.
        backbone_pretrained: ``pretrained`` flag of ``backbones.resnet18``.
        clf_feat_blocks: ``num_blocks`` of the classifier feature extractor.
        clf_feat_norm: ``l2norm`` flag of the classifier feature extractor.
        init_filter_norm: ``filter_norm`` flag of the filter initializer.
        final_conv: Forwarded to the classifier feature extractor.
        out_feature_dim: Feature dimension shared by the feature extractor,
            initializer and optimizer.
        init_gauss_sigma, num_dist_bins, bin_displacement, test_loss,
        mask_init_factor: Forwarded unchanged to ``SteepestDescentLearn``.
        iou_input_dim: ``pred_input_dim`` of ``AtomIoUNet``.
        iou_inter_dim: ``pred_inter_dim`` of ``AtomIoUNet``.
        jitter_sigma_factor: Forwarded to ``LinearFilter``.
        train_backbone: Passed to ``OptimTracker`` as
            ``train_feature_extractor``.

    Returns:
        The constructed ``OptimTracker`` instance.
    """
    # Backbone
    resnet = backbones.resnet18(pretrained=backbone_pretrained)

    # Normalisation scale: sqrt(1 / (feature_dim * filter_size^2))
    filter_volume = out_feature_dim * filter_size * filter_size
    feat_norm_scale = math.sqrt(1.0 / filter_volume)

    # Classifier components (created in the same order as they are wired up)
    feat_extractor = clf_features.residual_basic_block(
        num_blocks=clf_feat_blocks,
        l2norm=clf_feat_norm,
        final_conv=final_conv,
        norm_scale=feat_norm_scale,
        out_dim=out_feature_dim,
    )

    filter_init = clf_initializer.FilterInitializerLinear(
        filter_size=filter_size,
        filter_norm=init_filter_norm,
        feature_dim=out_feature_dim,
    )

    optimizer_settings = dict(
        num_iter=optim_iter,
        filter_size=filter_size,
        init_step_length=optim_init_step,
        init_filter_reg=optim_init_reg,
        feature_dim=out_feature_dim,
        init_gauss_sigma=init_gauss_sigma,
        num_dist_bins=num_dist_bins,
        bin_displacement=bin_displacement,
        test_loss=test_loss,
        mask_init_factor=mask_init_factor,
    )
    filter_optim = clf_optimizer.SteepestDescentLearn(**optimizer_settings)

    target_classifier = target_clf.LinearFilter(
        filter_size=filter_size,
        filter_initializer=filter_init,
        filter_optimizer=filter_optim,
        feature_extractor=feat_extractor,
        output_activation=output_activation,
        jitter_sigma_factor=jitter_sigma_factor,
    )

    # Bounding box regressor
    iou_net = bbmodels.AtomIoUNet(
        pred_input_dim=iou_input_dim,
        pred_inter_dim=iou_inter_dim,
    )

    return OptimTracker(
        feature_extractor=resnet,
        classifier=target_classifier,
        bb_regressor=iou_net,
        classification_layer=classification_layer,
        bb_regressor_layer=['layer2', 'layer3'],
        train_feature_extractor=train_backbone,
    )
def _select_prefixed_weights(checkpoint, prefix, target_keys):
    """Pick the checkpoint entries that belong to one sub-module.

    Only keys that really start with ``prefix`` are considered; the prefix is
    stripped and the entry is kept when the stripped key is in ``target_keys``.
    """
    selected = {}
    for key, value in checkpoint.items():
        if not key.startswith(prefix):
            continue
        stripped = key[len(prefix):]
        if stripped in target_keys:
            selected[stripped] = value
    return selected


def steepest_descent_learn_filter_resnet50_newiou(filter_size=1, optim_iter=3, optim_init_step=1.0,
                                                  optim_init_reg=0.01, output_activation=None,
                                                  classification_layer='layer3', backbone_pretrained=False,
                                                  clf_feat_blocks=1, clf_feat_norm=True, init_filter_norm=False,
                                                  final_conv=False, out_feature_dim=256, init_gauss_sigma=1.0,
                                                  num_dist_bins=5, bin_displacement=1.0, test_loss=None,
                                                  mask_init_factor=4.0, iou_input_dim=(256,256),
                                                  iou_inter_dim=(256,256), jitter_sigma_factor=None,
                                                  pretrainmodel_path='/home/lichao/projects/pytracking_lichao/pytracking/DiMP_nets/sdlearn_300_onlytestloss_lr_causal_mg30_iou_nocf_res50_lfilt512_coco/OptimTracker_ep0040.pth.tar',
                                                  usepretrain=True, updback=True, updcls=True, updbb=True):
    """Assemble an ``OptimTracker`` on a ResNet-50 backbone and warm-start it from a checkpoint.

    The network consists of a ResNet-50 backbone, a ``LinearFilter`` classifier
    (``residual_bottleneck_comb`` feature extractor + ``FilterInitializerLinear``
    + ``SteepestDescentLearn``) and an ``AtomIoUNet`` bounding box regressor fed
    from 'layer2' and 'layer3'. After construction, weights from the checkpoint
    at ``pretrainmodel_path`` are loaded (strictly) into the selected parts.

    Args:
        filter_size: Forwarded to the initializer, optimizer and classifier;
            also used to compute the feature normalisation scale.
        optim_iter: ``num_iter`` of the filter optimizer.
        optim_init_step: ``init_step_length`` of the filter optimizer.
        optim_init_reg: ``init_filter_reg`` of the filter optimizer.
        output_activation: Forwarded to ``LinearFilter``.
        classification_layer: Backbone layer name handed to ``OptimTracker``.
        backbone_pretrained: ``pretrained`` flag of ``backbones.resnet50``.
        clf_feat_blocks: ``num_blocks`` of the classifier feature extractor.
        clf_feat_norm: ``l2norm`` flag of the classifier feature extractor.
        init_filter_norm: ``filter_norm`` flag of the filter initializer.
        final_conv: Forwarded to the classifier feature extractor.
        out_feature_dim: Feature dimension shared by the feature extractor,
            initializer and optimizer.
        init_gauss_sigma, num_dist_bins, bin_displacement, test_loss,
        mask_init_factor: Forwarded unchanged to ``SteepestDescentLearn``.
        iou_input_dim: ``pred_input_dim`` of ``AtomIoUNet``.
        iou_inter_dim: ``pred_inter_dim`` of ``AtomIoUNet``.
        jitter_sigma_factor: Forwarded to ``LinearFilter``.
        pretrainmodel_path: Checkpoint read with ``loading.torch_load_legacy``;
            its ``'net'`` entry is used as the source state dict. The default
            is the path that was previously hard-coded in the body.
        usepretrain: Master switch; when False the checkpoint is not read.
        updback: Load the ``feature_extractor.*`` entries into the backbone.
        updcls: Load the ``classifier.*`` entries into the classifier.
        updbb: Load the ``bb_regressor.*`` entries into the box regressor.

    Returns:
        The constructed ``OptimTracker`` instance.

    Raises:
        KeyError: If ``updcls`` is set and the checkpoint lacks
            ``'classifier.feature_extractor.0.weight'``.
    """
    # backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)

    norm_scale = math.sqrt(1.0 / (out_feature_dim * filter_size * filter_size))

    # classifier
    clf_feature_extractor = clf_features.residual_bottleneck_comb(num_blocks=clf_feat_blocks, l2norm=clf_feat_norm,
                                                                  final_conv=final_conv, norm_scale=norm_scale,
                                                                  out_dim=out_feature_dim)
    initializer = clf_initializer.FilterInitializerLinear(filter_size=filter_size, filter_norm=init_filter_norm,
                                                          feature_dim=out_feature_dim)
    optimizer = clf_optimizer.SteepestDescentLearn(num_iter=optim_iter, filter_size=filter_size,
                                                   init_step_length=optim_init_step,
                                                   init_filter_reg=optim_init_reg, feature_dim=out_feature_dim,
                                                   init_gauss_sigma=init_gauss_sigma,
                                                   num_dist_bins=num_dist_bins,
                                                   bin_displacement=bin_displacement,
                                                   test_loss=test_loss, mask_init_factor=mask_init_factor)
    classifier = target_clf.LinearFilter(filter_size=filter_size, filter_initializer=initializer,
                                         filter_optimizer=optimizer, feature_extractor=clf_feature_extractor,
                                         output_activation=output_activation,
                                         jitter_sigma_factor=jitter_sigma_factor)

    # Bounding box regressor
    # Original note: "combine RGB and TIR by 2*".
    # NOTE(review): input_dim below carries no factor of 2 - confirm the note is still accurate.
    bb_regressor = bbmodels.AtomIoUNet(input_dim=(4*128,4*256), pred_input_dim=iou_input_dim,
                                       pred_inter_dim=iou_inter_dim)

    if usepretrain:
        # load pretrained model (only when it is actually going to be used)
        pretrainmodel = loading.torch_load_legacy(pretrainmodel_path)['net']

        if updback:
            # update backbone
            backbone_net.load_state_dict(
                _select_prefixed_weights(pretrainmodel, 'feature_extractor.', backbone_net.state_dict()))

        if updcls:
            # update classifier
            # The first feature-extractor weight is concatenated with itself along
            # dim 1 before loading - presumably so it matches a layer that takes
            # twice as many input channels (RGB + TIR); TODO confirm.
            first_conv_key = 'classifier.feature_extractor.0.weight'
            first_conv_weight = pretrainmodel[first_conv_key]
            pretrainmodel[first_conv_key] = torch.cat((first_conv_weight, first_conv_weight), 1)
            classifier.load_state_dict(
                _select_prefixed_weights(pretrainmodel, 'classifier.', classifier.state_dict()))

        if updbb:
            # update Bounding box regressor
            bb_regressor.load_state_dict(
                _select_prefixed_weights(pretrainmodel, 'bb_regressor.', bb_regressor.state_dict()))

    net = OptimTracker(feature_extractor=backbone_net, classifier=classifier, bb_regressor=bb_regressor,
                       classification_layer=classification_layer, bb_regressor_layer=['layer2', 'layer3'])
    return net
def kysnet_res50(
    filter_size=4,
    optim_iter=3,
    appearance_feature_dim=512,
    optim_init_step=0.9,
    optim_init_reg=0.1,
    classification_layer='layer3',
    backbone_pretrained=True,
    clf_feat_blocks=0,
    clf_feat_norm=True,
    final_conv=True,
    init_filter_norm=False,
    mask_init_factor=3.0,
    score_act='relu',
    target_mask_act='sigmoid',
    num_dist_bins=100,
    bin_displacement=0.1,
    detach_length=float('Inf'),
    train_feature_extractor=True,
    train_iounet=True,
    iou_input_dim=(256, 256),
    iou_inter_dim=(256, 256),
    cv_kernel_size=3,
    cv_max_displacement=9,
    cv_stride=1,
    init_gauss_sigma=1.0,
    state_dim=8,
    representation_predictor_dims=(64, 32),
    gru_ksz=3,
    conf_measure='max',
    dimp_thresh=None,
):
    """Assemble a ``KYSNet`` on top of a ResNet-50 backbone.

    Components are created in this order: ResNet-50 backbone, DiMP classifier
    (``residual_bottleneck`` features + ``FilterInitializerLinear`` +
    ``DiMPSteepestDescentGN`` wrapped in ``LinearFilter``), ``AtomIoUNet`` box
    regressor, ``CostVolume`` layer, ``ResponsePredictor`` and the
    ``PredictorWrapper`` that combines the last two.

    Args:
        filter_size: Forwarded to the initializer and classifier; also used
            to compute the feature normalisation scale.
        optim_iter: ``num_iter`` of the filter optimizer.
        appearance_feature_dim: Output dimension of the classifier feature
            extractor and ``feature_dim`` of the filter initializer.
        optim_init_step: ``init_step_length`` of the filter optimizer.
        optim_init_reg: ``init_filter_reg`` of the filter optimizer.
        classification_layer: Backbone layer name handed to ``KYSNet``.
        backbone_pretrained: ``pretrained`` flag of ``backbones.resnet50``.
        clf_feat_blocks: ``num_blocks`` of the classifier feature extractor.
        clf_feat_norm: ``l2norm`` flag of the classifier feature extractor.
        final_conv: Forwarded to the classifier feature extractor.
        init_filter_norm: ``filter_norm`` flag of the filter initializer.
        mask_init_factor, num_dist_bins, bin_displacement, init_gauss_sigma,
        detach_length: Forwarded unchanged to ``DiMPSteepestDescentGN``.
        score_act: ``score_act`` of the filter optimizer.
        target_mask_act: ``mask_act`` of the filter optimizer.
        train_feature_extractor, train_iounet: Forwarded to ``KYSNet``.
        iou_input_dim: ``pred_input_dim`` of ``AtomIoUNet``.
        iou_inter_dim: ``pred_inter_dim`` of ``AtomIoUNet``.
        cv_kernel_size, cv_max_displacement: First and second positional
            arguments of ``CostVolume``.
        cv_stride: ``stride`` of ``CostVolume``.
        state_dim, representation_predictor_dims, gru_ksz, conf_measure,
        dimp_thresh: Forwarded unchanged to ``ResponsePredictor``.

    Returns:
        The constructed ``KYSNet`` instance.
    """
    # ---- backbone ----
    resnet = backbones.resnet50(pretrained=backbone_pretrained)

    # Normalisation scale: sqrt(1 / (feature_dim * filter_size^2))
    filter_volume = appearance_feature_dim * filter_size * filter_size
    feat_norm_scale = math.sqrt(1.0 / filter_volume)

    # ---- classifier ----
    feat_extractor = clf_features.residual_bottleneck(
        num_blocks=clf_feat_blocks,
        l2norm=clf_feat_norm,
        final_conv=final_conv,
        norm_scale=feat_norm_scale,
        out_dim=appearance_feature_dim,
    )

    # Initializer for the DiMP classifier
    filter_init = clf_initializer.FilterInitializerLinear(
        filter_size=filter_size,
        filter_norm=init_filter_norm,
        feature_dim=appearance_feature_dim,
    )

    # Optimizer for the DiMP classifier
    optimizer_settings = dict(
        num_iter=optim_iter,
        feat_stride=16,
        init_step_length=optim_init_step,
        init_filter_reg=optim_init_reg,
        init_gauss_sigma=init_gauss_sigma,
        num_dist_bins=num_dist_bins,
        bin_displacement=bin_displacement,
        mask_init_factor=mask_init_factor,
        score_act=score_act,
        act_param=None,
        mask_act=target_mask_act,
        detach_length=detach_length,
    )
    filter_optim = clf_optimizer.DiMPSteepestDescentGN(**optimizer_settings)

    # The classifier module
    dimp_clf = target_clf.LinearFilter(
        filter_size=filter_size,
        filter_initializer=filter_init,
        filter_optimizer=filter_optim,
        feature_extractor=feat_extractor,
    )

    # ---- bounding box regressor ----
    iou_net = bbmodels.AtomIoUNet(
        input_dim=(4 * 128, 4 * 256),
        pred_input_dim=iou_input_dim,
        pred_inter_dim=iou_inter_dim,
    )

    # ---- motion / response prediction ----
    cost_vol = cost_volume.CostVolume(
        cv_kernel_size,
        cv_max_displacement,
        stride=cv_stride,
        abs_coordinate_output=True,
    )

    motion_predictor = resp_pred.ResponsePredictor(
        state_dim=state_dim,
        representation_predictor_dims=representation_predictor_dims,
        gru_ksz=gru_ksz,
        conf_measure=conf_measure,
        dimp_thresh=dimp_thresh,
    )

    wrapped_predictor = predictor_wrappers.PredictorWrapper(cost_vol, motion_predictor)

    return KYSNet(
        backbone_feature_extractor=resnet,
        dimp_classifier=dimp_clf,
        predictor=wrapped_predictor,
        bb_regressor=iou_net,
        classification_layer=classification_layer,
        bb_regressor_layer=['layer2', 'layer3'],
        train_feature_extractor=train_feature_extractor,
        train_iounet=train_iounet,
    )