def initialize(self):
    """Build the ATOM ResNet-18 network, load its weights and set up layer metadata.

    Reads ``self.net_path``, ``self.pool_stride`` and ``self.output_layers``.
    Writes ``self.net``, ``self.iou_predictor``, ``self.layer_stride``,
    ``self.layer_dim``, ``self.iounet_feature_layers``, ``self.feature_layers``,
    ``self.mean`` and ``self.std``; ``self.pool_stride`` is expanded to a list
    when it is the integer 1.
    """
    # NOTE(review): the extent of the dygraph guard cannot be recovered from the
    # flattened source; only network construction/loading is placed inside it
    # here (the remaining statements do not call into fluid) -- confirm.
    with fluid.dygraph.guard():
        # A relative net_path is resolved against the configured network directory.
        if os.path.isabs(self.net_path):
            weights_path = self.net_path
        else:
            weights_path = os.path.join(env_settings().network_path,
                                        self.net_path)

        self.net = atom_resnet18(backbone_pretrained=False,
                                 backbone_is_test=True,
                                 iounet_is_test=True)

        # load_dygraph returns a pair; only the first element is used here.
        weights, _ = fluid.load_dygraph(weights_path)
        self.net.load_dict(weights)
        # NOTE(review): train() is called although the network is built with
        # the *_is_test flags set to True -- presumably intentional; confirm.
        self.net.train()

        self.iou_predictor = self.net.bb_regressor

    # Per-layer lookup tables, keyed by layer name.
    layer_names = ('conv0', 'conv1', 'block0', 'block1', 'block2', 'block3',
                   'classification', 'fc')
    self.layer_stride = dict(zip(layer_names, (2, 2, 4, 8, 16, 32, 16, None)))
    self.layer_dim = dict(
        zip(layer_names, (64, 64, 64, 128, 256, 512, 256, None)))

    self.iounet_feature_layers = self.net.bb_regressor_layer

    # A scalar pool stride of 1 is expanded to one entry per output layer.
    if isinstance(self.pool_stride, int) and self.pool_stride == 1:
        self.pool_stride = [1] * len(self.output_layers)

    # Sorted union of the requested output layers and the IoUNet input layers.
    self.feature_layers = sorted(
        set(self.output_layers + self.iounet_feature_layers))

    # Normalisation constants reshaped to (1, 3, 1, 1).
    self.mean = np.array([0.485, 0.456, 0.406]).reshape(1, -1, 1, 1)
    self.std = np.array([0.229, 0.224, 0.225]).reshape(1, -1, 1, 1)
def main():
    """Re-save the weights of an ATOM network as a CPU checkpoint.

    Command line:
        backbone: one of the aliases in the ``backbones`` table below.

    The network is constructed with ``cpu=True``, the source weights are loaded
    with ``strict=True`` and a checkpoint dictionary is written to
    ``<network_dir><model_name>_cpu.pth.tar``. An unknown backbone alias is
    rejected by argparse (usage message on stderr, non-zero exit status)
    instead of printing a message and returning successfully.
    """
    network_dir = '/content/pytracking/pytracking/networks/'

    # backbone alias -> (constructor name in atom_models, source checkpoint file)
    backbones = {}
    for alias in ('teacher', 'resnet18', 'default'):
        backbones[alias] = ('atom_resnet18', 'atom_default.pth')
    for alias in ('resnet18tiny', 'resnet18small', 'resnet18medium'):
        backbones[alias] = ('atom_' + alias, 'cfkd_' + alias + '.pth.tar')
    for alias in ('mobilenet', 'cfkd'):
        backbones[alias] = ('atom_mobilenetsmall', 'atom_cfkd.pth.tar')

    parser = argparse.ArgumentParser(
        description='Re-save ATOM network weights as a CPU checkpoint')
    parser.add_argument('backbone', type=str, choices=sorted(backbones),
                        help='backbone whose weights should be converted')
    args = parser.parse_args()

    model_name, checkpoint_file = backbones[args.backbone]
    net_constructor = getattr(atom_models, model_name)
    net = net_constructor(backbone_pretrained=False, cpu=True)
    net = loading.load_weights(net, network_dir + checkpoint_file, strict=True)

    # Only network-related entries are stored; the original source carried
    # commented-out training entries (epoch, actor_type, optimizer, stats,
    # settings) that are not written by this script.
    state = {
        'net_type': type(net).__name__,
        'net': net.state_dict(),
        'net_info': getattr(net, 'info', None),
        'constructor': getattr(net, 'constructor', None),
    }

    # Write to a temporary name first, then move into place so that a partially
    # written file never carries the final name. os.replace overwrites an
    # existing destination on every platform.
    tmp_name = network_dir + model_name + '_cpu.tmp'
    torch.save(state, tmp_name)
    os.replace(tmp_name, network_dir + model_name + '_cpu.pth.tar')
def run(settings):
    """Configure and launch ATOM IoUNet training (default paper settings).

    Args:
        settings: Settings object that is filled in place. It must already
            provide ``settings.env`` with ``lasot_dir``, ``trackingnet_dir``
            and ``coco_dir``.

    Only the parameters of ``net.bb_regressor`` are handed to the optimizer.
    """
    # ---- General settings -------------------------------------------------
    settings.description = 'ATOM IoUNet with default settings according to the paper.'
    settings.batch_size = 64
    settings.num_workers = 8
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}

    # ---- Datasets ---------------------------------------------------------
    env = settings.env
    lasot_train = Lasot(env.lasot_dir, split='train')
    trackingnet_train = TrackingNet(env.trackingnet_dir,
                                    set_ids=list(range(11)))
    coco_train = MSCOCOSeq(env.coco_dir)

    # TrackingNet set 11 is used for validation.
    trackingnet_val = TrackingNet(env.trackingnet_dir, set_ids=[11])

    # ---- Transforms -------------------------------------------------------
    # Joint transform: applied to the image pair as a whole.
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    # Per-image transforms; train adds jitter, val does not.
    normalize_kwargs = dict(mean=settings.normalize_mean,
                            std=settings.normalize_std)
    transform_train = tfm.Transform(tfm.ToTensorAndJitter(0.2),
                                    tfm.Normalize(**normalize_kwargs))
    transform_val = tfm.Transform(tfm.ToTensor(),
                                  tfm.Normalize(**normalize_kwargs))

    # ---- Pair processing --------------------------------------------------
    proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3],
    }
    # Train and val processing differ only in the per-image transform.
    processing_kwargs = dict(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        joint_transform=transform_joint)
    data_processing_train = processing.ATOMProcessing(
        transform=transform_train, **processing_kwargs)
    data_processing_val = processing.ATOMProcessing(
        transform=transform_val, **processing_kwargs)

    # ---- Samplers and loaders ---------------------------------------------
    loader_kwargs = dict(batch_size=settings.batch_size,
                         num_workers=settings.num_workers,
                         drop_last=True,
                         stack_dim=1)

    dataset_train = sampler.ATOMSampler(
        [lasot_train, trackingnet_train, coco_train], [1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)
    loader_train = LTRLoader('train', dataset_train, training=True,
                             shuffle=True, **loader_kwargs)

    dataset_val = sampler.ATOMSampler(
        [trackingnet_val], [1],
        samples_per_epoch=500 * settings.batch_size,
        max_gap=50,
        processing=data_processing_val)
    # epoch_interval=5: presumably the val loader is only cycled every 5th epoch.
    loader_val = LTRLoader('val', dataset_val, training=False,
                           shuffle=False, epoch_interval=5, **loader_kwargs)

    # ---- Network, actor, optimizer ----------------------------------------
    net = atom_models.atom_resnet18(backbone_pretrained=True)
    actor = actors.AtomActor(net=net, objective=nn.MSELoss())

    optimizer = optim.Adam(actor.net.bb_regressor.parameters(), lr=1e-3)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15,
                                             gamma=0.2)

    # ---- Training ---------------------------------------------------------
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)

    # Set fail_safe=False when debugging.
    trainer.train(50, load_latest=True, fail_safe=True)
def run(settings):
    """Configure and launch ATOM IoUNet training (ResNet18; VID, LaSOT, COCO).

    Args:
        settings: Settings object that is filled in place and passed on to
            the trainer.

    The network is built and trained inside ``dygraph.guard()``. Backbone
    parameters are marked ``stop_gradient`` and only the parameters of
    ``net.bb_regressor`` are handed to the optimizer.
    """
    # Most common settings are assigned in the settings struct
    settings.description = 'ATOM IoUNet with ResNet18 backbone and trained with vid, lasot, coco.'
    settings.print_interval = 1  # How often to print loss and other info
    settings.batch_size = 64  # Batch size
    settings.num_workers = 4  # Number of workers for image loading
    # Normalize mean / std (default ImageNet values)
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0  # Image patch size relative to target size
    settings.feature_sz = 18  # Size of feature map
    settings.output_sz = settings.feature_sz * 16  # Size of input image patches

    # Settings for the image sample and proposal generation
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}
    settings.proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3]
    }

    # Train datasets
    vid_train = ImagenetVID()
    lasot_train = Lasot(split='train')
    coco_train = MSCOCOSeq()

    # Validation datasets
    got10k_val = Got10k(split='val')

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = dltransforms.ToGrayscale(probability=0.05)

    # The augmentation transform applied to the training set
    # (individually to each image in the pair)
    transform_train = dltransforms.Compose([
        dltransforms.ToArrayAndJitter(0.2),
        dltransforms.Normalize(mean=settings.normalize_mean,
                               std=settings.normalize_std)
    ])

    # The augmentation transform applied to the validation set
    # (individually to each image in the pair)
    transform_val = dltransforms.Compose([
        dltransforms.ToArray(),
        dltransforms.Normalize(mean=settings.normalize_mean,
                               std=settings.normalize_std)
    ])

    # Data processing to do on the training pairs
    data_processing_train = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=settings.proposal_params,
        transform=transform_train,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=settings.proposal_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # The sampler for training
    dataset_train = sampler.ATOMSampler(
        [vid_train, lasot_train, coco_train], [1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)

    # The loader for training. The worker count comes from the settings
    # struct (previously a literal 4 that ignored settings.num_workers).
    train_loader = loader.LTRLoader('train',
                                    dataset_train,
                                    training=True,
                                    batch_size=settings.batch_size,
                                    num_workers=settings.num_workers,
                                    stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler(
        [got10k_val], [1],
        samples_per_epoch=500 * settings.batch_size,
        max_gap=50,
        processing=data_processing_val)

    # The loader for validation
    val_loader = loader.LTRLoader('val',
                                  dataset_val,
                                  training=False,
                                  batch_size=settings.batch_size,
                                  epoch_interval=5,
                                  num_workers=settings.num_workers,
                                  stack_dim=1)

    # Create network, set objective, create optimizer, learning rate
    # scheduler and trainer
    with dygraph.guard():
        # Create network
        net = atom_resnet18(backbone_pretrained=True)

        # Freeze backbone: every feature_extractor entry whose name does not
        # contain "running" (presumably the batch-norm running statistics --
        # confirm) is excluded from gradient computation.
        for name, param in net.state_dict().items():
            if 'feature_extractor' in name and "running" not in name:
                param.stop_gradient = True

        # Set objective
        objective = fluid.layers.square_error_cost

        # Create actor, which wraps network and objective
        actor = actors.AtomActor(net=net, objective=objective)

        # Set to training mode
        actor.train()

        # Define optimizer and learning rate
        # NOTE(review): three boundaries are paired with three values and
        # step=1000 -- confirm against the PiecewiseDecay API and against how
        # LTRTrainer advances the scheduler.
        gamma = 0.2
        lr = 1e-3
        lr_scheduler = fluid.dygraph.PiecewiseDecay(
            [15, 30, 45],
            values=[lr, lr * gamma, lr * gamma * gamma],
            step=1000,
            begin=0)

        optimizer = fluid.optimizer.Adam(
            parameter_list=net.bb_regressor.parameters(),
            learning_rate=lr_scheduler)

        trainer = LTRTrainer(actor, [train_loader, val_loader], optimizer,
                             settings, lr_scheduler)
        trainer.train(40, load_latest=False, fail_safe=False)
def run(settings):
    """Configure and launch two-stage distillation of ATOM into a MobileNet student.

    Args:
        settings: Settings object that is filled in place. It must already
            provide ``settings.env`` with ``lasot_dir``, ``trackingnet_dir``
            and ``coco_dir``.

    Stage 1 optimizes the student's ``feature_extractor`` with the
    ``bb_regressor`` parameters frozen; stage 2 re-enables the regressor,
    freezes the feature extractor and swaps objective, optimizer and
    scheduler on the existing trainer.
    """

    def set_trainable(module, flag):
        # Toggle requires_grad on every parameter of `module`.
        for param in module.parameters():
            param.requires_grad_(flag)

    # ---- General settings -------------------------------------------------
    settings.description = 'distilled ATOM IoUNet with default settings according to the paper.'
    settings.batch_size = 32
    settings.num_workers = 8
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}

    # ---- Datasets ---------------------------------------------------------
    env = settings.env
    lasot_train = Lasot(env.lasot_dir, split='train')
    trackingnet_train = TrackingNet(env.trackingnet_dir,
                                    set_ids=list(range(11)))
    coco_train = MSCOCOSeq(env.coco_dir)

    # TrackingNet set 11 is used for validation.
    trackingnet_val = TrackingNet(env.trackingnet_dir, set_ids=[11])

    # ---- Transforms -------------------------------------------------------
    # Joint transform: applied to the image pair as a whole.
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    # Per-image transforms; train adds jitter, val does not.
    normalize_kwargs = dict(mean=settings.normalize_mean,
                            std=settings.normalize_std)
    transform_train = tfm.Transform(tfm.ToTensorAndJitter(0.2),
                                    tfm.Normalize(**normalize_kwargs))
    transform_val = tfm.Transform(tfm.ToTensor(),
                                  tfm.Normalize(**normalize_kwargs))

    # ---- Pair processing --------------------------------------------------
    proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3],
    }
    # Train and val processing differ only in the per-image transform.
    processing_kwargs = dict(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        joint_transform=transform_joint)
    data_processing_train = processing.ATOMProcessing(
        transform=transform_train, **processing_kwargs)
    data_processing_val = processing.ATOMProcessing(
        transform=transform_val, **processing_kwargs)

    # ---- Samplers and loaders ---------------------------------------------
    loader_kwargs = dict(batch_size=settings.batch_size,
                         num_workers=settings.num_workers,
                         drop_last=True,
                         stack_dim=1)

    dataset_train = sampler.ATOMSampler(
        [lasot_train, trackingnet_train, coco_train], [1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)
    loader_train = LTRLoader('train', dataset_train, training=True,
                             shuffle=True, **loader_kwargs)

    dataset_val = sampler.ATOMSampler(
        [trackingnet_val], [1],
        samples_per_epoch=500 * settings.batch_size,
        max_gap=50,
        processing=data_processing_val)
    loader_val = LTRLoader('val', dataset_val, training=False,
                           shuffle=False, epoch_interval=5, **loader_kwargs)

    # ---- Teacher network --------------------------------------------------
    teacher_net = atom_models.atom_resnet18(backbone_pretrained=True)
    teacher_path = '/home/ddanier/CFKD/pytracking/networks/atom_default.pth'
    teacher_net = loading.load_weights(teacher_net, teacher_path, strict=True)
    print(
        '*******************Teacher net loaded successfully*******************'
    )

    # ---- Student network --------------------------------------------------
    student_net = atom_models.atom_mobilenetsmall(backbone_pretrained=False)

    ##########################################################
    ### Stage 1: distil backbone, regressor gradients off  ###
    ##########################################################
    set_trainable(student_net.bb_regressor, False)

    stage1_objective = distillation.CFKDLoss(
        reg_loss=nn.MSELoss(),
        w_ts=0.,
        w_ah=0.,
        w_cf=0.01,
        w_fd=100.,
        cf_layers=['conv1', 'layer1', 'layer2', 'layer3'])
    actor = actors.AtomCompressionActor(student_net, teacher_net,
                                        stage1_objective)

    optimizer = optim.Adam(actor.student_net.feature_extractor.parameters(),
                           lr=1e-2)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15,
                                             gamma=0.1)

    trainer = LTRDistillationTrainer(actor, [loader_train, loader_val],
                                     optimizer, settings, lr_scheduler)

    # Set fail_safe=False when debugging.
    trainer.train(50, load_latest=False, fail_safe=True)

    ##########################################################
    ### Stage 2: distil regressor, backbone gradients off  ###
    ##########################################################
    student = trainer.actor.student_net
    set_trainable(student.bb_regressor, True)
    set_trainable(student.feature_extractor, False)

    trainer.actor.objective = distillation.CFKDLoss(reg_loss=nn.MSELoss(),
                                                    w_ts=1.,
                                                    w_ah=0.1,
                                                    w_cf=0.,
                                                    w_fd=0.)

    # A fresh optimizer and scheduler are installed on the existing trainer.
    trainer.optimizer = optim.Adam(student.bb_regressor.parameters(), lr=1e-2)
    trainer.lr_scheduler = optim.lr_scheduler.StepLR(trainer.optimizer,
                                                     step_size=15,
                                                     gamma=0.1)

    # Set fail_safe=False when debugging.
    trainer.train(100, load_latest=False, fail_safe=True)
def run(settings):
    """Configure and launch a one-epoch ATOM IoUNet run on LaSOT and PrincetonRGBD.

    Args:
        settings: Settings object that is filled in place and passed on to
            the trainer.

    Only the parameters of ``net.bb_regressor`` are handed to the optimizer.
    """
    # ---- General settings -------------------------------------------------
    settings.description = 'ATOM IoUNet with default settings.'
    settings.print_interval = 1  # How often to print loss and other info
    settings.batch_size = 64  # Batch size
    settings.num_workers = 4  # Number of workers for image loading
    # Normalize mean / std (default pytorch ImageNet values)
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0  # Image patch size relative to target size
    settings.feature_sz = 18  # Size of feature map
    settings.output_sz = settings.feature_sz * 16  # Size of input image patches

    # ---- Image sample and proposal generation -----------------------------
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}
    settings.proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3],
    }

    # ---- Datasets ---------------------------------------------------------
    # The original source carried TrackingNet and MSCOCOSeq training sets as
    # commented-out code; they are not used by this configuration.
    lasot_train = Lasot(split='train')
    ptb_train = PrincetonRGBD(split='validation')

    # NOTE(review): validation uses the LaSOT *train* split (the original
    # local was named trackingnet_val, with a commented-out TrackingNet
    # set 11 next to it) -- confirm this overlap with training is intended.
    lasot_val = Lasot(split='train')

    # ---- Transforms -------------------------------------------------------
    # Joint transform: applied to the image pair as a whole.
    transform_joint = dltransforms.ToGrayscale(probability=0.05)

    # Per-image transforms; train adds jitter, val does not.
    normalize_kwargs = dict(mean=settings.normalize_mean,
                            std=settings.normalize_std)
    transform_train = torchvision.transforms.Compose([
        dltransforms.ToTensorAndJitter(0.2),
        torchvision.transforms.Normalize(**normalize_kwargs),
    ])
    transform_val = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(**normalize_kwargs),
    ])

    # ---- Pair processing --------------------------------------------------
    # Train and val processing differ only in the per-image transform.
    processing_kwargs = dict(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=settings.proposal_params,
        joint_transform=transform_joint)
    data_processing_train = processing.ATOMProcessing(
        transform=transform_train, **processing_kwargs)
    data_processing_val = processing.ATOMProcessing(
        transform=transform_val, **processing_kwargs)

    # ---- Samplers and loaders ---------------------------------------------
    loader_kwargs = dict(batch_size=settings.batch_size,
                         num_workers=settings.num_workers,
                         drop_last=True,
                         stack_dim=1)

    dataset_train = sampler.ATOMSampler(
        [lasot_train, ptb_train], [1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)
    loader_train = LTRLoader('train', dataset_train, training=True,
                             shuffle=True, **loader_kwargs)

    dataset_val = sampler.ATOMSampler(
        [lasot_val], [1],
        samples_per_epoch=500 * settings.batch_size,
        max_gap=50,
        processing=data_processing_val)
    loader_val = LTRLoader('val', dataset_val, training=False,
                           shuffle=False, epoch_interval=5, **loader_kwargs)

    # ---- Network, actor, optimizer ----------------------------------------
    net = atom_models.atom_resnet18(backbone_pretrained=True)

    # The actor wraps network and objective.
    actor = actors.AtomActor(net=net, objective=nn.MSELoss())

    optimizer = optim.Adam(actor.net.bb_regressor.parameters(), lr=1e-5)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15,
                                             gamma=0.2)

    # ---- Training ---------------------------------------------------------
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)

    # Set fail_safe=False when debugging. The original source also carried a
    # commented-out path_pretrainedatom='./checkpoints/atom_default.pth' argument.
    trainer.train(1, load_latest=True, fail_safe=False)
def run(settings):
    """Configure and launch ATOM training with the KL / maximum-likelihood box regressor.

    Args:
        settings: Settings object that is filled in place. It must already
            provide ``settings.env`` with ``lasot_dir``, ``got10k_dir``,
            ``trackingnet_dir`` and ``coco_dir``.

    Only the parameters of ``net.bb_regressor`` are handed to the optimizer.
    """
    # Most common settings are assigned in the settings struct.
    # The two string parts are joined with an explicit space; the previous
    # implicit concatenation produced "bounding-boxregression".
    settings.description = 'ATOM using the probabilistic maximum likelihood trained regression model for bounding-box ' \
                           'regression presented in [https://arxiv.org/abs/1909.12297].'
    settings.batch_size = 64
    settings.num_workers = 8
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    got10k_train = Got10k(settings.env.got10k_dir, split='vottrain')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir,
                                    set_ids=list(range(4)))
    coco_train = MSCOCOSeq(settings.env.coco_dir)

    # Validation datasets
    got10k_val = Got10k(settings.env.got10k_dir, split='votval')

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    # The augmentation transform applied to the training set
    # (individually to each image in the pair)
    transform_train = tfm.Transform(
        tfm.ToTensorAndJitter(0.2),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    # The augmentation transform applied to the validation set
    # (individually to each image in the pair)
    transform_val = tfm.Transform(
        tfm.ToTensor(),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    # Proposal generation parameters shared by train and val processing
    proposal_params = {
        'boxes_per_frame': 128,
        'gt_sigma': (0, 0),
        'proposal_sigma': [(0.05, 0.05), (0.5, 0.5)],
        'add_mean_box': True
    }

    # Data processing to do on the training pairs
    data_processing_train = processing.KLBBregProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        transform=transform_train,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.KLBBregProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # The sampler for training
    dataset_train = sampler.ATOMSampler(
        [lasot_train, got10k_train, trackingnet_train, coco_train],
        [1, 1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=200,
        processing=data_processing_train)

    # The loader for training
    loader_train = LTRLoader('train',
                             dataset_train,
                             training=True,
                             batch_size=settings.batch_size,
                             num_workers=settings.num_workers,
                             shuffle=True,
                             drop_last=True,
                             stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler(
        [got10k_val], [1],
        samples_per_epoch=500 * settings.batch_size,
        max_gap=200,
        processing=data_processing_val)

    # The loader for validation
    loader_val = LTRLoader('val',
                           dataset_val,
                           training=False,
                           batch_size=settings.batch_size,
                           num_workers=settings.num_workers,
                           shuffle=False,
                           drop_last=True,
                           epoch_interval=5,
                           stack_dim=1)

    # Create network and actor
    net = atom_models.atom_resnet18(backbone_pretrained=True)
    objective = klreg_losses.MLRegression()
    actor = bbreg_actors.AtomBBKLActor(net=net, objective=objective)

    # Optimizer
    optimizer = optim.Adam(actor.net.bb_regressor.parameters(), lr=1e-3)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15,
                                             gamma=0.2)

    # Create trainer
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(50, load_latest=True, fail_safe=True)