# Copy the remaining converted weights into the new state dict.
model2['layer2.1.weight'] = model1.modules[42].modules[8].weight
model2['layer2.1.bias'] = model1.modules[42].modules[8].bias
model2['fc.0.weight'] = model1.modules[43].modules[1].modules[3].weight
model2['fc.0.bias'] = model1.modules[43].modules[1].modules[3].bias
model2['fc.2.weight'] = model1.modules[43].modules[1].modules[5].weight
model2['fc.2.bias'] = model1.modules[43].modules[1].modules[5].bias

# Create the output directory before the first save, not after it.
exp_dir = os.path.join('experiments', 'vgg_SCNN_DULR_w9')
os.makedirs(exp_dir, exist_ok=True)
save_name = os.path.join(exp_dir, 'vgg_SCNN_DULR_w9.pth')
torch.save(model2, save_name)

# Load the converted weights into an SCNN instance, then overwrite the
# file with a full training checkpoint.
net = SCNN(pretrained=False)
d = torch.load(save_name)
net.load_state_dict(d, strict=False)

# Zero every conv bias in the backbone.
for m in net.backbone.modules():
    if isinstance(m, nn.Conv2d) and m.bias is not None:
        m.bias.data.zero_()

save_dict = {
    "epoch": 0,
    "net": net.state_dict(),
    "optim": None,
    "lr_scheduler": None,
}
torch.save(save_dict, save_name)
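# (Illustrative sanity check, not part of the original conversion
# script.) Because load_state_dict(strict=False) silently drops keys
# that do not match SCNN's parameter names, it is worth confirming how
# many of the converted tensors were actually consumed. Assumes
# `model2` and `save_name` from above are still in scope.
ckpt = torch.load(save_name)
net_keys = set(ckpt["net"].keys())
matched = [k for k in model2 if k in net_keys]
unmatched = [k for k in model2 if k not in net_keys]
print(f"{len(matched)} of {len(model2)} converted tensors matched SCNN parameters")
if unmatched:
    print("dropped by strict=False:", unmatched)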
# Multi-GPU within a process; Horovod handles the cross-process part.
net = torch.nn.DataParallel(net)

lr = exp_cfg['optim']['lr']
momentum = exp_cfg['optim']['momentum']
weight_decay = exp_cfg['optim']['weight_decay']
nesterov = exp_cfg['optim']['nesterov']

# Horovod: scale learning rate by lr_scaler (typically hvd.size(),
# assumed to be defined earlier).
optimizer = optim.SGD(net.parameters(), lr=lr * lr_scaler,
                      momentum=momentum, weight_decay=weight_decay,
                      nesterov=nesterov)

# Horovod: broadcast parameters & optimizer state from rank 0 so that
# every worker starts from identical weights.
hvd.broadcast_parameters(net.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

# Horovod: (optional) compression algorithm.
# compression = hvd.Compression.fp16

# Horovod: wrap optimizer with DistributedOptimizer so gradients are
# averaged across workers.
gradient_predivide_factor = 1.0
optimizer = hvd.DistributedOptimizer(
    optimizer,
    named_parameters=net.named_parameters(),
    # compression=compression,
    op=hvd.Average,
    gradient_predivide_factor=gradient_predivide_factor)

lr_scheduler = PolyLR(optimizer, 0.9, **exp_cfg['lr_scheduler'])
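# (Illustrative sketch, not from the original script.) One epoch of
# training with the Horovod-wrapped optimizer above. `train_loader`,
# `criterion`, and the CUDA placement are assumptions, and hvd.init()
# plus torch.cuda.set_device(hvd.local_rank()) are assumed to have run
# earlier. The scheduler is stepped once per iteration here, giving the
# usual poly decay lr * (1 - iter / max_iter) ** 0.9.
net.train()
for img, label in train_loader:
    img, label = img.cuda(), label.cuda()
    optimizer.zero_grad()
    loss = criterion(net(img), label)
    loss.backward()   # Horovod hooks allreduce gradients during backward
    optimizer.step()  # synchronizes outstanding allreduces, then updates
    lr_scheduler.step()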