# NOTE(review): the line below is a multi-line training-setup snippet whose
# line breaks were collapsed. Because it starts with `#`, Python treats the
# entire physical line as a comment; nothing on it executes as stored.
#
# Read as the script it appears to have been, the statements are, in order:
#   1. (commented out) scale `base_lr` by `engine.world_size` when
#      `engine.distributed` is set.
#   2. Start from an empty `params_list` and pass it through `group_weight`
#      four times, for `model.backbone`, `model.head_layer`,
#      `model.aux_layer` and `model.context`, each time with `BatchNorm2d`
#      and `base_lr`.
#   3. `total_iteration = config.nepochs * config.niters_per_epoch`, then
#      `lr_policy = PolyLR(base_lr, config.lr_power, total_iteration)`.
#   4. Build `torch.optim.SGD` over `params_list` with `lr=base_lr`,
#      `config.momentum` and `config.weight_decay`.
#   5. If `engine.distributed`: call `model.cuda()` when CUDA is available
#      and wrap the model in `DistributedDataParallel`; otherwise pick
#      "cuda" or "cpu" via `torch.cuda.is_available()` and call
#      `model.to(device)`.
#   6. Begin an `engine.register_state(dataloader=train_loader, ...` call.
#
# NOTE(review): the original indentation is lost, so it cannot be determined
# from this line whether `model = DistributedDataParallel(model)` was nested
# under the `torch.cuda.is_available()` check or ran unconditionally in the
# distributed branch -- confirm against the original script.
# NOTE(review): the optimizer is constructed before the model is moved with
# `.cuda()` / `.to(device)`; presumably fine on current PyTorch, but verify
# against the torch.optim guidance on construction order -- TODO confirm.
# NOTE(review): the trailing `engine.register_state(` call is truncated; its
# remaining arguments and closing parenthesis are not part of this line.
# if engine.distributed: # base_lr = config.lr * engine.world_size params_list = [] params_list = group_weight(params_list, model.backbone, BatchNorm2d, base_lr) params_list = group_weight(params_list, model.head_layer, BatchNorm2d, base_lr) params_list = group_weight(params_list, model.aux_layer, BatchNorm2d, base_lr) params_list = group_weight(params_list, model.context, BatchNorm2d, base_lr) # config lr policy total_iteration = config.nepochs * config.niters_per_epoch lr_policy = PolyLR(base_lr, config.lr_power, total_iteration) optimizer = torch.optim.SGD(params_list, lr=base_lr, momentum=config.momentum, weight_decay=config.weight_decay) if engine.distributed: if torch.cuda.is_available(): model.cuda() model = DistributedDataParallel(model) else: device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # model = DataParallelModel(model, engine.devices) model.to(device) engine.register_state(dataloader=train_loader,
# NOTE(review): the line below is a multi-line training-setup snippet whose
# line breaks were collapsed. Because it starts with `#`, Python treats the
# entire physical line as a comment; nothing on it executes as stored.
#
# Read as the script it appears to have been, the statements are, in order:
#   1. (commented out) the tail of a `group_weight(...)` call whose start is
#      not visible here, followed by commented-out `group_weight` calls for
#      `model.refine_64`, `model.up_512`, `model.up_256`, `model.up_128`
#      and `model.up_final`, each with `BatchNorm2d` and `base_lr`.
#   2. Pick "cuda" or "cpu" via `torch.cuda.is_available()` and call
#      `model.to(device)`.
#   3. `total_iteration = config.nepochs * config.niters_per_epoch`, then
#      `lr_policy = PolyLR(base_lr, config.lr_power, total_iteration)`.
#   4. Build `torch.optim.SGD` over `params_list` with `lr=base_lr`,
#      `config.momentum` and `config.weight_decay`.
#   5. (commented out) a loop over `optimizer.state.values()` that calls
#      `.cuda()` on every `torch.Tensor` entry, and an alternative
#      `torch.optim.Adam(model.parameters())` optimizer.
#   6. `engine.register_state(dataloader=train_loader, model=model,
#      optimizer=optimizer)`.
#   7. If `engine.continue_state_object` is truthy, call
#      `engine.restore_checkpoint()`.
#
# NOTE(review): `params_list` and `base_lr` are used here but defined before
# the visible start of this snippet -- confirm how `params_list` is populated
# now that the `group_weight` calls shown are commented out.
# NOTE(review): whether `engine.restore_checkpoint()` was on the same line as
# the `if` or in an indented body cannot be told from this collapsed form;
# either reading gives the same conditional call.
# BatchNorm2d, base_lr) # params_list = group_weight(params_list, model.refine_64, # BatchNorm2d, base_lr) # params_list = group_weight(params_list, model.up_512, # BatchNorm2d, base_lr) # params_list = group_weight(params_list, model.up_256, # BatchNorm2d, base_lr) # params_list = group_weight(params_list, model.up_128, # BatchNorm2d, base_lr) # params_list = group_weight(params_list, model.up_final, # BatchNorm2d, base_lr) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model.to(device) total_iteration = config.nepochs * config.niters_per_epoch lr_policy = PolyLR(base_lr, config.lr_power, total_iteration) optimizer = torch.optim.SGD(params_list, lr=base_lr, momentum=config.momentum, weight_decay=config.weight_decay) # for state in optimizer.state.values(): # for k, v in state.items(): # if isinstance(v, torch.Tensor): # state[k] = v.cuda() # optimizer = torch.optim.Adam(model.parameters()) # register state dictations engine.register_state(dataloader=train_loader, model=model, optimizer=optimizer) if engine.continue_state_object: engine.restore_checkpoint()