# move the criterion to the target device
lossf.to(cuda_device)

# enable automatic mixed precision only when configured and running on CUDA
use_amp = cnfg.use_amp and use_cuda
scaler = (MultiGPUGradScaler() if multi_gpu_optimizer else GradScaler()) if use_amp else None

# wrap the model and criterion for multi-GPU data parallelism
if multi_gpu:
    mymodel = DataParallelMT(mymodel, device_ids=cuda_devices,
        output_device=cuda_device.index, host_replicate=True, gather_output=False)
    lossf = DataParallelCriterion(lossf, device_ids=cuda_devices,
        output_device=cuda_device.index, replicate_once=True)

# build the optimizer over the (possibly parallelized) model's parameters
if multi_gpu:
    optimizer = mymodel.build_optimizer(Optimizer, lr=init_lr,
        betas=adam_betas_default, eps=ieps_adam_default,
        weight_decay=cnfg.weight_decay, amsgrad=use_ams,
        multi_gpu_optimizer=multi_gpu_optimizer,
        contiguous_parameters=contiguous_parameters)
else:
    optimizer = Optimizer(
        get_model_parameters(mymodel, contiguous_parameters=contiguous_parameters),
        lr=init_lr, betas=adam_betas_default, eps=ieps_adam_default,
        weight_decay=cnfg.weight_decay, amsgrad=use_ams)
optimizer.zero_grad(set_to_none=optm_step_zero_grad_set_none)

# learning-rate schedule with warm-up
lrsch = LRScheduler(optimizer, cnfg.isize, cnfg.warm_step, scale=cnfg.lr_scale)

# bundle the mutable training state so it can be saved and restored with checkpoints
state_holder = None if statesf is None and cnt_states is None else Holder(**{
    "optm": optimizer, "lrsch": lrsch, "pyrand": PyRandomState(),
    "thrand": THRandomState(use_cuda=use_cuda)})

num_checkpoint = cnfg.num_checkpoint
cur_checkid = 0
tminerr = inf_default

# initial evaluation on the development set to seed the best loss/error
minloss, minerr = eva(vd, vl, mymodel, lossf, cuda_device, multi_gpu, use_amp)
logger.info("".join(("Init lr: ", ",".join(tostr(getlr(optimizer))),
    ", Dev Loss/Error: %.3f %.2f" % (minloss, minerr))))

if fine_tune_m is None:
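# The LRScheduler above presumably implements the inverse-square-root ("Noam")
# warm-up schedule of the original Transformer, with cnfg.isize as the model
# dimension and cnfg.warm_step as the number of warm-up updates. A minimal
# sketch under that assumption (noam_lr is an illustrative helper, not part of
# the toolkit):
def noam_lr(step, d_model, warm_steps, scale=1.0):
    # linear warm-up for warm_steps updates, then inverse-sqrt decay:
    # lr = scale * d_model^-0.5 * min(step^-0.5, step * warm_steps^-1.5)
    step = max(step, 1)
    return scale * (d_model ** -0.5) * min(step ** -0.5, step * (warm_steps ** -1.5))

# e.g. with d_model=512 and warm_steps=4000, the peak at step 4000 is
# 512 ** -0.5 * 4000 ** -0.5, roughly 6.99e-4, matching the paper's base setting.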
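# The GradScaler built above drives mixed-precision updates in the training
# loop. A minimal sketch of one such step with torch.cuda.amp, assuming a plain
# single-GPU model and criterion (amp_train_step and its arguments are
# illustrative, not the toolkit's own loop):
from torch.cuda.amp import autocast

def amp_train_step(model, criterion, optimizer, scaler, src, tgt):
    # run the forward pass and loss in reduced precision where safe
    with autocast():
        loss = criterion(model(src), tgt)
    # scale the loss before backward to avoid fp16 gradient underflow
    scaler.scale(loss).backward()
    # step() unscales the gradients first and skips the update if any are inf/nan
    scaler.step(optimizer)
    # adapt the scale factor for the next iteration
    scaler.update()
    optimizer.zero_grad(set_to_none=True)
    return loss.item()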