def make_network(configs):
    train_cfg = configs['train']
    config = configs['inference']

    ## creating new posenet
    PoseNet = importNet(configs['network'])
    poseNet = PoseNet(**config)
    forward_net = DataParallel(poseNet.cuda())
    config['net'] = forward_net
    config['lossLayers'] = KeypointLoss(configs['inference']['num_parts'],
                                        configs['inference']['nstack'],
                                        configs['inference']['num_class'])

    ## optimizer, experiment setup
    train_cfg['optimizer'] = torch.optim.Adam(config['net'].parameters(), train_cfg['learning_rate'])
    # learning-rate policy: every step_size epochs the learning rate is multiplied by gamma
    train_cfg['scheduler'] = lr_scheduler.StepLR(
        train_cfg['optimizer'],
        step_size=train_cfg['decay_iters'],
        gamma=train_cfg['decay_lr']
    )

    exp_path = os.path.join('exp', configs['opt'].exp)
    if configs['opt'].exp == 'pose' and configs['opt'].continue_exp is not None:
        exp_path = os.path.join('exp', configs['opt'].continue_exp)
    if not os.path.exists(exp_path):
        os.mkdir(exp_path)
    logger = open(os.path.join(exp_path, 'log'), 'a+')
    config['logger'] = logger
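# A minimal, self-contained sketch (an assumption, not part of the source) of the Adam + StepLR
# pattern set up above: the scheduler is stepped once per epoch, multiplying the learning rate
# by gamma every step_size epochs. The tiny linear model and random data are placeholders.
import torch
from torch.optim import lr_scheduler

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(30):
    x, y = torch.randn(8, 4), torch.randn(8, 2)
    optimizer.zero_grad()
    loss = torch.nn.functional.mse_loss(model(x), y)
    loss.backward()
    optimizer.step()
    scheduler.step()  # lr becomes 1e-4 after epoch 10 and 1e-5 after epoch 20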
def init():
    """
    task.__config__ contains the variables that control the training and testing
    make_network builds a function which can do forward and backward propagation
    """
    opt = parse_command_line()
    task = importlib.import_module('task.pose')
    configs = task.__config__
    configs['opt'] = opt

    train_cfg = configs['train']
    config = configs['inference']

    ## creating new posenet
    PoseNet = importNet(configs['network'])
    poseNet = PoseNet(**config)
    config['net'] = poseNet

    reload(configs)
    return configs
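# Sketch of the shape of the configs dict implied by the functions in this file: these are the
# keys that init()/make_network()/make_train() actually read. The concrete values are
# placeholders (assumptions), not the project's real defaults; PoseNet(**config) means the
# network's __init__ must accept the 'inference' entries (or **kwargs).
example_config = {
    'network': 'models.posenet.PoseNet',  # dotted path resolved by importNet
    'inference': {
        'keys': ['imgs'],                 # input names forwarded by the Trainer wrapper
        'num_parts': 17,                  # placeholder keypoint count
        'nstack': 4,                      # placeholder number of stacks
        'num_class': 1,                   # placeholder class count
    },
    'train': {
        'learning_rate': 1e-3,
        'decay_iters': 200000,
        'decay_lr': 1e-5,                 # new lr (or StepLR gamma, depending on the variant)
        # (loss name, weight) pairs; names must match the network's loss outputs
        'loss': [('example_loss', 1.0)],
    },
}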
def make_network(configs):
    # utils.misc.importNet resolves the dotted module path (e.g. 'models.posenet.PoseNet')
    # and returns the network class
    PoseNet = importNet(configs['network'])
    train_cfg = configs['train']
    config = configs['inference']
    poseNet = PoseNet(**config)
    # model-level data parallelism; the batch size should be larger than the number of GPUs
    forward_net = DataParallel(poseNet.cuda())

    def calc_loss(*args, **kwargs):
        # delegate loss calculation to poseNet
        return poseNet.calc_loss(*args, **kwargs)

    config['net'] = Trainer(forward_net, configs['inference']['keys'], calc_loss)
    # torch.optim implements the optimization algorithms
    train_cfg['optimizer'] = torch.optim.Adam(config['net'].parameters(), train_cfg['learning_rate'])

    # locate the experiment output path, creating the directory if needed
    exp_path = os.path.join('exp', configs['opt'].exp)
    if not os.path.exists(exp_path):
        os.mkdir(exp_path)
    logger = open(os.path.join(exp_path, 'log'), 'a+')

    def make_train(batch_id, config, phase, **inputs):
        # wrap each input as a cuda tensor
        for i in inputs:
            inputs[i] = make_input(inputs[i])

        net = config['inference']['net']
        config['batch_id'] = batch_id

        # switch the network to train or eval mode depending on the phase
        if phase == 'train':
            net = net.train()
        else:
            net = net.eval()

        # 'train' and 'valid' go through the loss computation; only 'inference' skips it
        if phase != 'inference':
            # pass the images positionally and the remaining inputs as keyword arguments
            result = net(inputs['imgs'], **{i: inputs[i] for i in inputs if i != 'imgs'})

            num_loss = len(config['train']['loss'])

            ## I use the last outputs as the loss
            ## the weights of the loss are controlled by config['train']['loss']
            losses = {i[0]: result[-num_loss + idx] * i[1] for idx, i in enumerate(config['train']['loss'])}

            loss = 0
            # log the per-loss values for this batch
            toprint = '\n{}: '.format(batch_id)
            for i in losses:
                loss = loss + torch.mean(losses[i])

                my_loss = make_output(losses[i])
                my_loss = my_loss.mean(axis=0)

                if my_loss.size == 1:
                    toprint += ' {}: {}'.format(i, format(my_loss.mean(), '.8f'))
                else:
                    toprint += '\n{}'.format(i)
                    for j in my_loss:
                        toprint += ' {}'.format(format(j.mean(), '.8f'))
            logger.write(toprint)
            logger.flush()

            if batch_id == 200000:
                ## decrease the learning rate after 200000 iterations
                for param_group in train_cfg['optimizer'].param_groups:
                    param_group['lr'] = 1e-5

            if phase == 'train':
                optimizer = train_cfg['optimizer']
                # zero the gradients before backpropagation: PyTorch accumulates gradients
                # across backward passes by default
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            return None
        else:
            # inference branch, used by test.py: return the predictions as cpu arrays
            out = {}
            net = net.eval()
            result = net(**inputs)
            if type(result) != list and type(result) != tuple:
                result = [result]
            out['preds'] = [make_output(i) for i in result]
            return out

    return make_train
def make_network(configs):
    train_cfg = configs['train']
    config = configs['inference']

    def calc_loss(*args, **kwargs):
        return poseNet.calc_loss(*args, **kwargs)

    ## creating new posenet
    PoseNet = importNet(configs['network'])
    poseNet = PoseNet(**config)
    forward_net = DataParallel(poseNet.cuda())
    config['net'] = Trainer(forward_net, configs['inference']['keys'], calc_loss)

    ## optimizer, experiment setup
    train_cfg['optimizer'] = torch.optim.Adam(
        filter(lambda p: p.requires_grad, config['net'].parameters()),
        train_cfg['learning_rate'])

    exp_path = os.path.join('exp', configs['opt'].exp)
    if configs['opt'].exp == 'pose' and configs['opt'].continue_exp is not None:
        exp_path = os.path.join('exp', configs['opt'].continue_exp)
    if not os.path.exists(exp_path):
        os.mkdir(exp_path)
    logger = open(os.path.join(exp_path, 'log'), 'a+')

    def make_train(batch_id, config, phase, **inputs):
        for i in inputs:
            try:
                inputs[i] = make_input(inputs[i])
            except:
                pass  # for the last input, which is a string (id_)

        net = config['inference']['net']
        config['batch_id'] = batch_id

        net = net.train()
        if phase != 'inference':
            result = net(inputs['imgs'], **{i: inputs[i] for i in inputs if i != 'imgs'})

            num_loss = len(config['train']['loss'])

            losses = {
                i[0]: result[-num_loss + idx] * i[1]
                for idx, i in enumerate(config['train']['loss'])
            }

            loss = 0
            toprint = '\n{}: '.format(batch_id)
            for i in losses:
                loss = loss + torch.mean(losses[i])

                my_loss = make_output(losses[i])
                my_loss = my_loss.mean()

                if my_loss.size == 1:
                    toprint += ' {}: {}'.format(i, format(my_loss.mean(), '.8f'))
                else:
                    toprint += '\n{}'.format(i)
                    for j in my_loss:
                        toprint += ' {}'.format(format(j.mean(), '.8f'))
            logger.write(toprint)
            logger.flush()

            if phase == 'train':
                optimizer = train_cfg['optimizer']
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if batch_id == config['train']['decay_iters']:
                    ## decrease the learning rate after decay_iters iterations
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = config['train']['decay_lr']
            return None
        else:
            out = {}
            net = net.eval()
            result = net(**inputs)
            if type(result) != list and type(result) != tuple:
                result = [result]
            out['preds'] = [make_output(i) for i in result]
            return out

    return make_train
def make_network(configs):
    PoseNet = importNet(configs['network'])
    train_cfg = configs['train']
    config = configs['inference']

    poseNet = PoseNet(**config)
    forward_net = DataParallel(poseNet.cuda())

    def calc_loss(*args, **kwargs):
        return poseNet.calc_loss(*args, **kwargs)

    config['net'] = Trainer(forward_net, configs['inference']['keys'], calc_loss)
    train_cfg['optimizer'] = torch.optim.Adam(config['net'].parameters(), train_cfg['learning_rate'])

    exp_path = os.path.join('exp', configs['opt'].exp)
    if not os.path.exists(exp_path):
        os.mkdir(exp_path)
    logger = open(os.path.join(exp_path, 'log'), 'a+')

    def make_train(batch_id, config, phase, **inputs):
        for i in inputs:
            inputs[i] = make_input(inputs[i])

        net = config['inference']['net']
        config['batch_id'] = batch_id

        if phase == 'train':
            net = net.train()
        else:
            net = net.eval()

        if phase != 'inference':
            result = net(inputs['imgs'], **{i: inputs[i] for i in inputs if i != 'imgs'})

            num_loss = len(config['train']['loss'])

            ## I use the last outputs as the loss
            ## the weights of the loss are controlled by config['train']['loss']
            losses = {i[0]: result[-num_loss + idx] * i[1] for idx, i in enumerate(config['train']['loss'])}

            loss = 0
            toprint = '\n{}: '.format(batch_id)
            for i in losses:
                loss = loss + torch.mean(losses[i])

                my_loss = make_output(losses[i])
                my_loss = my_loss.mean(axis=0)

                if my_loss.size == 1:
                    toprint += ' {}: {}'.format(i, format(my_loss.mean(), '.8f'))
                else:
                    toprint += '\n{}'.format(i)
                    for j in my_loss:
                        toprint += ' {}'.format(format(j.mean(), '.8f'))
            logger.write(toprint)
            logger.flush()

            if batch_id == 200000:
                ## decrease the learning rate after 200000 iterations
                for param_group in train_cfg['optimizer'].param_groups:
                    param_group['lr'] = 1e-5

            if phase == 'train':
                optimizer = train_cfg['optimizer']
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            return None
        else:
            out = {}
            net = net.eval()
            result = net(**inputs)
            if type(result) != list and type(result) != tuple:
                result = [result]
            out['preds'] = [make_output(i) for i in result]
            return out

    return make_train
def make_network(configs):
    PoseNet = importNet(configs['network'])
    train_cfg = configs['train']
    config = configs['inference']

    poseNet = PoseNet(**config)
    forward_net = DataParallel(poseNet.cuda())

    def calc_loss(*args, **kwargs):
        return poseNet.calc_loss(*args, **kwargs)

    config['net'] = Trainer(forward_net, configs['inference']['keys'], calc_loss)
    train_cfg['optimizer'] = torch.optim.Adam(config['net'].parameters(), train_cfg['learning_rate'])

    exp_path = os.path.join('exp', configs['opt'].exp)
    if not os.path.exists(exp_path):
        os.mkdir(exp_path)
    logger = open(os.path.join(exp_path, 'log'), 'a+')

    def make_train(batch_id, config, phase, **inputs):
        for i in inputs:
            inputs[i] = make_input(inputs[i])

        net = config['inference']['net']
        config['batch_id'] = batch_id

        if phase != 'inference':
            result = net(inputs['imgs'], **{i: inputs[i] for i in inputs if i != 'imgs'})

            num_loss = len(config['train']['loss'])

            ## I use the last outputs as the loss
            ## the weights of the loss are controlled by config['train']['loss']
            losses = {i[0]: result[-num_loss + idx] * i[1] for idx, i in enumerate(config['train']['loss'])}

            loss = 0
            toprint = '\n{}: '.format(batch_id)
            for i in losses:
                loss = loss + torch.mean(losses[i])

                my_loss = make_output(losses[i])
                my_loss = my_loss.mean(axis=0)

                if my_loss.size == 1:
                    toprint += ' {}: {}'.format(i, format(my_loss.mean(), '.8f'))
                else:
                    toprint += '\n{}'.format(i)
                    for j in my_loss:
                        toprint += ' {}'.format(format(j.mean(), '.8f'))
            logger.write(toprint)
            logger.flush()

            if batch_id == 200000:
                ## decrease the learning rate after 200000 iterations
                for param_group in train_cfg['optimizer'].param_groups:
                    param_group['lr'] = 1e-5

            optimizer = train_cfg['optimizer']
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            return None
        else:
            out = {}
            net = net.eval()
            result = net(**inputs)
            if type(result) != list and type(result) != tuple:
                result = [result]
            out['preds'] = [make_output(i) for i in result]
            return out

    return make_train
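# Hypothetical usage (an assumption, not from the source): make_network() returns the make_train
# closure, which a training script would call once per batch. The keyword names passed to it
# must match what the network's forward expects; configs, data_loader and test_imgs below are
# placeholders.
train_func = make_network(configs)
for batch_id, batch in enumerate(data_loader):
    train_func(batch_id, configs, 'train', **batch)        # forward + backward + optimizer step
out = train_func(0, configs, 'inference', imgs=test_imgs)  # test time
predictions = out['preds']                                 # list of cpu arrays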