def train(opt, train_loader, m, criterion, optimizer, writer):
    """Run one pass over ``train_loader``, stepping the optimizer per batch.

    Args:
        opt: Run options. ``opt.board`` and ``opt.debug`` are read and
            ``opt.trainIters`` is incremented once per batch.
        train_loader: Iterable yielding 5-tuples
            ``(inps, labels, label_masks, _, bboxes)``; ``inps`` may be a
            tensor or a list of tensors.
        m: Model; put in training mode and called as ``m(inps)``.
        criterion: Loss callable. Called with two masked tensors when
            ``cfg.LOSS['TYPE']`` is ``'MSELoss'``, otherwise with
            ``(output, labels, label_masks)``.
        optimizer: Optimizer whose gradients are zeroed and stepped per batch.
        writer: Passed through to ``board_writing`` / ``debug_writing``.

    Returns:
        Tuple ``(average loss, average accuracy)`` taken from the two
        ``DataLogger`` instances after the last batch.
    """
    loss_meter = DataLogger()
    acc_meter = DataLogger()

    m.train()
    norm_type = cfg.LOSS.get('NORM_TYPE', None)
    use_mse = cfg.LOSS.get('TYPE') == 'MSELoss'
    desc_template = 'loss: {loss:.8f} | acc: {acc:.4f}'

    progress = tqdm(train_loader, dynamic_ncols=True)

    for step, (inps, labels, label_masks, _, _) in enumerate(progress):
        # Inputs are moved to the GPU and flagged as requiring gradients.
        if isinstance(inps, list):
            inps = [tensor.cuda().requires_grad_() for tensor in inps]
        else:
            inps = inps.cuda().requires_grad_()
        labels = labels.cuda()
        label_masks = label_masks.cuda()

        output = m(inps)

        if use_mse:
            # The MSE path multiplies both prediction and target by the mask.
            loss = 0.5 * criterion(output * label_masks, labels * label_masks)
            acc = calc_accuracy(output * label_masks, labels * label_masks)
        else:
            loss = criterion(output, labels, label_masks)
            acc = calc_integral_accuracy(
                output, labels, label_masks,
                output_3d=False, norm_type=norm_type)

        first_input = inps[0] if isinstance(inps, list) else inps
        batch_size = first_input.size(0)

        loss_meter.update(loss.item(), batch_size)
        acc_meter.update(acc, batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        opt.trainIters += 1

        # Tensorboard
        if opt.board:
            board_writing(writer, loss_meter.avg, acc_meter.avg,
                          opt.trainIters, 'Train')

        # Debug (every 10th batch)
        if opt.debug and step % 10 == 0:
            debug_writing(writer, output, labels, inps, opt.trainIters)

        # TQDM
        progress.set_description(
            desc_template.format(loss=loss_meter.avg, acc=acc_meter.avg))

    progress.close()

    return loss_meter.avg, acc_meter.avg
def train(opt, train_loader, m, criterion, optimizer, writer, scaler):
    """Run one mixed-precision pass over ``train_loader`` with an optional
    joint-radius loss term.

    Args:
        opt: Run options. Reads ``opt.device.type``, ``opt.fit_radius``,
            ``opt.board`` and ``opt.debug``; increments ``opt.trainIters``
            once per batch.
        train_loader: Loader exposing ``.dataset`` and yielding 6-tuples
            ``(inps, labels, label_masks, joint_radius_gt, _, bboxes)``.
        m: Model; ``m(inps)`` must return an object with ``joints_map`` and
            ``joints_radius`` attributes.
        criterion: Loss with ``reduction == "sum"`` (asserted).
        optimizer: Optimizer stepped through ``scaler``.
        writer: Passed through to ``board_writing`` / ``debug_writing``.
        scaler: Gradient scaler providing ``scale``, ``step`` and ``update``
            (presumably ``torch.cuda.amp.GradScaler`` -- confirm at caller).

    Returns:
        Dict of ``DataLogger`` objects keyed by ``'joint_loss'``,
        ``'radius_loss'``, ``'loss'``, ``'acc'`` and ``'acc_radius'``.

    Raises:
        NotImplementedError: If ``cfg.LOSS['TYPE']`` is not ``'MSELoss'``.
    """
    loggers = {
        'joint_loss': DataLogger(),
        'radius_loss': DataLogger(),
        'loss': DataLogger(),
        'acc': DataLogger(),
        'acc_radius': DataLogger(),
    }
    m.train()
    train_dataset = train_loader.dataset
    train_loader = tqdm(train_loader, dynamic_ncols=True)

    on_gpu = opt.device.type != 'cpu'

    # Logged as -1 while opt.fit_radius is off (values are never overwritten).
    radius_loss_item = -1
    acc_radius = -1

    for i, (inps, labels, label_masks, joint_radius_gt, _, bboxes) in enumerate(train_loader):
        if isinstance(inps, list):
            if on_gpu:
                inps = [inp.cuda() for inp in inps]
            inps = [inp.requires_grad_() for inp in inps]
        else:
            if on_gpu:
                inps = inps.cuda()
            inps = inps.requires_grad_()
        if on_gpu:
            labels = labels.cuda()
            label_masks = label_masks.cuda()
            joint_radius_gt = joint_radius_gt.cuda()

        # Forward pass and loss are computed under autocast; backward and the
        # optimizer step happen outside it, via the scaler.
        with autocast():
            full_output = m(inps)
            joint_map = full_output.joints_map
            joints_radius = full_output.joints_radius

            if cfg.LOSS.get('TYPE') != 'MSELoss':
                raise NotImplementedError()

            # The manual normalization below only makes sense for a summed loss.
            assert criterion.reduction == "sum"
            coef = 1000
            joint_loss = 0.5 * coef * criterion(
                joint_map.mul(label_masks), labels.mul(label_masks))
            # Out-of-place ops throughout: `loss` must not alias `joint_loss`,
            # otherwise adding the radius term would also change the value
            # logged as "joint_loss".
            joint_loss = joint_loss / (
                label_masks.sum() * joint_map.shape[2] * joint_map.shape[3])
            loss = joint_loss

            if opt.fit_radius:
                # A radius is supervised only where the joint mask is set and
                # the ground-truth radius is not the -1 sentinel.
                radius_masks = label_masks[:, :, 0, 0] * (joint_radius_gt != -1)
                masked_radius_gt = joint_radius_gt.mul(radius_masks)
                masked_radius_pred = joints_radius.mul(radius_masks)

                radius_loss = 0.5 * criterion(masked_radius_gt, masked_radius_pred)
                # Normalize the radius term itself (not the joint term).
                radius_loss = radius_loss / radius_masks.sum()
                loss = joint_loss + radius_loss
                radius_loss_item = radius_loss.item()

                # Fraction of entries whose absolute radius error is below 1.
                # The denominator is the full batch*joints count, so masked-out
                # entries (error 0) still count as hits, as before.
                radius_hits = (masked_radius_gt - masked_radius_pred).abs() < 1
                acc_radius = radius_hits.sum().item() / float(
                    joint_radius_gt.shape[0] * joint_radius_gt.shape[1])

        acc = calc_accuracy(joint_map.mul(label_masks), labels.mul(label_masks))

        if isinstance(inps, list):
            batch_size = inps[0].size(0)
        else:
            batch_size = inps.size(0)

        loggers["joint_loss"].update(joint_loss.item(), batch_size)
        loggers["radius_loss"].update(radius_loss_item, batch_size)
        loggers["loss"].update(loss.item(), batch_size)
        loggers["acc"].update(acc, batch_size)
        loggers["acc_radius"].update(acc_radius, batch_size)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        opt.trainIters += 1

        # Tensorboard
        if opt.board:
            board_writing(writer, loggers, opt.trainIters, 'Train')

        # Debug: every 100th batch, run one fixed dataset sample through the
        # model and hand the result to debug_writing.
        # NOTE(review): the sample is not moved to the GPU and the forward
        # pass runs in training mode with gradients enabled -- confirm this
        # is intended for the model wrapper in use.
        if opt.debug and not i % 100:
            debug_image_index = 2526
            (dbg_inps, dbg_labels, _, dbg_radius_gt, _, _) = train_dataset[debug_image_index]
            dbg_inps = dbg_inps[None, :]
            dbg_output = m(dbg_inps)
            debug_writing(writer, dbg_output.joints_map, dbg_output.joints_radius,
                          dbg_labels[None, :], dbg_radius_gt[None, :],
                          dbg_inps, opt.trainIters)

        # TQDM
        train_loader.set_description(" | ".join(
            f"{name}:{logger.avg:.05f}" for name, logger in loggers.items()))

    train_loader.close()

    return loggers
def train(opt, train_loader, m, criterion, optimizer, writer):
    """Run one pass over ``train_loader``, supporting a two-part 'Combined' loss.

    The loss is selected by ``cfg.LOSS['TYPE']``:

    * ``'MSELoss'``: ``0.5 * criterion`` on mask-multiplied output and labels.
    * ``'Combined'``: the output channels are split into a leading body/foot
      part and a trailing face/hand part. ``criterion``, ``labels`` and
      ``label_masks`` are indexed as two-element sequences (``[0]`` for
      body/foot, ``[1]`` for face/hand).
    * anything else: ``criterion(output, labels, label_masks)``.

    Args:
        opt: Run options. ``opt.board`` and ``opt.debug`` are read and
            ``opt.trainIters`` is incremented once per batch.
        train_loader: Iterable yielding 5-tuples
            ``(inps, labels, label_masks, _, bboxes)``; ``inps``, ``labels``
            and ``label_masks`` may each be a tensor or a list of tensors.
        m: Model; put in training mode and called as ``m(inps)``.
        criterion: Loss callable, or an indexable pair of them for 'Combined'.
        optimizer: Optimizer whose gradients are zeroed and stepped per batch.
        writer: Passed through to ``board_writing`` / ``debug_writing``.

    Returns:
        Tuple ``(average loss, average accuracy)`` from the ``DataLogger``s.
    """
    loss_logger = DataLogger()
    acc_logger = DataLogger()
    # Read the configuration once; it is not modified inside the loop.
    loss_type = cfg.LOSS.get('TYPE')
    norm_type = cfg.LOSS.get('NORM_TYPE', None)
    m.train()

    train_loader = tqdm(train_loader, dynamic_ncols=True)

    for i, (inps, labels, label_masks, _, bboxes) in enumerate(train_loader):
        if isinstance(inps, list):
            inps = [inp.cuda().requires_grad_() for inp in inps]
        else:
            inps = inps.cuda().requires_grad_()
        if isinstance(labels, list):
            labels = [label.cuda() for label in labels]
            label_masks = [label_mask.cuda() for label_mask in label_masks]
        else:
            labels = labels.cuda()
            label_masks = label_masks.cuda()

        output = m(inps)

        if loss_type == 'MSELoss':
            loss = 0.5 * criterion(output.mul(label_masks), labels.mul(label_masks))
            acc = calc_accuracy(output.mul(label_masks), labels.mul(label_masks))
        elif loss_type == 'Combined':
            # The trailing channels are face/hand: 42 of them when the model
            # emits 68 channels, 110 otherwise.
            face_hand_num = 42 if output.size()[1] == 68 else 110

            output_body_foot = output[:, :-face_hand_num, :, :]
            output_face_hand = output[:, -face_hand_num:, :, :]
            num_body_foot = output_body_foot.shape[1]
            num_face_hand = output_face_hand.shape[1]
            num_total = num_body_foot + num_face_hand

            labels_body_foot, labels_face_hand = labels[0], labels[1]
            label_masks_body_foot, label_masks_face_hand = label_masks[0], label_masks[1]

            loss_body_foot = 0.5 * criterion[0](
                output_body_foot.mul(label_masks_body_foot),
                labels_body_foot.mul(label_masks_body_foot))
            acc_body_foot = calc_accuracy(
                output_body_foot.mul(label_masks_body_foot),
                labels_body_foot.mul(label_masks_body_foot))

            loss_face_hand = criterion[1](
                output_face_hand, labels_face_hand, label_masks_face_hand)
            acc_face_hand = calc_integral_accuracy(
                output_face_hand, labels_face_hand, label_masks_face_hand,
                output_3d=False, norm_type=norm_type)

            # Fixed weights applied to the two terms before summing.
            loss_body_foot *= 100
            loss_face_hand *= 0.01
            loss = loss_body_foot + loss_face_hand

            # Accuracy is the channel-count-weighted mean of the two parts.
            acc = (acc_body_foot * num_body_foot / num_total
                   + acc_face_hand * num_face_hand / num_total)
        else:
            loss = criterion(output, labels, label_masks)
            acc = calc_integral_accuracy(
                output, labels, label_masks, output_3d=False, norm_type=norm_type)

        if isinstance(inps, list):
            batch_size = inps[0].size(0)
        else:
            batch_size = inps.size(0)

        loss_logger.update(loss.item(), batch_size)
        acc_logger.update(acc, batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        opt.trainIters += 1

        # Tensorboard
        if opt.board:
            board_writing(writer, loss_logger.avg, acc_logger.avg, opt.trainIters, 'Train')

        # Debug
        if opt.debug and not i % 10:
            debug_writing(writer, output, labels, inps, opt.trainIters)

        # TQDM
        train_loader.set_description(
            'loss: {loss:.8f} | acc: {acc:.4f}'.format(
                loss=loss_logger.avg,
                acc=acc_logger.avg)
        )

    train_loader.close()

    return loss_logger.avg, acc_logger.avg
def train(opt, train_loader, m, criterion, optimizer, writer):
    """Run one pass over ``train_loader`` for a model returning ``(out, feature)``.

    When ``cfg.OTHERS['TRAIN_BRANCH']`` is truthy (default ``True``), the
    first-stage output is refined with ``m.module.forward_branch`` using
    ``bboxes[:, 1, :]`` and ``bboxes[:, 2, :]``, and the last ``68 * 2``
    label columns are dropped. Otherwise only the first ``133 * 2`` label
    columns are used.

    Args:
        opt: Run options. ``opt.board`` and ``opt.debug`` are read and
            ``opt.trainIters`` is incremented once per batch.
        train_loader: Iterable yielding 5-tuples
            ``(inps, labels, label_masks, _, bboxes)``.
        m: Model returning ``(out, feature)``. ``m.module`` is accessed when
            the branch is trained, so ``m`` is presumably a wrapper such as
            ``DataParallel`` -- confirm at caller.
        criterion: Loss callable invoked as ``criterion(out, labels, label_masks)``.
        optimizer: Optimizer whose gradients are zeroed and stepped per batch.
        writer: Passed through to ``board_writing`` / ``debug_writing``.

    Returns:
        Tuple ``(average loss, average accuracy)`` from the ``DataLogger``s.
    """
    loss_logger = DataLogger()
    acc_logger = DataLogger()
    m.train()
    norm_type = cfg.LOSS.get('NORM_TYPE', None)
    train_branch = cfg.OTHERS.get('TRAIN_BRANCH', True)

    train_loader = tqdm(train_loader, dynamic_ncols=True)

    for i, (inps, labels, label_masks, _, bboxes) in enumerate(train_loader):
        if isinstance(inps, list):
            inps = [inp.cuda().requires_grad_() for inp in inps]
        else:
            inps = inps.cuda().requires_grad_()

        out, feature = m(inps)

        # train for finer hands
        if train_branch:
            out = m.module.forward_branch(out, feature, bboxes[:, 1, :], bboxes[:, 2, :])
            labels = labels[:, :-68 * 2].cuda()
            label_masks = label_masks[:, :-68 * 2].cuda()
        else:
            labels = labels[:, :133 * 2].cuda()
            label_masks = label_masks[:, :133 * 2].cuda()

        loss = criterion(out, labels, label_masks)
        acc = calc_integral_accuracy(
            out, labels, label_masks, output_3d=False, norm_type=norm_type)

        if isinstance(inps, list):
            batch_size = inps[0].size(0)
        else:
            batch_size = inps.size(0)

        loss_logger.update(loss.item(), batch_size)
        acc_logger.update(acc, batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        opt.trainIters += 1

        # Tensorboard
        if opt.board:
            board_writing(writer, loss_logger.avg, acc_logger.avg, opt.trainIters, 'Train')

        # Debug: pass the tensor the loss was computed on. This function has
        # no variable called `output`, so that name would raise NameError.
        if opt.debug and not i % 10:
            debug_writing(writer, out, labels, inps, opt.trainIters)

        # TQDM
        train_loader.set_description(
            'loss: {loss:.8f} | acc: {acc:.4f}'.format(
                loss=loss_logger.avg,
                acc=acc_logger.avg)
        )

    train_loader.close()

    return loss_logger.avg, acc_logger.avg