def save_pseudo_label_epoch(model, val_loader, rank, leave_pbar, ps_label_dir, cur_epoch):
    """
    Generate pseudo labels for one full pass over val_loader with the given model.

    Args:
        model: model used to predict results for pseudo labels (switched to eval mode)
        val_loader: data loader providing the batches to pseudo-label
        rank: process rank; only rank 0 drives the progress bar
        leave_pbar: tqdm bar controller (whether the bar persists after completion)
        ps_label_dir: directory to save pseudo labels into
        cur_epoch: current epoch index, used for display and to gate memory ensembling
    """
    val_dataloader_iter = iter(val_loader)
    total_it_each_epoch = len(val_loader)

    if rank == 0:
        pbar = tqdm.tqdm(total=total_it_each_epoch, leave=leave_pbar,
                         desc='generate_ps_e%d' % cur_epoch, dynamic_ncols=True)

    pos_ps_meter = common_utils.AverageMeter()
    ign_ps_meter = common_utils.AverageMeter()

    model.eval()

    for cur_it in range(total_it_each_epoch):
        try:
            target_batch = next(val_dataloader_iter)
        except StopIteration:
            # BUG FIX: the original stored the fresh iterator in a *different*
            # variable (target_dataloader_iter), so the loop variable stayed
            # exhausted and every later iteration re-created an iterator and
            # re-read only the first batch. Re-assign the loop's own iterator.
            val_dataloader_iter = iter(val_loader)
            target_batch = next(val_dataloader_iter)

        # generate gt_boxes for target_batch and update model weights
        with torch.no_grad():
            load_data_to_gpu(target_batch)
            pred_dicts, ret_dict = model(target_batch)

        # memory-ensemble updating is only meaningful after the first epoch,
        # when previous-round pseudo labels exist to be merged with
        pos_ps_batch, ign_ps_batch = save_pseudo_label_batch(
            target_batch, pred_dicts=pred_dicts,
            need_update=(cfg.SELF_TRAIN.get('MEMORY_ENSEMBLE', None) and
                         cfg.SELF_TRAIN.MEMORY_ENSEMBLE.ENABLED and
                         cur_epoch > 0))

        # log to console and tensorboard
        pos_ps_meter.update(pos_ps_batch)
        ign_ps_meter.update(ign_ps_batch)
        disp_dict = {
            'pos_ps_box': "{:.3f}({:.3f})".format(pos_ps_meter.val, pos_ps_meter.avg),
            'ign_ps_box': "{:.3f}({:.3f})".format(ign_ps_meter.val, ign_ps_meter.avg)
        }

        if rank == 0:
            pbar.update()
            pbar.set_postfix(disp_dict)
            pbar.refresh()

    if rank == 0:
        pbar.close()

    gather_and_dump_pseudo_label_result(rank, ps_label_dir, cur_epoch)
def train_one_epoch(model, optimizer, train_loader, model_func, lr_scheduler, accumulated_iter, optim_cfg,
                    rank, tbar, total_it_each_epoch, dataloader_iter, tb_log=None, leave_pbar=False):
    """
    Run one training epoch on the source data loader.

    Args:
        model: model being trained
        optimizer: optimizer whose lr is read for logging
        train_loader: source-domain data loader
        model_func: callable(model, batch) -> (loss, tb_dict, disp_dict)
        lr_scheduler: scheduler stepped per accumulated iteration
        accumulated_iter: global iteration counter carried across epochs
        optim_cfg: optimization config (provides GRAD_NORM_CLIP)
        rank: process rank; only rank 0 logs and drives progress bars
        tbar: outer (epoch-level) tqdm bar
        total_it_each_epoch: number of iterations to run this epoch
        dataloader_iter: iterator over train_loader, re-created when exhausted
        tb_log: optional tensorboard writer
        leave_pbar: whether the inner tqdm bar persists after completion

    Returns:
        Updated accumulated_iter after total_it_each_epoch steps.
    """
    if total_it_each_epoch == len(train_loader):
        dataloader_iter = iter(train_loader)

    if rank == 0:
        pbar = tqdm.tqdm(total=total_it_each_epoch, leave=leave_pbar, desc='train', dynamic_ncols=True)
        data_time = common_utils.AverageMeter()
        batch_time = common_utils.AverageMeter()
        forward_time = common_utils.AverageMeter()

    end = time.time()
    for cur_it in range(total_it_each_epoch):
        try:
            batch = next(dataloader_iter)
        except StopIteration:
            dataloader_iter = iter(train_loader)
            batch = next(dataloader_iter)
            print('new iters')

        data_timer = time.time()
        cur_data_time = data_timer - end

        lr_scheduler.step(accumulated_iter)

        # FIX: narrow the bare `except:` — only an optimizer without an `lr`
        # attribute should fall back to param_groups; anything else should raise.
        try:
            cur_lr = float(optimizer.lr)
        except AttributeError:
            cur_lr = optimizer.param_groups[0]['lr']

        if tb_log is not None:
            tb_log.add_scalar('meta_data/learning_rate', cur_lr, accumulated_iter)

        model.train()
        optimizer.zero_grad()

        loss, tb_dict, disp_dict = model_func(model, batch)
        loss.backward()
        clip_grad_norm_(model.parameters(), optim_cfg.GRAD_NORM_CLIP)
        optimizer.step()

        accumulated_iter += 1

        cur_forward_time = time.time() - data_timer
        cur_batch_time = time.time() - end
        end = time.time()

        # average reduce timings across distributed processes
        avg_data_time = commu_utils.average_reduce_value(cur_data_time)
        avg_forward_time = commu_utils.average_reduce_value(cur_forward_time)
        avg_batch_time = commu_utils.average_reduce_value(cur_batch_time)

        # log to console and tensorboard
        if rank == 0:
            data_time.update(avg_data_time)
            forward_time.update(avg_forward_time)
            batch_time.update(avg_batch_time)
            disp_dict.update({
                'loss': loss.item(), 'lr': cur_lr,
                'd_time': f'{data_time.val:.2f}({data_time.avg:.2f})',
                'f_time': f'{forward_time.val:.2f}({forward_time.avg:.2f})',
                'b_time': f'{batch_time.val:.2f}({batch_time.avg:.2f})'
            })

            pbar.update()
            pbar.set_postfix(dict(total_it=accumulated_iter))
            tbar.set_postfix(disp_dict)
            tbar.refresh()

            if tb_log is not None:
                # FIX: log the Python float, not the loss tensor — passing the
                # tensor keeps the autograd graph / device memory alive in the
                # summary writer until it is flushed.
                tb_log.add_scalar('train/loss', loss.item(), accumulated_iter)
                tb_log.add_scalar('meta_data/learning_rate', cur_lr, accumulated_iter)
                for key, val in tb_dict.items():
                    tb_log.add_scalar('train/' + key, val, accumulated_iter)

    if rank == 0:
        pbar.close()
    return accumulated_iter
def save_pseudo_label_batch(input_dict, pred_dicts=None, need_update=True):
    """
    Save pseudo label for give batch.
    If model is given, use model to inference pred_dicts,
    otherwise, directly use given pred_dicts.

    Args:
        input_dict: batch data read from dataloader
        pred_dicts: Dict if not given model.
            predict results to be generated pseudo label and saved
        need_update: Bool.
            If set to true, use consistency matching to update pseudo label
    """
    # Per-batch meters: avg over frames in this batch is what gets returned.
    pos_ps_meter = common_utils.AverageMeter()
    ign_ps_meter = common_utils.AverageMeter()

    batch_size = len(pred_dicts)
    for b_idx in range(batch_size):
        pred_cls_scores = pred_iou_scores = None
        if 'pred_boxes' in pred_dicts[b_idx]:
            # Exist predicted boxes passing self-training score threshold
            pred_boxes = pred_dicts[b_idx]['pred_boxes'].detach().cpu().numpy()
            pred_labels = pred_dicts[b_idx]['pred_labels'].detach().cpu(
            ).numpy()
            pred_scores = pred_dicts[b_idx]['pred_scores'].detach().cpu(
            ).numpy()
            if 'pred_cls_scores' in pred_dicts[b_idx]:
                pred_cls_scores = pred_dicts[b_idx]['pred_cls_scores'].detach(
                ).cpu().numpy()
            if 'pred_iou_scores' in pred_dicts[b_idx]:
                pred_iou_scores = pred_dicts[b_idx]['pred_iou_scores'].detach(
                ).cpu().numpy()

            # Per-class ignore thresholds, indexed by (1-based) predicted label.
            labels_ignore_scores = np.array(
                cfg.SELF_TRAIN.SCORE_THRESH)[pred_labels - 1]
            ignore_mask = pred_scores < labels_ignore_scores
            # Boxes below their class threshold are kept but marked with label -1
            # (counted as "ignored" pseudo boxes below).
            pred_labels[ignore_mask] = -1

            # gt_box columns: [box params..., label, score]
            # assumes 7-dim boxes so the result is 9 columns — TODO confirm
            gt_box = np.concatenate((pred_boxes, pred_labels.reshape(
                -1, 1), pred_scores.reshape(-1, 1)), axis=1)

        else:
            # no predicted boxes passes self-training score threshold
            gt_box = np.zeros((0, 9), dtype=np.float32)

        gt_infos = {
            'gt_boxes': gt_box,
            'cls_scores': pred_cls_scores,
            'iou_scores': pred_iou_scores,
            # per-box counter used by the memory-ensemble matching
            'memory_counter': np.zeros(gt_box.shape[0])
        }

        # record pseudo label to pseudo label dict
        if need_update:
            # Merge with the previous round's pseudo labels for this frame.
            # NOTE(review): assumes PSEUDO_LABELS already holds this frame_id
            # (i.e. need_update is only True after epoch 0) — KeyError otherwise.
            ensemble_func = getattr(memory_ensemble_utils,
                                    cfg.SELF_TRAIN.MEMORY_ENSEMBLE.NAME)
            gt_infos = ensemble_func(
                PSEUDO_LABELS[input_dict['frame_id'][b_idx]],
                gt_infos, cfg.SELF_TRAIN.MEMORY_ENSEMBLE)

        # Column 7 is the label; label < 0 means "ignored" pseudo box.
        if gt_infos['gt_boxes'].shape[0] > 0:
            ign_ps_meter.update((gt_infos['gt_boxes'][:, 7] < 0).sum())
        else:
            ign_ps_meter.update(0)
        # ign_ps_meter.val is this frame's ignore count, so pos = total - ignored.
        pos_ps_meter.update(gt_infos['gt_boxes'].shape[0] - ign_ps_meter.val)

        NEW_PSEUDO_LABELS[input_dict['frame_id'][b_idx]] = gt_infos

    return pos_ps_meter.avg, ign_ps_meter.avg
def train_one_epoch_st(model, optimizer, source_reader, target_loader, model_func, lr_scheduler,
                       accumulated_iter, optim_cfg, rank, tbar, total_it_each_epoch, dataloader_iter,
                       tb_log=None, leave_pbar=False, ema_model=None):
    """
    Run one self-training epoch on the target-domain loader using pseudo labels.

    Args:
        model: model being trained
        optimizer: optimizer whose lr is read for logging
        source_reader: source-domain reader (unused here; kept for interface compatibility)
        target_loader: target-domain data loader carrying pseudo-label stats per batch
        model_func: callable(model, batch) -> (loss, tb_dict, disp_dict)
        lr_scheduler: scheduler stepped per accumulated iteration
        accumulated_iter: global iteration counter carried across epochs
        optim_cfg: optimization config (provides GRAD_NORM_CLIP)
        rank: process rank; only rank 0 logs and drives progress bars
        tbar: outer (epoch-level) tqdm bar
        total_it_each_epoch: number of iterations to run this epoch
        dataloader_iter: iterator over target_loader, re-created when exhausted
        tb_log: optional tensorboard writer
        leave_pbar: whether the inner tqdm bar persists after completion
        ema_model: optional EMA model (unused here; kept for interface compatibility)

    Returns:
        Updated accumulated_iter after total_it_each_epoch steps.
    """
    if total_it_each_epoch == len(target_loader):
        dataloader_iter = iter(target_loader)

    if rank == 0:
        pbar = tqdm.tqdm(total=total_it_each_epoch, leave=leave_pbar, desc='train', dynamic_ncols=True)

    ps_bbox_meter = common_utils.AverageMeter()
    ignore_ps_bbox_meter = common_utils.AverageMeter()
    st_loss_meter = common_utils.AverageMeter()

    disp_dict = {}

    for cur_it in range(total_it_each_epoch):
        lr_scheduler.step(accumulated_iter)

        # FIX: narrow the bare `except:` — only fall back to param_groups when
        # the optimizer has no `lr` attribute.
        try:
            cur_lr = float(optimizer.lr)
        except AttributeError:
            cur_lr = optimizer.param_groups[0]['lr']

        if tb_log is not None:
            tb_log.add_scalar('meta_data/learning_rate', cur_lr, accumulated_iter)

        model.train()
        optimizer.zero_grad()

        try:
            target_batch = next(dataloader_iter)
        except StopIteration:
            dataloader_iter = iter(target_loader)
            target_batch = next(dataloader_iter)
            print('new iters')

        # parameters for save pseudo label on the fly
        st_loss, st_tb_dict, st_disp_dict = model_func(model, target_batch)
        st_loss.backward()
        st_loss_meter.update(st_loss.item())

        # count number of used ps bboxes in this batch
        pos_pseudo_bbox = target_batch['pos_ps_bbox'].mean().item()
        ign_pseudo_bbox = target_batch['ign_ps_bbox'].mean().item()
        ps_bbox_meter.update(pos_pseudo_bbox)
        ignore_ps_bbox_meter.update(ign_pseudo_bbox)

        st_tb_dict = common_utils.add_prefix_to_dict(st_tb_dict, 'st_')
        disp_dict.update(common_utils.add_prefix_to_dict(st_disp_dict, 'st_'))
        disp_dict.update({
            'st_loss': "{:.3f}({:.3f})".format(st_loss_meter.val, st_loss_meter.avg),
            'pos_ps_box': ps_bbox_meter.avg,
            'ign_ps_box': ignore_ps_bbox_meter.avg
        })

        clip_grad_norm_(model.parameters(), optim_cfg.GRAD_NORM_CLIP)
        optimizer.step()
        accumulated_iter += 1

        # log to console and tensorboard
        if rank == 0:
            pbar.update()
            pbar.set_postfix(dict(total_it=accumulated_iter,
                                  pos_ps_box=ps_bbox_meter.val,
                                  ign_ps_box=ignore_ps_bbox_meter.val))
            tbar.set_postfix(disp_dict)
            tbar.refresh()

            if tb_log is not None:
                tb_log.add_scalar('meta_data/learning_rate', cur_lr, accumulated_iter)
                # FIX: log the Python float, not the loss tensor, so the summary
                # writer does not retain the autograd graph / device memory.
                tb_log.add_scalar('train/st_loss', st_loss.item(), accumulated_iter)
                tb_log.add_scalar('train/pos_ps_bbox', ps_bbox_meter.val, accumulated_iter)
                tb_log.add_scalar('train/ign_ps_bbox', ignore_ps_bbox_meter.val, accumulated_iter)
                for key, val in st_tb_dict.items():
                    tb_log.add_scalar('train/' + key, val, accumulated_iter)

    if rank == 0:
        pbar.close()
        # FIX: tb_log defaults to None — the original dereferenced it here
        # unconditionally, crashing at end of epoch when no writer was passed.
        if tb_log is not None:
            tb_log.add_scalar('train/epoch_ign_ps_box', ignore_ps_bbox_meter.avg, accumulated_iter)
            tb_log.add_scalar('train/epoch_pos_ps_box', ps_bbox_meter.avg, accumulated_iter)
    return accumulated_iter