Example #1
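All four examples read like excerpts from the train_utils module of a self-training (pseudo-labeling) pipeline built on OpenPCDet, e.g. ST3D. They assume module-level imports and globals roughly like the following; the paths are reconstructed from usage and are an assumption, not verified against the source:

import time

import numpy as np
import torch
import tqdm
from torch.nn.utils import clip_grad_norm_

from pcdet.config import cfg
from pcdet.models import load_data_to_gpu
from pcdet.utils import common_utils, commu_utils

# memory_ensemble_utils, PSEUDO_LABELS, NEW_PSEUDO_LABELS and
# gather_and_dump_pseudo_label_result come from the surrounding
# self-training module.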
def save_pseudo_label_epoch(model, val_loader, rank, leave_pbar, ps_label_dir,
                            cur_epoch):
    """
    Generate pseudo labels for one epoch with the given model.

    Args:
        model: model used to predict results for pseudo labels
        val_loader: dataloader over the data to be pseudo-labeled
        rank: process rank (only rank 0 drives the progress bar)
        leave_pbar: whether to leave the tqdm bar visible on completion
        ps_label_dir: directory in which to save the pseudo labels
        cur_epoch: current epoch index, used to tag the progress bar
            and the dumped label result
    """
    val_dataloader_iter = iter(val_loader)
    total_it_each_epoch = len(val_loader)

    if rank == 0:
        pbar = tqdm.tqdm(total=total_it_each_epoch,
                         leave=leave_pbar,
                         desc='generate_ps_e%d' % cur_epoch,
                         dynamic_ncols=True)

    pos_ps_meter = common_utils.AverageMeter()
    ign_ps_meter = common_utils.AverageMeter()

    model.eval()

    for cur_it in range(total_it_each_epoch):
        try:
            target_batch = next(val_dataloader_iter)
        except StopIteration:
            val_dataloader_iter = iter(val_loader)
            target_batch = next(val_dataloader_iter)

        # generate gt_boxes (pseudo labels) for target_batch; inference only,
        # no weight updates
        with torch.no_grad():
            load_data_to_gpu(target_batch)
            pred_dicts, ret_dict = model(target_batch)

        pos_ps_batch, ign_ps_batch = save_pseudo_label_batch(
            target_batch,
            pred_dicts=pred_dicts,
            need_update=(cfg.SELF_TRAIN.get('MEMORY_ENSEMBLE', None)
                         and cfg.SELF_TRAIN.MEMORY_ENSEMBLE.ENABLED
                         and cur_epoch > 0))

        # log to console and tensorboard
        pos_ps_meter.update(pos_ps_batch)
        ign_ps_meter.update(ign_ps_batch)
        disp_dict = {
            'pos_ps_box': '{:.3f}({:.3f})'.format(pos_ps_meter.val, pos_ps_meter.avg),
            'ign_ps_box': '{:.3f}({:.3f})'.format(ign_ps_meter.val, ign_ps_meter.avg),
        }

        if rank == 0:
            pbar.update()
            pbar.set_postfix(disp_dict)
            pbar.refresh()

    if rank == 0:
        pbar.close()

    gather_and_dump_pseudo_label_result(rank, ps_label_dir, cur_epoch)
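Each example tracks running statistics through common_utils.AverageMeter, whose implementation is not shown. A minimal sketch of the usual pattern, consistent with the .val / .avg / .update() usage above (the repo's actual class may differ):

class AverageMeter:
    """Tracks the latest value and the running average."""

    def __init__(self):
        self.val = 0.0    # most recent value passed to update()
        self.avg = 0.0    # running mean over all updates
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count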
Example #2
def train_one_epoch(model,
                    optimizer,
                    train_loader,
                    model_func,
                    lr_scheduler,
                    accumulated_iter,
                    optim_cfg,
                    rank,
                    tbar,
                    total_it_each_epoch,
                    dataloader_iter,
                    tb_log=None,
                    leave_pbar=False):
    if total_it_each_epoch == len(train_loader):
        dataloader_iter = iter(train_loader)

    if rank == 0:
        pbar = tqdm.tqdm(total=total_it_each_epoch,
                         leave=leave_pbar,
                         desc='train',
                         dynamic_ncols=True)
        data_time = common_utils.AverageMeter()
        batch_time = common_utils.AverageMeter()
        forward_time = common_utils.AverageMeter()

    end = time.time()
    for cur_it in range(total_it_each_epoch):
        try:
            batch = next(dataloader_iter)
        except StopIteration:
            dataloader_iter = iter(train_loader)
            batch = next(dataloader_iter)
            print('new iters')

        data_timer = time.time()
        cur_data_time = data_timer - end

        lr_scheduler.step(accumulated_iter)

        try:
            # some optimizer wrappers expose .lr directly; otherwise read
            # the learning rate from the first param group
            cur_lr = float(optimizer.lr)
        except AttributeError:
            cur_lr = optimizer.param_groups[0]['lr']

        if tb_log is not None:
            tb_log.add_scalar('meta_data/learning_rate', cur_lr,
                              accumulated_iter)

        model.train()
        optimizer.zero_grad()

        loss, tb_dict, disp_dict = model_func(model, batch)

        loss.backward()
        clip_grad_norm_(model.parameters(), optim_cfg.GRAD_NORM_CLIP)
        optimizer.step()

        accumulated_iter += 1

        cur_forward_time = time.time() - data_timer
        cur_batch_time = time.time() - end
        end = time.time()

        # average reduce
        avg_data_time = commu_utils.average_reduce_value(cur_data_time)
        avg_forward_time = commu_utils.average_reduce_value(cur_forward_time)
        avg_batch_time = commu_utils.average_reduce_value(cur_batch_time)

        # log to console and tensorboard
        if rank == 0:
            data_time.update(avg_data_time)
            forward_time.update(avg_forward_time)
            batch_time.update(avg_batch_time)
            disp_dict.update({
                'loss': loss.item(),
                'lr': cur_lr,
                'd_time': f'{data_time.val:.2f}({data_time.avg:.2f})',
                'f_time': f'{forward_time.val:.2f}({forward_time.avg:.2f})',
                'b_time': f'{batch_time.val:.2f}({batch_time.avg:.2f})',
            })

            pbar.update()
            pbar.set_postfix(dict(total_it=accumulated_iter))
            tbar.set_postfix(disp_dict)
            tbar.refresh()

            if tb_log is not None:
                tb_log.add_scalar('train/loss', loss.item(), accumulated_iter)
                tb_log.add_scalar('meta_data/learning_rate', cur_lr,
                                  accumulated_iter)
                for key, val in tb_dict.items():
                    tb_log.add_scalar('train/' + key, val, accumulated_iter)
    if rank == 0:
        pbar.close()
    return accumulated_iter
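A sketch of how train_one_epoch is typically driven from an outer epoch loop; everything except train_one_epoch itself is hypothetical scaffolding that mirrors the argument threading above (tbar is the outer epoch bar, accumulated_iter is carried across epochs):

import tqdm

def train_epochs(model, optimizer, train_loader, model_func, lr_scheduler,
                 optim_cfg, rank, total_epochs, tb_log=None):
    accumulated_iter = 0
    dataloader_iter = iter(train_loader)
    with tqdm.trange(0, total_epochs, desc='epochs', dynamic_ncols=True,
                     disable=(rank != 0)) as tbar:
        for cur_epoch in tbar:
            accumulated_iter = train_one_epoch(
                model, optimizer, train_loader, model_func, lr_scheduler,
                accumulated_iter, optim_cfg, rank, tbar,
                total_it_each_epoch=len(train_loader),
                dataloader_iter=dataloader_iter,
                tb_log=tb_log,
                leave_pbar=(cur_epoch + 1 == total_epochs))
    return accumulated_iter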
Example #3
def save_pseudo_label_batch(input_dict, pred_dicts=None, need_update=True):
    """
    Save pseudo label for give batch.
    If model is given, use model to inference pred_dicts,
    otherwise, directly use given pred_dicts.

    Args:
        input_dict: batch data read from dataloader
        pred_dicts: Dict if not given model.
            predict results to be generated pseudo label and saved
        need_update: Bool.
            If set to true, use consistency matching to update pseudo label
    """
    pos_ps_meter = common_utils.AverageMeter()
    ign_ps_meter = common_utils.AverageMeter()

    batch_size = len(pred_dicts)
    for b_idx in range(batch_size):
        pred_cls_scores = pred_iou_scores = None
        if 'pred_boxes' in pred_dicts[b_idx]:
            # some predicted boxes passed the self-training score threshold
            pred_boxes = pred_dicts[b_idx]['pred_boxes'].detach().cpu().numpy()
            pred_labels = pred_dicts[b_idx]['pred_labels'].detach().cpu().numpy()
            pred_scores = pred_dicts[b_idx]['pred_scores'].detach().cpu().numpy()
            if 'pred_cls_scores' in pred_dicts[b_idx]:
                pred_cls_scores = pred_dicts[b_idx]['pred_cls_scores'].detach().cpu().numpy()
            if 'pred_iou_scores' in pred_dicts[b_idx]:
                pred_iou_scores = pred_dicts[b_idx]['pred_iou_scores'].detach().cpu().numpy()

            # per-class score thresholds, indexed by the 1-based labels
            labels_ignore_scores = np.array(cfg.SELF_TRAIN.SCORE_THRESH)[pred_labels - 1]
            ignore_mask = pred_scores < labels_ignore_scores
            pred_labels[ignore_mask] = -1

            # box (7) + label (1) + score (1) -> (N, 9)
            gt_box = np.concatenate(
                (pred_boxes, pred_labels.reshape(-1, 1), pred_scores.reshape(-1, 1)),
                axis=1)

        else:
            # no predicted boxes passed the self-training score threshold
            gt_box = np.zeros((0, 9), dtype=np.float32)

        gt_infos = {
            'gt_boxes': gt_box,
            'cls_scores': pred_cls_scores,
            'iou_scores': pred_iou_scores,
            'memory_counter': np.zeros(gt_box.shape[0])
        }

        # record pseudo label to pseudo label dict
        if need_update:
            ensemble_func = getattr(memory_ensemble_utils,
                                    cfg.SELF_TRAIN.MEMORY_ENSEMBLE.NAME)
            gt_infos = ensemble_func(
                PSEUDO_LABELS[input_dict['frame_id'][b_idx]], gt_infos,
                cfg.SELF_TRAIN.MEMORY_ENSEMBLE)

        # column 7 of gt_boxes holds the label; -1 marks ignored boxes
        if gt_infos['gt_boxes'].shape[0] > 0:
            ign_ps_meter.update((gt_infos['gt_boxes'][:, 7] < 0).sum())
        else:
            ign_ps_meter.update(0)
        pos_ps_meter.update(gt_infos['gt_boxes'].shape[0] - ign_ps_meter.val)

        NEW_PSEUDO_LABELS[input_dict['frame_id'][b_idx]] = gt_infos

    return pos_ps_meter.avg, ign_ps_meter.avg
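The ignore-mask step above indexes a per-class threshold array with the 1-based pred_labels. A self-contained toy illustration, assuming SCORE_THRESH = [0.6, 0.5, 0.4] for three classes (the values are made up):

import numpy as np

score_thresh = np.array([0.6, 0.5, 0.4])      # per-class thresholds; index 0 = class 1
pred_labels = np.array([1, 3, 2, 1])          # 1-based class ids
pred_scores = np.array([0.7, 0.3, 0.55, 0.5])

labels_ignore_scores = score_thresh[pred_labels - 1]  # -> [0.6, 0.4, 0.5, 0.6]
ignore_mask = pred_scores < labels_ignore_scores      # -> [False, True, False, True]
pred_labels[ignore_mask] = -1                         # -> [1, -1, 2, -1]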
Example #4
def train_one_epoch_st(model,
                       optimizer,
                       source_reader,
                       target_loader,
                       model_func,
                       lr_scheduler,
                       accumulated_iter,
                       optim_cfg,
                       rank,
                       tbar,
                       total_it_each_epoch,
                       dataloader_iter,
                       tb_log=None,
                       leave_pbar=False,
                       ema_model=None):
    if total_it_each_epoch == len(target_loader):
        dataloader_iter = iter(target_loader)

    if rank == 0:
        pbar = tqdm.tqdm(total=total_it_each_epoch,
                         leave=leave_pbar,
                         desc='train',
                         dynamic_ncols=True)

    ps_bbox_meter = common_utils.AverageMeter()
    ignore_ps_bbox_meter = common_utils.AverageMeter()
    st_loss_meter = common_utils.AverageMeter()

    disp_dict = {}

    for cur_it in range(total_it_each_epoch):
        lr_scheduler.step(accumulated_iter)

        try:
            cur_lr = float(optimizer.lr)
        except AttributeError:
            cur_lr = optimizer.param_groups[0]['lr']

        if tb_log is not None:
            tb_log.add_scalar('meta_data/learning_rate', cur_lr,
                              accumulated_iter)

        model.train()

        optimizer.zero_grad()
        try:
            target_batch = next(dataloader_iter)
        except StopIteration:
            dataloader_iter = iter(target_loader)
            target_batch = next(dataloader_iter)
            print('new iters')

        # forward pass on the target batch with its (on-the-fly) pseudo labels
        st_loss, st_tb_dict, st_disp_dict = model_func(model, target_batch)
        st_loss.backward()
        st_loss_meter.update(st_loss.item())

        # count number of used ps bboxes in this batch
        pos_pseudo_bbox = target_batch['pos_ps_bbox'].mean().item()
        ign_pseudo_bbox = target_batch['ign_ps_bbox'].mean().item()
        ps_bbox_meter.update(pos_pseudo_bbox)
        ignore_ps_bbox_meter.update(ign_pseudo_bbox)

        st_tb_dict = common_utils.add_prefix_to_dict(st_tb_dict, 'st_')
        disp_dict.update(common_utils.add_prefix_to_dict(st_disp_dict, 'st_'))
        disp_dict.update({
            'st_loss': '{:.3f}({:.3f})'.format(st_loss_meter.val, st_loss_meter.avg),
            'pos_ps_box': ps_bbox_meter.avg,
            'ign_ps_box': ignore_ps_bbox_meter.avg,
        })

        clip_grad_norm_(model.parameters(), optim_cfg.GRAD_NORM_CLIP)
        optimizer.step()
        accumulated_iter += 1

        # log to console and tensorboard
        if rank == 0:
            pbar.update()
            pbar.set_postfix(
                dict(total_it=accumulated_iter,
                     pos_ps_box=ps_bbox_meter.val,
                     ign_ps_box=ignore_ps_bbox_meter.val))
            tbar.set_postfix(disp_dict)
            tbar.refresh()

            if tb_log is not None:
                tb_log.add_scalar('meta_data/learning_rate', cur_lr,
                                  accumulated_iter)
                tb_log.add_scalar('train/st_loss', st_loss.item(), accumulated_iter)
                tb_log.add_scalar('train/pos_ps_bbox', ps_bbox_meter.val,
                                  accumulated_iter)
                tb_log.add_scalar('train/ign_ps_bbox',
                                  ignore_ps_bbox_meter.val, accumulated_iter)
                for key, val in st_tb_dict.items():
                    tb_log.add_scalar('train/' + key, val, accumulated_iter)
    if rank == 0:
        pbar.close()
        if tb_log is not None:
            tb_log.add_scalar('train/epoch_ign_ps_box',
                              ignore_ps_bbox_meter.avg, accumulated_iter)
            tb_log.add_scalar('train/epoch_pos_ps_box', ps_bbox_meter.avg,
                              accumulated_iter)
    return accumulated_iter
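train_one_epoch_st namespaces its metrics with common_utils.add_prefix_to_dict, which is not shown here. A minimal sketch consistent with its use above (prefix every key, return a new dict); the real helper may differ:

def add_prefix_to_dict(dict_in, prefix):
    # 'loss' -> 'st_loss', 'cls_loss' -> 'st_cls_loss', etc.
    return {prefix + key: val for key, val in dict_in.items()}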