Example #1
def load_pretrained_model(model, pretrained_model):
    """Load parameters from a pretrained weights file into `model`, skipping
    keys that are missing or whose shapes do not match."""
    if os.path.exists(pretrained_model):
        para_state_dict = paddle.load(pretrained_model)

        model_state_dict = model.state_dict()
        keys = model_state_dict.keys()
        num_params_loaded = 0
        for k in keys:
            if k not in para_state_dict:
                logger.warning("{} is not in pretrained model".format(k))
            elif list(para_state_dict[k].shape) != list(
                    model_state_dict[k].shape):
                logger.warning(
                    "[SKIP] Shape of pretrained params {} doesn't match. (Pretrained: {}, Actual: {})"
                    .format(k, para_state_dict[k].shape,
                            model_state_dict[k].shape))
            else:
                model_state_dict[k] = para_state_dict[k]
                num_params_loaded += 1
        model.set_dict(model_state_dict)
        logger.info("There are {}/{} variables loaded into {}.".format(
            num_params_loaded, len(model_state_dict),
            model.__class__.__name__))

    else:
        raise ValueError(
            'The pretrained model is not found: {}'.format(pretrained_model))
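
As a quick illustration of how this helper behaves, here is a minimal sketch using a toy paddle.nn.Linear model; the file name 'tiny.pdparams' is made up for the example, and the helper still relies on the module-level os and logger imports of the original script.

import paddle

# Save a toy state dict to a hypothetical file, then load it back through the
# helper above; matching keys are copied, mismatched or missing keys are skipped.
net = paddle.nn.Linear(4, 2)
paddle.save(net.state_dict(), 'tiny.pdparams')
load_pretrained_model(net, 'tiny.pdparams')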
Example #2
def main(args):

    paddle.set_device("gpu")

    cfg = Config(args.cfg)
    val_dataset = cfg.val_dataset
    if val_dataset is None:
        raise RuntimeError(
            'The validation dataset is not specified in the configuration file.'
        )
    elif len(val_dataset) == 0:
        raise ValueError(
            'The length of val_dataset is 0. Please check if your dataset is valid.'
        )

    msg = '\n---------------Config Information---------------\n'
    msg += str(cfg)
    msg += '------------------------------------------------'
    logger.info(msg)

    model = cfg.model
    if args.model_path:
        load_pretrained_model(model, args.model_path)
        logger.info('Loaded trained params of model successfully')

    evaluate(model,
             val_dataset,
             num_workers=args.num_workers,
             output_dir=args.output_dir)
Example #3
def main(args):

    paddle.set_device("gpu")

    cfg = Config(args.cfg)

    model = cfg.model
    model.eval()
    if args.model_path:
        load_pretrained_model(model, args.model_path)
        logger.info('Loaded trained params of model successfully')
    # Camera intrinsic matrix and its inverse (used to map pixels back to camera rays).
    K = np.array([[[2055.56, 0, 939.658], [0, 2055.56, 641.072], [0, 0, 1]]],
                 np.float32)
    K_inverse = np.linalg.inv(K)
    K_inverse = paddle.to_tensor(K_inverse)

    img, ori_img_size, output_size = get_img(args.input_path)

    ratio = get_ratio(ori_img_size, output_size)
    ratio = paddle.to_tensor(ratio)
    cam_info = [K_inverse, ratio]
    total_pred = model(img, cam_info)

    # Filter out low-scoring predictions (threshold on the last column).
    keep_idx = paddle.nonzero(total_pred[:, -1] > 0.25)
    total_pred = paddle.gather(total_pred, keep_idx)

    if total_pred.shape[0] > 0:
        # Assemble 3D box corners from the predicted dimensions, locations and rotations.
        pred_dimensions = total_pred[:, 6:9]
        pred_dimensions = pred_dimensions.roll(shifts=1, axis=1)
        pred_rotys = total_pred[:, 12]
        pred_locations = total_pred[:, 9:12]
        bbox_3d = encode_box3d(pred_rotys, pred_dimensions, pred_locations,
                               paddle.to_tensor(K), (1280, 1920))
    else:
        bbox_3d = total_pred

    # Draw the projected 3D boxes on the original image and write the result.
    img_draw = cv2.imread(args.input_path)
    for idx in range(bbox_3d.shape[0]):
        bbox = bbox_3d[idx]
        bbox = bbox.transpose([1, 0]).numpy()
        img_draw = draw_box_3d(img_draw, bbox)

    cv2.imwrite(args.output_path, img_draw)
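
As background for the intrinsic matrix used above, a small self-contained sketch (with a made-up pixel) of why K_inverse is applied: it maps homogeneous pixel coordinates to a normalized camera ray.

import numpy as np

K = np.array([[2055.56, 0, 939.658],
              [0, 2055.56, 641.072],
              [0, 0, 1]], np.float32)
K_inverse = np.linalg.inv(K)

# A pixel at the principal point maps to the ray (0, 0, 1) in camera coordinates.
pixel = np.array([939.658, 641.072, 1.0], np.float32)
print(K_inverse @ pixel)  # approximately [0. 0. 1.]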
Example #4
def resume(model, optimizer, resume_model):
    """Restore model and optimizer states from a checkpoint directory and
    return the training iteration parsed from the directory name."""
    if resume_model is not None:
        logger.info('Resume model from {}'.format(resume_model))
        if os.path.exists(resume_model):
            resume_model = os.path.normpath(resume_model)
            ckpt_path = os.path.join(resume_model, 'model.pdparams')
            para_state_dict = paddle.load(ckpt_path)
            ckpt_path = os.path.join(resume_model, 'model.pdopt')
            opti_state_dict = paddle.load(ckpt_path)
            model.set_state_dict(para_state_dict)
            optimizer.set_state_dict(opti_state_dict)

            iter = resume_model.split('_')[-1]
            iter = int(iter)
            return iter
        else:
            raise ValueError(
                'The directory of the model to resume from is not found: {}'.
                format(resume_model))
    else:
        logger.info('No model needed to resume.')
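
A minimal sketch of the checkpoint layout this function expects, matching what the train() function in Example #7 writes; the output/iter_100 path and the toy model are hypothetical.

import os
import paddle

net = paddle.nn.Linear(4, 2)
opt = paddle.optimizer.Adam(parameters=net.parameters())

# <save_dir>/iter_<N>/ holds model.pdparams and model.pdopt; the iteration
# number is parsed from the trailing "_<N>" of the directory name.
ckpt_dir = os.path.join('output', 'iter_100')
os.makedirs(ckpt_dir, exist_ok=True)
paddle.save(net.state_dict(), os.path.join(ckpt_dir, 'model.pdparams'))
paddle.save(opt.state_dict(), os.path.join(ckpt_dir, 'model.pdopt'))

start_iter = resume(net, opt, ckpt_dir)  # returns 100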
Example #5
def main(args):

    paddle.set_device("gpu")

    cfg = Config(args.cfg,
                 learning_rate=args.learning_rate,
                 iters=args.iters,
                 batch_size=args.batch_size)

    train_dataset = cfg.train_dataset
    if train_dataset is None:
        raise RuntimeError(
            'The training dataset is not specified in the configuration file.')
    elif len(train_dataset) == 0:
        raise ValueError(
            'The length of train_dataset is 0. Please check if your dataset is valid.'
        )
    val_dataset = None  # cfg.val_dataset if args.do_eval else None
    losses = cfg.loss

    msg = '\n---------------Config Information---------------\n'
    msg += str(cfg)
    msg += '------------------------------------------------'
    logger.info(msg)

    train(cfg.model,
          train_dataset,
          val_dataset=val_dataset,
          optimizer=cfg.optimizer,
          loss_computation=cfg.loss,
          save_dir=args.save_dir,
          iters=cfg.iters,
          batch_size=cfg.batch_size,
          resume_model=args.resume_model,
          save_interval=args.save_interval,
          log_iters=args.log_iters,
          num_workers=args.num_workers,
          keep_checkpoint_max=args.keep_checkpoint_max)
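
The main() functions above read their settings from an args namespace; a hypothetical argparse sketch with assumed flag names (not taken from the original script) could look like this:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, required=True)
parser.add_argument('--learning_rate', type=float, default=None)
parser.add_argument('--iters', type=int, default=None)
parser.add_argument('--batch_size', type=int, default=None)
parser.add_argument('--save_dir', type=str, default='output')
parser.add_argument('--resume_model', type=str, default=None)
parser.add_argument('--save_interval', type=int, default=1000)
parser.add_argument('--log_iters', type=int, default=10)
parser.add_argument('--num_workers', type=int, default=0)
parser.add_argument('--keep_checkpoint_max', type=int, default=5)
args = parser.parse_args()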
Example #6
def evaluate(model,
             eval_dataset,
             num_workers=0,
             output_dir="./output",
             print_detail=True):
    """
    Launch evalution.

    Args:
        model(nn.Layer): A model.
        eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.

    Returns:
        float: The mIoU of validation datasets.
        float: The accuracy of validation datasets.
    """
    model.eval()

    batch_sampler = paddle.io.BatchSampler(eval_dataset,
                                           batch_size=1,
                                           shuffle=False,
                                           drop_last=False)
    loader = paddle.io.DataLoader(
        eval_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    total_iters = len(loader)

    if print_detail:
        logger.info(
            "Start evaluating (total_samples={}, total_iters={})...".format(
                len(eval_dataset), total_iters))
    progbar_val = progbar.Progbar(target=total_iters, verbose=1)
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    batch_start = time.time()
    predictions = {}
    with paddle.no_grad():
        for cur_iter, batch in enumerate(loader):
            reader_cost_averager.record(time.time() - batch_start)
            images, targets, image_ids = batch[0], batch[1], batch[2]

            output = model(images, targets)

            output = output.numpy()
            predictions.update({img_id: output for img_id in image_ids})

            batch_cost_averager.record(time.time() - batch_start,
                                       num_samples=len(targets))
            batch_cost = batch_cost_averager.get_average()
            reader_cost = reader_cost_averager.get_average()

            if print_detail:
                progbar_val.update(cur_iter + 1,
                                   [('batch_cost', batch_cost),
                                    ('reader cost', reader_cost)])
            reader_cost_averager.reset()
            batch_cost_averager.reset()
            batch_start = time.time()

    kitti_evaluation(eval_dataset, predictions, output_dir=output_dir)
Example #7
def train(model,
          train_dataset,
          val_dataset=None,
          optimizer=None,
          loss_computation=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval=1000,
          log_iters=10,
          num_workers=0,
          keep_checkpoint_max=5):
    """
    Launch training.

    Args:
        model (nn.Layer): The model to train.
        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
        optimizer (paddle.optimizer.Optimizer): The optimizer.
        loss_computation (nn.Layer): A loss function.
        save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
        iters (int, optional): How many iterations to train the model. Default: 10000.
        batch_size (int, optional): Mini batch size per GPU or CPU card. Default: 2.
        resume_model (str, optional): The path of the checkpoint to resume from.
        save_interval (int, optional): How many iterations between saving model snapshots during training. Default: 1000.
        log_iters (int, optional): Display logging information at every log_iters. Default: 10.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
    """
    model.train()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        # Initialize the parallel environment if it has not been done yet.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized():
            paddle.distributed.init_parallel_env()
        ddp_model = paddle.DataParallel(model)

    batch_sampler = paddle.io.DistributedBatchSampler(train_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True,
                                                      drop_last=True)

    loader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    # VisualDL log
    log_writer = LogWriter(save_dir)

    avg_loss = 0.0
    avg_loss_dict = {}
    iters_per_epoch = len(batch_sampler)

    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    save_models = deque()
    batch_start = time.time()

    iter = start_iter
    while iter < iters:
        for data in loader:
            iter += 1
            if iter > iters:
                break
            reader_cost_averager.record(time.time() - batch_start)
            images = data[0]
            targets = data[1]

            if nranks > 1:
                predictions = ddp_model(images)
            else:
                predictions = model(images)

            loss_dict = loss_computation(predictions, targets)
            loss = sum(loss for loss in loss_dict.values())
            loss.backward()

            optimizer.step()
            lr = optimizer.get_lr()
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                optimizer._learning_rate.step()
            model.clear_gradients()
            avg_loss += loss.numpy()[0]  # get the value
            if len(avg_loss_dict) == 0:
                avg_loss_dict = {k: v.numpy()[0] for k, v in loss_dict.items()}
            else:
                for key, value in loss_dict.items():
                    avg_loss_dict[key] += value.numpy()[0]

            batch_cost_averager.record(time.time() - batch_start,
                                       num_samples=batch_size)

            if (iter) % log_iters == 0 and local_rank == 0:
                avg_loss /= log_iters
                for key, value in avg_loss_dict.items():
                    avg_loss_dict[key] /= log_iters

                remain_iters = iters - iter
                avg_train_batch_cost = batch_cost_averager.get_average()
                avg_train_reader_cost = reader_cost_averager.get_average()
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                logger.info(
                    "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.5f} | ETA {}"
                    .format((iter - 1) // iters_per_epoch + 1, iter, iters,
                            avg_loss, lr, avg_train_batch_cost,
                            avg_train_reader_cost, eta))

                ######################### VisualDL Log ##########################
                log_writer.add_scalar('Train/loss', avg_loss, iter)
                # Record all losses if there are more than 2 losses.
                for key, value in avg_loss_dict.items():
                    log_tag = 'Train/' + key
                    log_writer.add_scalar(log_tag, value, iter)

                log_writer.add_scalar('Train/lr', lr, iter)
                log_writer.add_scalar('Train/batch_cost', avg_train_batch_cost,
                                      iter)
                log_writer.add_scalar('Train/reader_cost',
                                      avg_train_reader_cost, iter)
                #################################################################

                avg_loss = 0.0
                avg_loss_dict = {}
                reader_cost_averager.reset()
                batch_cost_averager.reset()

            if (iter % save_interval == 0
                    or iter == iters) and local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                paddle.save(model.state_dict(),
                            os.path.join(current_save_dir, 'model.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(current_save_dir, 'model.pdopt'))
                save_models.append(current_save_dir)
                if len(save_models) > keep_checkpoint_max > 0:
                    model_to_remove = save_models.popleft()
                    shutil.rmtree(model_to_remove)

            batch_start = time.time()

    # Sleep for half a second to let dataloader release resources.
    time.sleep(0.5)
    log_writer.close()
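
The checkpoint-retention logic near the end of train() can be exercised in isolation; the directory names below are made up:

import os
import shutil
from collections import deque

keep_checkpoint_max = 2
save_models = deque()

# Only the newest `keep_checkpoint_max` snapshot directories are kept;
# older ones are removed as new checkpoints arrive.
for step in (100, 200, 300):
    current_save_dir = os.path.join('output', 'iter_{}'.format(step))
    os.makedirs(current_save_dir, exist_ok=True)
    save_models.append(current_save_dir)
    if len(save_models) > keep_checkpoint_max > 0:
        shutil.rmtree(save_models.popleft())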