Example #1
	def __init__(self, node_name=''):

		self.rosparam_(node_name)

		self.create_logger_(node_name)
		self.logger.info('ROS parameters loaded successfully')
		self.logger.info(
			'********************** Start logging **********************')
		save_config_to_file(cfg, logger=self.logger)
		self.logger.info('ROS buffer_len: {}'.format(self.buffer_len))
		self.logger.info('ROS lidar_topic: {}'.format(self.lidar_topic))
		self.logger.info('ROS debug_flag: {}'.format(self.debug_flag))
		self.logger.info(
			'ROS depth_threshold: {}'.format(self.depth_threshold))
		self.logger.info(
			'ROS score_threshold: {}'.format(self.score_threshold))
		self.logger.info('ROS model_checkpoint: {}'.format(self.model_checkpoint))

		self.model = PointRCNN(num_classes=self.num_class,
							   use_xyz=True, mode='TEST')
		self.model.cuda()
		load_checkpoint(
			self.model,
			filename=str(self.base_dir / self.model_checkpoint),
			logger=self.logger)
		self.model.eval()

		self.logger.info('Model initialization complete')
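The `rosparam_` and `create_logger_` helpers called above are not shown in this snippet. Below is a minimal sketch of `create_logger_` using Python's standard logging module; the class name, log-file location, and format string are assumptions, not the original implementation:

import logging
import os


class DetectorNodeSketch:
    # Only the logging helper is sketched here; ROS parameter loading is omitted.

    def create_logger_(self, node_name):
        # Write node logs to a file in the home directory and echo them to the console.
        log_file = os.path.join(os.path.expanduser('~'), '%s.log' % node_name)
        logger = logging.getLogger(node_name)
        logger.setLevel(logging.INFO)
        formatter = logging.Formatter('%(asctime)s  %(levelname)5s  %(message)s')

        for handler in (logging.FileHandler(log_file), logging.StreamHandler()):
            handler.setFormatter(formatter)
            logger.addHandler(handler)

        self.logger = logger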
Example #2
def repeat_eval_ckpt(root_result_dir, ckpt_dir):
    root_result_dir = os.path.join(root_result_dir, 'eval', 'eval_all_' + args.extra_tag)
    os.makedirs(root_result_dir, exist_ok=True)

    log_file = os.path.join(root_result_dir, 'log_eval_all_%s.txt' % cfg.TEST.SPLIT)
    logger = create_logger(log_file)
    logger.info('**********************Start logging**********************')

    # save config
    for key, val in vars(args).items():
        logger.info("{:16} {}".format(key, val))
    save_config_to_file(cfg, logger=logger)

    # create dataloader & network
    test_loader = create_dataloader(logger)
    model = PointRCNN(num_classes=test_loader.dataset.num_class, use_xyz=True, mode='TEST')
    model.cuda()

    # copy important files to backup
    backup_dir = os.path.join(root_result_dir, 'backup_files')
    os.makedirs(backup_dir, exist_ok=True)
    os.system('cp *.py %s/' % backup_dir)
    os.system('cp ../lib/net/*.py %s/' % backup_dir)
    os.system('cp ../lib/datasets/kitti_rcnn_dataset.py %s/' % backup_dir)

    # evaluated ckpt record
    ckpt_record_file = os.path.join(root_result_dir, 'eval_list_%s.txt' % cfg.TEST.SPLIT)
    with open(ckpt_record_file, 'a'):
        pass

    # tensorboard log
    tb_log = SummaryWriter(log_dir=os.path.join(root_result_dir, 'tensorboard_%s' % cfg.TEST.SPLIT))

    while True:
        # check whether there is checkpoint which is not evaluated
        cur_epoch_id, cur_ckpt = get_no_evaluated_ckpt(ckpt_dir, ckpt_record_file)
        if cur_epoch_id == -1 or int(float(cur_epoch_id)) < args.start_epoch:
            wait_second = 30
            print('Wait %s seconds for next check: %s' % (wait_second, ckpt_dir))
            time.sleep(wait_second)
            continue

        # load checkpoint
        train_utils.load_checkpoint(model, filename=cur_ckpt)

        # start evaluation
        cur_result_dir = os.path.join(root_result_dir, 'epoch_%s' % cur_epoch_id, cfg.TEST.SPLIT)
        tb_dict = eval_one_epoch(model, test_loader, cur_epoch_id, cur_result_dir, logger)

        step = int(float(cur_epoch_id))
        if step == float(cur_epoch_id):
            for key, val in tb_dict.items():
                tb_log.add_scalar(key, val, step)

        # record this epoch which has been evaluated
        with open(ckpt_record_file, 'a') as f:
            print('%s' % cur_epoch_id, file=f)
        logger.info('Epoch %s has been evaluated' % cur_epoch_id)
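`get_no_evaluated_ckpt` is not defined in this snippet. A plausible sketch is shown below; the `checkpoint_epoch_*.pth` naming pattern is an assumption, and the function returns `(-1, None)` when every checkpoint on disk is already listed in the record file:

import glob
import os
import re


def get_no_evaluated_ckpt(ckpt_dir, ckpt_record_file):
    # Collect all checkpoints and the epoch ids that were already evaluated.
    ckpt_list = glob.glob(os.path.join(ckpt_dir, 'checkpoint_epoch_*.pth'))
    ckpt_list.sort(key=os.path.getmtime)
    with open(ckpt_record_file, 'r') as f:
        evaluated = [line.strip() for line in f.readlines()]

    for cur_ckpt in ckpt_list:
        epoch_id = re.findall(r'checkpoint_epoch_(.*)\.pth', cur_ckpt)
        if epoch_id and epoch_id[0] not in evaluated:
            return epoch_id[0], cur_ckpt

    # Nothing new to evaluate yet.
    return -1, None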
Example #3
def load_ckpt_based_on_args(model, logger):
    if args.ckpt is not None:
        train_utils.load_checkpoint(model, filename=args.ckpt, logger=logger)

    total_keys = len(model.state_dict())
    if cfg.RPN.ENABLED and args.rpn_ckpt is not None:
        load_part_ckpt(model, filename=args.rpn_ckpt, logger=logger, total_keys=total_keys)

    if cfg.RCNN.ENABLED and args.rcnn_ckpt is not None:
        load_part_ckpt(model, filename=args.rcnn_ckpt, logger=logger, total_keys=total_keys)
Example #4
def load_ckpt_based_on_cfg(config, model, logger):
    if config['ckpt'] is not None:
        train_utils.load_checkpoint(model,
                                    filename=config['ckpt'],
                                    logger=logger)

    total_keys = len(model.state_dict())
    if cfg.RPN.ENABLED and config['rpn_ckpt'] is not None:
        load_part_ckpt(model,
                       filename=config['rpn_ckpt'],
                       logger=logger,
                       total_keys=total_keys)

    if cfg.RCNN.ENABLED and config['rcnn_ckpt'] is not None:
        load_part_ckpt(model,
                       filename=config['rcnn_ckpt'],
                       logger=logger,
                       total_keys=total_keys)
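A hypothetical call for this config-driven variant (the checkpoint path below is a placeholder, and `model` and `logger` are assumed to exist already):

config = {
    'ckpt': None,
    'rpn_ckpt': 'checkpoints/rpn_best.pth',   # placeholder path
    'rcnn_ckpt': None,
}
load_ckpt_based_on_cfg(config, model, logger)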
Example #5
def load_ckpt_based_on_args(model, logger):
    """
    Input: model and logger instance
    Output: None
    Task: Loads ckpt based on the args --rpn_ckpt and  --rcnn_ckpt
    """
    if args.ckpt is not None:
        train_utils.load_checkpoint(model, filename=args.ckpt, logger=logger)

    total_keys = len(model.state_dict())
    if cfg.RPN.ENABLED and args.rpn_ckpt is not None:
        load_part_ckpt(model,
                       filename=args.rpn_ckpt,
                       logger=logger,
                       total_keys=total_keys)

    if cfg.RCNN.ENABLED and args.rcnn_ckpt is not None:
        load_part_ckpt(model,
                       filename=args.rcnn_ckpt,
                       logger=logger,
                       total_keys=total_keys)
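`load_part_ckpt` itself does not appear in these snippets. The usual pattern, sketched here under the assumption that checkpoints store their weights under a `'model_state'` key, filters the saved state_dict down to keys the model actually has, merges them into the current state, and reports coverage against `total_keys`:

import torch


def load_part_ckpt(model, filename, logger, total_keys=-1):
    # Load only the parameters that exist in the current model.
    checkpoint = torch.load(filename)
    model_state = checkpoint.get('model_state', checkpoint)

    update_state = {key: val for key, val in model_state.items()
                    if key in model.state_dict()}
    state_dict = model.state_dict()
    state_dict.update(update_state)
    model.load_state_dict(state_dict)

    logger.info('==> Loaded %d/%d parameters from %s'
                % (len(update_state), total_keys, filename))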
Example #6
def train():
    print(args.local_rank)
    torch.cuda.set_device(args.local_rank)
    # create dataloader & network & optimizer
    model, model_fn_decorator, net_func = create_model(cfg)
    init_weights(model, init_type='kaiming')
    model.cuda()
    root_result_dir = args.output_dir
    os.makedirs(root_result_dir, exist_ok=True)

    log_file = os.path.join(root_result_dir, "log_train.txt")
    logger = create_logger(log_file, get_rank())
    logger.info("**********************Start logging**********************")
    logger.info('TRAINED MODEL:{}'.format(net_func))

    # log to file
    gpu_list = os.environ.get('CUDA_VISIBLE_DEVICES', 'ALL')
    logger.info("CUDA_VISIBLE_DEVICES=%s" % gpu_list)

    for key, val in vars(args).items():
        logger.info("{:16} {}".format(key, val))

    logger.info("***********************config infos**********************")
    for key, val in vars(cfg).items():
        logger.info("{:16} {}".format(key, val))

    # log tensorboard
    if get_rank() == 0:
        tb_log = SummaryWriter(
            log_dir=os.path.join(root_result_dir, "tensorboard"))
    else:
        tb_log = None

    train_loader, test_loader = create_dataloader()
    # train_loader, test_loader = create_dataloader_Insensee()

    optimizer = create_optimizer(model)

    # load checkpoint if it is possible
    start_epoch = it = best_res = 0
    last_epoch = -1
    if args.ckpt is not None:
        pure_model = model
        it, start_epoch, best_res = load_checkpoint(pure_model, optimizer,
                                                    args.ckpt, logger)
        last_epoch = start_epoch + 1

    lr_scheduler = create_scheduler(optimizer, last_epoch=last_epoch)
    # lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.98, last_epoch=-1)

    criterion = None

    # start training
    logger.info('**********************Start training**********************')
    ckpt_dir = os.path.join(root_result_dir, "ckpt")
    os.makedirs(ckpt_dir, exist_ok=True)
    trainer = train_utils.Trainer(model,
                                  model_fn=model_fn_decorator(),
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  ckpt_dir=ckpt_dir,
                                  lr_scheduler=lr_scheduler,
                                  model_fn_eval=model_fn_decorator(),
                                  tb_log=tb_log,
                                  logger=logger,
                                  eval_frequency=1,
                                  cfg=cfg)

    trainer.train(start_it=it,
                  start_epoch=start_epoch,
                  n_epochs=args.epochs,
                  train_loader=train_loader,
                  test_loader=test_loader,
                  ckpt_save_interval=args.ckpt_save_interval,
                  best_res=best_res)

    logger.info('**********************End training**********************')
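The `load_checkpoint` used in this example returns `(it, start_epoch, best_res)`. A minimal sketch, assuming the checkpoint dict stores `'model_state'`, `'optimizer_state'`, `'it'`, `'epoch'`, and `'best_res'` entries, could look like this:

import torch


def load_checkpoint(model, optimizer, filename, logger):
    # Assumed checkpoint layout: model/optimizer state plus training counters.
    logger.info('==> Loading checkpoint %s' % filename)
    checkpoint = torch.load(filename)

    model.load_state_dict(checkpoint['model_state'])
    if optimizer is not None and checkpoint.get('optimizer_state') is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state'])

    it = checkpoint.get('it', 0)
    start_epoch = checkpoint.get('epoch', 0)
    best_res = checkpoint.get('best_res', 0)
    return it, start_epoch, best_res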
Example #7
    model = PointRCNN(num_classes=train_loader.dataset.num_class,
                      use_xyz=True,
                      mode='TRAIN')
    optimizer = create_optimizer(model)

    if args.mgpus:
        model = nn.DataParallel(model)
    model.cuda()

    # load checkpoint if it is possible
    start_epoch = it = 0
    last_epoch = -1
    if args.ckpt is not None:
        pure_model = model.module if isinstance(
            model, torch.nn.DataParallel) else model
        it, start_epoch = train_utils.load_checkpoint(pure_model,
                                                      optimizer,
                                                      filename=args.ckpt,
                                                      logger=logger)
        last_epoch = start_epoch + 1

    lr_scheduler, bnm_scheduler = create_scheduler(
        optimizer,
        total_steps=len(train_loader) * args.epochs,
        last_epoch=last_epoch)

    if args.rpn_ckpt is not None:
        pure_model = model.module if isinstance(
            model, torch.nn.DataParallel) else model
        total_keys = len(pure_model.state_dict())
        train_utils.load_part_ckpt(pure_model,
                                   filename=args.rpn_ckpt,
                                   logger=logger,
                                   total_keys=total_keys)
Example #8
    # create dataloader & network & optimizer
    train_loader, test_loader = create_dataloader(logger)
    model = PointRCNN(num_classes=train_loader.dataset.num_class, use_xyz=True, mode='TRAIN')
    optimizer = create_optimizer(model)

    if args.mgpus:
        model = nn.DataParallel(model)
    model.cuda()

    # load checkpoint if it is possible
    start_iter = it = 0
    last_iter = -1

    if args.pretrain_ckpt is not None:
        pure_model = model.module if isinstance(model, torch.nn.DataParallel) else model
        _, _ = train_utils.load_checkpoint(pure_model, None, filename=args.pretrain_ckpt, logger=logger)
        it = int(args.total_iters*9/30)

    if args.ckpt is not None:
        pure_model = model.module if isinstance(model, torch.nn.DataParallel) else model
        it, _ = train_utils.load_checkpoint(pure_model, optimizer, filename=args.ckpt, logger=logger)
        last_iter = it + 1

    lr_scheduler, bnm_scheduler = create_scheduler(optimizer, total_steps=args.total_iters, last_iter=last_iter)

    lr_warmup_scheduler = None

    # start training
    logger.info('**********************Start training**********************')
    logger.info('experiment ID: %s' % '/'.join(root_result_dir.split('/')[-3:]))
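`create_scheduler` in this iteration-based variant takes `total_steps` and `last_iter` and returns an LR scheduler together with a batch-norm momentum scheduler. A simple sketch using cosine decay is shown below; the decay shape is an assumption and the batch-norm momentum scheduler is omitted:

import math

import torch.optim.lr_scheduler as lr_sched


def create_scheduler(optimizer, total_steps, last_iter=-1):
    # Cosine decay from the initial LR down to zero over total_steps iterations.
    def lr_lbmd(cur_iter):
        return 0.5 * (1 + math.cos(math.pi * min(cur_iter, total_steps) / total_steps))

    # LambdaLR needs 'initial_lr' in each param group when resuming (last_iter >= 0).
    for group in optimizer.param_groups:
        group.setdefault('initial_lr', group['lr'])

    lr_scheduler = lr_sched.LambdaLR(optimizer, lr_lbmd, last_epoch=last_iter)
    bnm_scheduler = None  # batch-norm momentum scheduling omitted in this sketch
    return lr_scheduler, bnm_scheduler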