示例#1
0
def main():
    """Run EAST detection on every file in ``args.img_path`` and save the plots.

    Reads ``args.trained_model``, ``args.img_path``, ``args.save_folder`` and
    ``args.show_image`` from the module-level ``args`` object (defined outside
    this block). Any existing ``args.save_folder`` is deleted and recreated
    before results are written, one output image per input file, under the
    same file name.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = EAST()
    model = model.eval()
    model = model.to(device)
    # map_location lets a checkpoint that was saved on a GPU load on a
    # CPU-only machine, matching the CPU fallback chosen above.
    model.load_state_dict(torch.load(args.trained_model, map_location=device))

    # Start from an empty output folder on every run.
    if os.path.exists(args.save_folder):
        shutil.rmtree(args.save_folder)
    # makedirs (rather than mkdir) also handles a nested save folder.
    os.makedirs(args.save_folder)

    test_process = tqdm(os.listdir(args.img_path), ascii=True,
                        desc="Processing")
    for img_file in test_process:
        # Keep every use of the image inside the ``with`` block so the file
        # handle is released once this image has been handled.
        with Image.open(os.path.join(args.img_path, img_file)) as img:
            boxes = detect(img, model, device)
            # Draw the detected boxes onto the image.
            plot_img = plot_boxes(img, boxes)
            plot_img.save(os.path.join(args.save_folder, img_file))

            if args.show_image:
                plot_img.show()
示例#2
0
def eval_model(model_name, test_img_path, submit_path, save_flag=True):
    """Run detection on a test set, zip the results and score them.

    Args:
        model_name: Path of the checkpoint file loaded into the EAST model.
        test_img_path: Directory of test images handed to ``detect_dataset``.
        submit_path: Directory the per-image ``.txt`` results are written to.
            It is deleted first if it already exists.
        save_flag: When false, ``submit_path`` is removed again at the end.

    The evaluation step shells out to ``zip``, ``mv`` and
    ``python ./evaluate/script.py``, executed from the parent directory of
    ``submit_path``; the script's output is printed. The caller's working
    directory is restored before returning.
    """
    if os.path.exists(submit_path):
        shutil.rmtree(submit_path)
    os.makedirs(submit_path)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = EAST(False).to(device)
    # map_location lets a GPU-saved checkpoint load on a CPU-only machine.
    model.load_state_dict(torch.load(model_name, map_location=device))
    model.eval()

    start_time = time.time()
    detect_dataset(model, device, test_img_path, submit_path)

    # The shell commands below rely on relative paths, so the working
    # directory is switched temporarily and always restored afterwards.
    original_cwd = os.getcwd()
    try:
        os.chdir(submit_path)
        subprocess.getoutput('zip -q submit.zip *.txt')
        subprocess.getoutput('mv submit.zip ../')
        os.chdir('../')
        # ASCII hyphens here: the flags were previously written with en
        # dashes, which standard option parsers do not treat as flags.
        res = subprocess.getoutput(
            'python ./evaluate/script.py -g=./evaluate/gt.zip -s=./submit.zip')
        print(res)
        os.remove('./submit.zip')
    finally:
        os.chdir(original_cwd)
    print('eval time is {}'.format(time.time() - start_time))

    if not save_flag:
        shutil.rmtree(submit_path)
示例#3
0
	'''
	img_files = os.listdir(test_img_path)
	img_files = sorted([os.path.join(test_img_path, img_file) for img_file in img_files])
	
	for i, img_file in enumerate(img_files):
              try:
                print('evaluating {} image'.format(i))
                boxes = detect(Image.open(img_file), model, device)
                seq = []
                if boxes is not None:
                  seq.extend([','.join([str(int(b)) for b in box[:-1]]) + '\n' for box in boxes])
                with open(os.path.join(submit_path, os.path.basename(img_file).replace('.jpg','.txt')), 'w') as f:
                  f.writelines(seq)
              except:
                print('overload ram')


if __name__ == '__main__':
    # Script entry point: load the trained EAST weights and write one result
    # file per test image into submit_path.
    img_path = '/content/test/'
    submit_path = '/content/res/'
    model_path = './pths/east_vgg16.pth'

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = EAST().to(device)
    # map_location lets a GPU-saved checkpoint load on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    # detect_dataset opens its output files inside submit_path, so make sure
    # the directory exists before it runs.
    os.makedirs(submit_path, exist_ok=True)
    detect_dataset(model, device, img_path, submit_path)


示例#4
0
def train(train_ds_path,
          val_ds_path,
          pths_path,
          results_path,
          batch_size,
          lr,
          num_workers,
          train_iter,
          interval,
          opt_level=0,
          checkpoint_path=None,
          val_freq=10):
    """Train EAST with apex AMP, optional DDP, validation and checkpointing.

    This function reads ``rank``, ``world_size``, ``barrier`` and ``logger``
    from outside this block (presumably module-level globals set up by the
    launcher -- confirm against the rest of the file).

    Args:
        train_ds_path: Dataset root containing ``images`` and ``gt`` folders.
        val_ds_path: Validation root with the same ``images``/``gt`` layout.
        pths_path: Not used by this function; kept for interface
            compatibility.
        results_path: Directory under which ``logs`` (TensorBoard) and
            ``checkpoints`` are created.
        batch_size: Batch size given to each process's DataLoader.
        lr: Learning rate for Adam; also used as the scheduler's
            ``initial_lr``.
        num_workers: Number of DataLoader worker processes.
        train_iter: Total number of optimizer steps to run. The learning
            rate is multiplied by 0.1 at ``train_iter // 2``.
        interval: A checkpoint is written every ``interval`` epochs.
        opt_level: Digit of the apex AMP optimisation level (``O<digit>``).
        checkpoint_path: Optional checkpoint to resume from. Overridden by
            the AutoResume details when a resumed job is detected.
        val_freq: Validation runs every ``val_freq`` epochs and after the
            final step.

    Raises:
        RuntimeError: If the training loader yields no batches at all.
    """
    torch.cuda.set_device(rank)

    tensorboard_dir = os.path.join(results_path, 'logs')
    checkpoints_dir = os.path.join(results_path, 'checkpoints')
    if rank == 0:
        os.makedirs(tensorboard_dir, exist_ok=True)
        os.makedirs(checkpoints_dir, exist_ok=True)
    barrier()

    # AutoResume is optional: any failure to import or initialise it simply
    # disables the feature. ``Exception`` (not a bare except) keeps
    # KeyboardInterrupt/SystemExit propagating.
    try:
        logger.info('Importing AutoResume lib...')
        from userlib.auto_resume import AutoResume as auto_resume
        auto_resume.init()
        logger.info('Success!')
    except Exception:
        logger.info('Failed!')
        auto_resume = None

    trainset = custom_dataset(
        os.path.join(train_ds_path, 'images'),
        os.path.join(train_ds_path, 'gt'),
    )

    valset = custom_dataset(os.path.join(val_ds_path, 'images'),
                            os.path.join(val_ds_path, 'gt'),
                            is_val=True)

    logger.info(f'World Size: {world_size}, Rank: {rank}')

    if world_size > 1:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            trainset)
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            valset, shuffle=False)
    else:
        train_sampler = None
        val_sampler = None

    worker_init = LoaderWorkerProcessInit(rank, 43)
    train_loader = DataLoader(trainset,
                              batch_size=batch_size,
                              shuffle=train_sampler is None,
                              sampler=train_sampler,
                              num_workers=num_workers,
                              pin_memory=True,
                              drop_last=True,
                              worker_init_fn=worker_init)
    val_loader = DataLoader(valset,
                            batch_size=batch_size,
                            shuffle=False,
                            sampler=val_sampler,
                            num_workers=num_workers,
                            pin_memory=True,
                            drop_last=True,
                            worker_init_fn=worker_init)

    criterion = Loss()

    device = torch.device(
        f"cuda:{rank}" if torch.cuda.is_available() else "cpu")
    model = EAST()
    model.to(device)

    model = apex.parallel.convert_syncbn_model(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=f'O{opt_level}')

    start_iter = 0
    if auto_resume is not None:
        auto_resume_details = auto_resume.get_resume_details()
        if auto_resume_details is not None:
            logger.info(
                'Detected that this is a resumption of a previous job!')
            checkpoint_path = auto_resume_details['CHECKPOINT_PATH']

    if checkpoint_path:
        logger.info(f'Loading checkpoint at path "{checkpoint_path}"...')
        checkpoint = torch.load(checkpoint_path, map_location=f'cuda:{rank}')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        amp.load_state_dict(checkpoint['amp_state'])
        start_iter = checkpoint['iter']
        logger.info('Done')

    # Keep a handle on the unwrapped model so checkpoints are saved without
    # the DDP wrapper around the state dict.
    data_parallel = False
    main_model = model
    if torch.distributed.is_initialized():
        logger.info(
            f'DataParallel: Using {torch.cuda.device_count()} devices!')
        model = DDP(model)
        data_parallel = True

    # The scheduler is built with an explicit last_epoch, which requires
    # every param group to carry 'initial_lr'.
    for param_group in optimizer.param_groups:
        param_group.setdefault('initial_lr', lr)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[train_iter // 2],
                                         gamma=0.1,
                                         last_epoch=start_iter)

    # This allows us to change dataset size without affecting things such as
    # validation frequency. Clamped to 1 so a very large global batch cannot
    # produce zero steps per epoch (and a division by zero below).
    steps_per_epoch = max(1, 1000 // (world_size * batch_size))

    step = start_iter
    start_epoch = step // steps_per_epoch
    epoch_iter = int(math.ceil(train_iter / steps_per_epoch))
    writer = None
    if rank == 0:
        logger.info('Initializing Tensorboard')
        writer = SummaryWriter(tensorboard_dir, purge_step=step)

    loss_meters = MeterDict(reset_on_value=True)
    val_loss_meters = MeterDict(reset_on_value=True)
    time_meters = MeterDict(reset_on_value=True)

    logger.info('Training')
    model.train()

    train_start_time = time.time()

    best_loss = float('inf')

    # Deliberately NOT named ``train_iter``: reusing that name would shadow
    # the step-count parameter and make ``step == train_iter`` never true.
    loader_iter = iter(train_loader)

    def get_batch():
        """Return the next training batch, restarting the loader when exhausted."""
        nonlocal loader_iter
        try:
            return next(loader_iter)
        except StopIteration:
            loader_iter = iter(train_loader)
        try:
            return next(loader_iter)
        except StopIteration:
            # A freshly created iterator that is already exhausted means the
            # loader can never produce a batch; fail loudly instead of
            # retrying forever.
            raise RuntimeError(
                'train_loader yielded no batches') from None

    def save_checkpoint(path, epoch_number, iteration):
        """Write model, optimizer and AMP state to ``path``."""
        torch.save(
            {
                'model': main_model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'amp_state': amp.state_dict(),
                'epoch': epoch_number,
                'iter': iteration
            }, path)

    for epoch in range(start_epoch, epoch_iter):
        if train_sampler is not None:
            train_sampler.set_epoch(epoch)

        epoch_time = time.time()
        start_time = time.time()

        model.train()

        for i in range(steps_per_epoch):
            batch = get_batch()

            optimizer.zero_grad()

            batch = [b.cuda(rank, non_blocking=True) for b in batch]

            img, gt_score, gt_geo, ignored_map = batch
            barrier()
            time_meters['batch_time'].add_sample(time.time() - start_time)

            pred_score, pred_geo = model(img)

            loss, details = criterion(gt_score, pred_score, gt_geo, pred_geo,
                                      ignored_map)

            with amp.scale_loss(loss, optimizer) as loss_scaled:
                loss_scaled.backward()
            optimizer.step()

            barrier()
            time_meters['step_time'].add_sample(time.time() - start_time)

            details['global'] = loss.detach().item()

            for k, v in details.items():
                loss_meters[k].add_sample(v)

            if i % 10 == 0:
                logger.info(f'\tStep [{i+1}/{steps_per_epoch}]')

            start_time = time.time()
            step += 1
            scheduler.step()

            # Stop exactly at the requested number of optimizer steps.
            if step == train_iter:
                break

        term_requested = (auto_resume is not None
                          and auto_resume.termination_requested())

        # From here on checkpoint_path means "checkpoint written this epoch".
        checkpoint_path = None
        if rank == 0:
            times = {k: m.value() for k, m in time_meters.items()}
            losses = {k: m.value() for k, m in loss_meters.items()}

            times['epoch'] = time.time() - epoch_time

            logger.info(
                f'Epoch is [{epoch+1}/{epoch_iter}], time consumption is {times}, batch_loss is {losses}'
            )

            for k, v in times.items():
                writer.add_scalar(f'performance/{k}', v, step)
            for k, v in losses.items():
                writer.add_scalar(f'loss/{k}', v, step)
            writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'],
                              step)

            if term_requested or (epoch + 1) % interval == 0:
                checkpoint_path = os.path.join(
                    checkpoints_dir, 'model_epoch_{}.pth'.format(epoch + 1))
                logger.info(f'Saving checkpoint to "{checkpoint_path}"...')
                save_checkpoint(checkpoint_path, epoch + 1, step)
                logger.info(f'Done')

        if (epoch + 1) % val_freq == 0 or step == train_iter:
            logger.info(f'Validating epoch {epoch+1}...')
            model.eval()
            val_loader.dataset.reset_random()
            with torch.no_grad():
                for i, batch in enumerate(val_loader):
                    batch = [b.cuda(rank, non_blocking=True) for b in batch]

                    img, gt_score, gt_geo, ignored_map = batch
                    barrier()

                    pred_score, pred_geo = model(img)

                    loss, details = criterion(gt_score, pred_score, gt_geo,
                                              pred_geo, ignored_map)
                    details['global'] = loss.detach().item()

                    barrier()

                    for k, v in details.items():
                        val_loss_meters[k].add_sample(v)

            print_dict = dict()
            for k, m in val_loss_meters.items():
                t = torch.tensor(m.value(),
                                 device=f'cuda:{rank}',
                                 dtype=torch.float32)
                if world_size > 1:
                    # NOTE(review): reduce() only defines the result on the
                    # destination rank (0); the values logged by other ranks
                    # are presumably not the global mean -- confirm.
                    torch.distributed.reduce(t, 0)
                    t /= world_size
                if rank == 0:
                    writer.add_scalar(f'val/loss/{k}', t.item(), step)
                print_dict[k] = t.item()
            logger.info(f'\tLoss: {print_dict}')
            val_loss = print_dict['global']
            if rank == 0 and val_loss < best_loss:
                logger.info(
                    f'This is the best model so far. New loss: {val_loss}, previous: {best_loss}'
                )
                best_loss = val_loss
                best_path = os.path.join(checkpoints_dir, 'best.pth')
                if checkpoint_path is not None:
                    shutil.copyfile(checkpoint_path, best_path)
                else:
                    # Validation ran on an epoch with no periodic checkpoint,
                    # so there is nothing to copy: write the state directly.
                    save_checkpoint(best_path, epoch + 1, step)
            logger.info('Training')

        if term_requested:
            logger.warning('Termination requested! Exiting...')
            if rank == 0:
                # A checkpoint is always written on rank 0 when termination
                # was requested, so checkpoint_path is set here.
                auto_resume.request_resume(user_dict={
                    'CHECKPOINT_PATH': checkpoint_path,
                    'EPOCH': epoch
                })
            break

    if writer is not None:
        # Flush any buffered TensorBoard events before returning.
        writer.close()

    logger.info(
        f'Finished training!!! Took {time.time()-train_start_time:0.3f} seconds!'
    )
示例#5
0
		submit_path  : submit result for evaluation
	'''
	img_files = os.listdir(test_img_path)
	img_files = sorted([os.path.join(test_img_path, img_file) for img_file in img_files])
	
	for i, img_file in enumerate(img_files):
		print('evaluating {} image'.format(i), end='\r')
		boxes = detect(Image.open(img_file), model, device)
		seq = []
		if boxes is not None:
			seq.extend([','.join([str(int(b)) for b in box[:-1]]) + '\n' for box in boxes])
		with open(os.path.join(submit_path, 'res_' + os.path.basename(img_file).replace('.jpg','.txt')), 'w') as f:
			f.writelines(seq)


if __name__ == '__main__':
	# Script entry point: detect text in a single image and save the plot.
	img_path    = '../ICDAR_2015/test_img/img_2.jpg' # path of the test image
	model_path  = './pths/east_vgg16.pth' # trained model weights
	res_img     = './res.bmp' # where the annotated image is saved
	device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
	model = EAST().to(device) # place the model on the CPU or GPU
	# map_location lets a GPU-saved checkpoint load on a CPU-only machine.
	model.load_state_dict(torch.load(model_path, map_location=device)) # load the model parameters
	model.eval() # switch the model to evaluation mode
	# Keep every use of the image inside the ``with`` block so the file
	# handle is released once the result has been saved.
	with Image.open(img_path) as img: # open the image
		boxes = detect(img, model, device) # run detection on the image
		plot_img = plot_boxes(img, boxes) # draw the results onto the image
		plot_img.save(res_img) # save the annotated image