def main():
    if config.output_dir is None:
        config.output_dir = 'output'
    if config.restart_training:
        shutil.rmtree(config.output_dir, ignore_errors=True)
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)

    logger = setup_logger(os.path.join(config.output_dir, 'train_log'))
    logger.info(config.print())

    torch.manual_seed(config.seed)  # set the random seed for the CPU
    if config.gpu_id is not None and torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        logger.info('train with gpu {} and pytorch {}'.format(config.gpu_id, torch.__version__))
        device = torch.device("cuda:0")
        torch.cuda.manual_seed(config.seed)      # set the random seed for the current GPU
        torch.cuda.manual_seed_all(config.seed)  # set the random seed for all GPUs
    else:
        logger.info('train with cpu and pytorch {}'.format(torch.__version__))
        device = torch.device("cpu")

    train_data = TibetanDataset(config.json_path, data_shape=config.data_shape, n=config.n, m=config.m,
                                transform=transforms.ToTensor(), base_path=config.base_path)
    train_loader = Data.DataLoader(dataset=train_data, batch_size=config.train_batch_size,
                                   shuffle=True, num_workers=int(config.workers))

    writer = SummaryWriter(config.output_dir)
    model = PSENet(backbone=config.backbone, pretrained=config.pretrained, result_num=config.n, scale=config.scale)
    if not config.pretrained and not config.restart_training:
        model.apply(weights_init)

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    # dummy_input = torch.autograd.Variable(torch.Tensor(1, 3, 600, 800).to(device))
    # writer.add_graph(model=model, input_to_model=dummy_input)

    criterion = PSELoss(Lambda=config.Lambda, ratio=config.OHEM_ratio, reduction='mean')
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, momentum=0.99)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

    if config.checkpoint != '' and not config.restart_training:
        # resume: restore model/optimizer state and continue from the stored epoch
        start_epoch = load_checkpoint(config.checkpoint, model, logger, device, optimizer)
        start_epoch += 1
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, config.lr_decay_step,
                                                         gamma=config.lr_gamma, last_epoch=start_epoch)
    else:
        start_epoch = config.start_epoch
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, config.lr_decay_step, gamma=config.lr_gamma)

    all_step = len(train_loader)
    logger.info('train dataset has {} samples, {} iters in dataloader'.format(train_data.__len__(), all_step))

    epoch = 0
    best_model = {'recall': 0, 'precision': 0, 'f1': 0, 'models': ''}
    try:
        for epoch in range(start_epoch, config.epochs):
            start = time.time()
            train_loss, lr = train_epoch(model, optimizer, scheduler, train_loader, device,
                                         criterion, epoch, all_step, writer, logger)
            logger.info('[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
                epoch, config.epochs, train_loss, time.time() - start, lr))
            # net_save_path = '{}/PSENet_{}_loss{:.6f}.pth'.format(config.output_dir, epoch, train_loss)
            # save_checkpoint(net_save_path, model, optimizer, epoch, logger)

            # only run evaluation once the training loss is low enough
            if (0.3 < train_loss < 0.4 and epoch % 4 == 0) or train_loss < 0.3:
                recall, precision, f1 = merge_eval(model=model,
                                                   save_path=os.path.join(config.output_dir, 'output'),
                                                   test_path=config.testroot, device=device,
                                                   base_path=config.base_path, use_sub=config.use_sub)
                logger.info('test: recall: {:.6f}, precision: {:.6f}, f1: {:.6f}'.format(recall, precision, f1))

                net_save_path = '{}/PSENet_{}_loss{:.6f}_r{:.6f}_p{:.6f}_f1{:.6f}.pth'.format(
                    config.output_dir, epoch, train_loss, recall, precision, f1)
                save_checkpoint(net_save_path, model, optimizer, epoch, logger)

                if f1 > best_model['f1']:
                    # keep only the latest best checkpoint
                    best_path = glob.glob(config.output_dir + '/Best_*.pth')
                    for b_path in best_path:
                        if os.path.exists(b_path):
                            os.remove(b_path)
                    best_model['recall'] = recall
                    best_model['precision'] = precision
                    best_model['f1'] = f1
                    best_model['models'] = net_save_path
                    best_save_path = '{}/Best_{}_r{:.6f}_p{:.6f}_f1{:.6f}.pth'.format(
                        config.output_dir, epoch, recall, precision, f1)
                    if os.path.exists(net_save_path):
                        shutil.copyfile(net_save_path, best_save_path)
                    else:
                        save_checkpoint(best_save_path, model, optimizer, epoch, logger)
                    # drop the intermediate PSENet_* checkpoints once a new best is saved
                    pse_path = glob.glob(config.output_dir + '/PSENet_*.pth')
                    for p_path in pse_path:
                        if os.path.exists(p_path):
                            os.remove(p_path)

                writer.add_scalar(tag='Test/recall', scalar_value=recall, global_step=epoch)
                writer.add_scalar(tag='Test/precision', scalar_value=precision, global_step=epoch)
                writer.add_scalar(tag='Test/f1', scalar_value=f1, global_step=epoch)
        writer.close()
    except KeyboardInterrupt:
        save_checkpoint('{}/final.pth'.format(config.output_dir), model, optimizer, epoch, logger)
    finally:
        if best_model['models']:
            logger.info(best_model)
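# --- Illustration only (not part of the original script): this and the other training
# entry points below call model.apply(weights_init) when no pretrained backbone is used,
# but the helper itself is defined elsewhere in the repo. A minimal sketch of such an
# initializer, assuming Kaiming init for conv layers and constant init for batch norm;
# the project's actual weights_init may differ.
import torch.nn as nn

def weights_init_sketch(m):
    """Per-module initializer meant to be used with nn.Module.apply()."""
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)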
def main():
    config.workspace = os.path.join(config.workspace_dir, config.exp_name)
    if not os.path.exists(config.workspace):
        os.makedirs(config.workspace)

    logger = setup_logger(os.path.join(config.workspace, 'train_log'))
    logger.info(config.pprint())

    torch.manual_seed(config.seed)  # set the random seed for the CPU
    torch.backends.cudnn.benchmark = True
    logger.info('train with gpu {} and pytorch {}'.format(config.gpu_id, torch.__version__))
    device = torch.device("cuda:0")
    torch.cuda.manual_seed(config.seed)      # set the random seed for the current GPU
    torch.cuda.manual_seed_all(config.seed)  # set the random seed for all GPUs

    train_data = TotalTextoader(config.train_data_dir, config.train_gt_dir,
                                config.test_data_dir, config.test_gt_dir,
                                split='train', is_transform=True, img_size=config.data_shape,
                                kernel_num=config.kernel_num, min_scale=config.min_scale)
    train_loader = Data.DataLoader(dataset=train_data, batch_size=config.train_batch_size,
                                   shuffle=True, num_workers=int(config.workers))

    model = PSENet(backbone=config.backbone, pretrained=config.pretrained,
                   result_num=config.kernel_num, scale=config.scale)
    if not config.pretrained and not config.restart_training:
        model.apply(weights_init)

    num_gpus = torch.cuda.device_count()
    # if num_gpus > 1:
    model = nn.DataParallel(model)
    model = model.to(device)

    criterion = dice_loss
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

    if config.checkpoint != '' and config.restart_training:
        start_epoch = load_checkpoint(config.checkpoint, model, logger, device, optimizer)
        start_epoch += 1
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, config.lr_decay_step,
                                                         gamma=config.lr_gamma, last_epoch=start_epoch)
        logger.info('resume from {}, epoch={}'.format(config.checkpoint, start_epoch))
    else:
        start_epoch = 1
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, config.lr_decay_step, gamma=config.lr_gamma)

    all_step = len(train_loader)
    logger.info('train dataset has {} samples, {} iters in dataloader'.format(train_data.__len__(), all_step))

    for epoch in range(start_epoch, config.epochs + 1):
        start = time.time()
        train_loss, lr = train_epoch(model, optimizer, scheduler, train_loader, device,
                                     criterion, epoch, all_step, logger)
        logger.info('[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
            epoch, config.epochs, train_loss, time.time() - start, lr))

        if epoch % config.save_interval == 0:
            save_path = '{}/epoch_{}.pth'.format(config.workspace, epoch)
            latest_path = '{}/latest.pth'.format(config.workspace)
            save_checkpoint(save_path, model, optimizer, epoch, logger)
            save_checkpoint(latest_path, model, optimizer, epoch, logger)
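# --- Illustration only (not part of the original script): the Total-Text variant above
# passes `dice_loss` directly as its criterion, with the helper defined elsewhere in the
# repo. Below is a minimal batch dice-loss sketch, assuming raw score maps and a
# same-shaped binary ground-truth mask; the project version may take extra arguments
# (e.g. a training mask) and be applied per kernel map.
import torch

def dice_loss_sketch(pred, gt, eps=1e-6):
    """Return 1 - Dice coefficient, averaged over the batch."""
    pred = torch.sigmoid(pred).contiguous().view(pred.size(0), -1)
    gt = gt.contiguous().view(gt.size(0), -1)
    inter = (pred * gt).sum(dim=1)
    union = pred.sum(dim=1) + gt.sum(dim=1) + eps
    return (1.0 - (2.0 * inter + eps) / union).mean()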
def main():
    config.workspace = os.path.join(config.workspace_dir, config.exp_name)
    if config.restart_training:
        shutil.rmtree(config.workspace, ignore_errors=True)
    if not os.path.exists(config.workspace):
        os.makedirs(config.workspace)
    shutil.rmtree(os.path.join(config.workspace, 'train_log'), ignore_errors=True)

    logger = setup_logger(os.path.join(config.workspace, 'train_log'))
    logger.info(config.print())

    torch.manual_seed(config.seed)  # set the random seed for the CPU
    if config.gpu_id is not None and torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        logger.info('train with gpu {} and pytorch {}'.format(config.gpu_id, torch.__version__))
        device = torch.device("cuda:0")
        torch.cuda.manual_seed(config.seed)      # set the random seed for the current GPU
        torch.cuda.manual_seed_all(config.seed)  # set the random seed for all GPUs
    else:
        logger.info('train with cpu and pytorch {}'.format(torch.__version__))
        device = torch.device("cpu")

    train_data = ICDAR17(config.trainroot, data_shape=config.data_shape, n=config.kernel_num, m=config.min_scale)
    train_loader = Data.DataLoader(dataset=train_data, batch_size=config.train_batch_size,
                                   shuffle=True, num_workers=int(config.workers))

    # writer = SummaryWriter(config.output_dir)
    model = PSENet(backbone=config.backbone, pretrained=config.pretrained,
                   result_num=config.kernel_num, scale=config.scale)
    if not config.pretrained and not config.restart_training:
        model.apply(weights_init)

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.to(device)

    criterion = PSELoss(Lambda=config.Lambda, ratio=config.OHEM_ratio, reduction='mean')
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, momentum=0.99)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

    if config.checkpoint != '' and not config.restart_training:
        start_epoch = load_checkpoint(config.checkpoint, model, logger, device, optimizer)
        start_epoch += 1
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, config.lr_decay_step,
                                                         gamma=config.lr_gamma, last_epoch=start_epoch)
    else:
        start_epoch = config.start_epoch
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, config.lr_decay_step, gamma=config.lr_gamma)

    all_step = len(train_loader)
    logger.info('train dataset has {} samples, {} iters in dataloader'.format(train_data.__len__(), all_step))

    epoch = 0
    f1 = 0
    try:
        for epoch in range(start_epoch, config.epochs):
            start = time.time()
            train_loss, lr = train_epoch(model, optimizer, scheduler, train_loader, device,
                                         criterion, epoch, all_step, logger)
            logger.info('[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
                epoch, config.epochs, train_loss, time.time() - start, lr))

            save_path = '{}/epoch_model.pth'.format(config.workspace)
            save_checkpoint(save_path, model, optimizer, epoch, logger)

            if epoch >= 50 and epoch % 10 == 0:
                # `eval` here is the project's evaluation routine, not the Python builtin
                f_score_new = eval(model, os.path.join(config.workspace, 'output'), config.testroot, device)
                logger.info(' ---------------------------------------')
                logger.info(' test: f_score : {:.6f}'.format(f_score_new))
                logger.info(' ---------------------------------------')
                if f_score_new > f1:
                    f1 = f_score_new
                    best_save_path = '{}/Best_model_{:.6f}.pth'.format(config.workspace, f1)
                    save_checkpoint(best_save_path, model, optimizer, epoch, logger)
            # writer.add_scalar(tag='Test/recall', scalar_value=recall, global_step=epoch)
            # writer.add_scalar(tag='Test/precision', scalar_value=precision, global_step=epoch)
            # writer.add_scalar(tag='Test/f1', scalar_value=f1, global_step=epoch)
        # writer.close()
    except KeyboardInterrupt:
        save_checkpoint('{}/final.pth'.format(config.workspace), model, optimizer, epoch, logger)
def main():
    if config.output_dir is None:
        config.output_dir = 'output'
    if config.restart_training:
        shutil.rmtree(config.output_dir, ignore_errors=True)
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)

    logger = setup_logger(os.path.join(config.output_dir, 'train_log'))
    logger.info(config.print())

    torch.manual_seed(config.seed)  # set the random seed for the CPU
    if config.gpu_id is not None and torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        logger.info('train with gpu {} and pytorch {}'.format(config.gpu_id, torch.__version__))
        device = torch.device("cuda:0")
        torch.cuda.manual_seed(config.seed)      # set the random seed for the current GPU
        torch.cuda.manual_seed_all(config.seed)  # set the random seed for all GPUs
    else:
        logger.info('train with cpu and pytorch {}'.format(torch.__version__))
        device = torch.device("cpu")

    train_data = MyDataset(args.train_dir, data_shape=config.data_shape, n=config.n, m=config.m,
                           transform=transforms.ToTensor())
    train_loader = Data.DataLoader(dataset=train_data, batch_size=args.batch_size,
                                   shuffle=True, num_workers=int(config.workers))

    writer = SummaryWriter(config.output_dir)
    model = PSENet(backbone=config.backbone, pretrained=config.pretrained, result_num=config.n, scale=config.scale)
    if not config.pretrained and not config.restart_training:
        model.apply(weights_init)
    if args.resume_model:
        resume_model(model, args.resume_model)

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    # dummy_input = torch.autograd.Variable(torch.Tensor(1, 3, 600, 800).to(device))
    # writer.add_graph(model=model, input_to_model=dummy_input)

    criterion = PSELoss(Lambda=config.Lambda, ratio=config.OHEM_ratio, reduction='mean')
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, momentum=0.99)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    if config.checkpoint != '' and not config.restart_training:
        start_epoch = load_checkpoint(config.checkpoint, model, logger, device, optimizer)
        start_epoch += 1
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, config.lr_decay_step,
                                                         gamma=config.lr_gamma, last_epoch=start_epoch)
    else:
        start_epoch = config.start_epoch
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, config.lr_decay_step, gamma=config.lr_gamma)

    all_step = len(train_loader)
    logger.info('train dataset has {} samples, {} iters in dataloader'.format(train_data.__len__(), all_step))

    epoch = 0
    best_model = {'recall': 0, 'precision': 0, 'f1': 0, 'models': ''}
    try:
        for epoch in range(start_epoch, args.epochs):
            start = time.time()
            train_loss, lr = train_epoch(model, optimizer, scheduler, train_loader, device,
                                         criterion, epoch, all_step, writer, logger)
            logger.info('[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
                epoch, config.epochs, train_loss, time.time() - start, lr))

            if epoch % args.save_per_epoch == 0:
                save_model(model, epoch)
        writer.close()
    except KeyboardInterrupt:
        save_checkpoint('{}/final.pth'.format(config.output_dir), model, optimizer, epoch, logger)
    finally:
        if best_model['models']:
            logger.info(best_model)
def main():
    config.workspace = os.path.join(config.workspace_dir, config.exp_name)
    # if config.restart_training:
    #     shutil.rmtree(config.workspace, ignore_errors=True)
    if not os.path.exists(config.workspace):
        os.makedirs(config.workspace)

    logger = setup_logger(os.path.join(config.workspace, 'train_log'))
    logger.info(config.print())

    torch.manual_seed(config.seed)  # set the random seed for the CPU
    if config.gpu_id is not None and torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        logger.info('train with gpu {} and pytorch {}'.format(config.gpu_id, torch.__version__))
        device = torch.device("cuda:0")
        torch.cuda.manual_seed(config.seed)      # set the random seed for the current GPU
        torch.cuda.manual_seed_all(config.seed)  # set the random seed for all GPUs
    else:
        logger.info('train with cpu and pytorch {}'.format(torch.__version__))
        device = torch.device("cpu")

    train_data = ICDAR15(config.trainroot, config.is_pseudo, data_shape=config.data_shape,
                         n=config.kernel_num, m=config.m)
    train_loader = Data.DataLoader(dataset=train_data, batch_size=config.train_batch_size,
                                   shuffle=True, num_workers=int(config.workers))

    # writer = SummaryWriter(config.output_dir)
    model = PSENet(backbone=config.backbone, pretrained=config.pretrained,
                   result_num=config.kernel_num, scale=config.scale)
    if not config.pretrained and not config.restart_training:
        model.apply(weights_init)

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.to(device)

    criterion = PSELoss(Lambda=config.Lambda, ratio=config.OHEM_ratio, reduction='mean')
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

    if config.checkpoint != '' and not config.restart_training:
        start_epoch = load_checkpoint(config.checkpoint, model, logger, device, optimizer)
        start_epoch += 1
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, config.lr_decay_step,
                                                         gamma=config.lr_gamma, last_epoch=start_epoch)
        logger.info('resume from {}, epoch={}'.format(config.checkpoint, start_epoch))
    else:
        start_epoch = config.start_epoch
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, config.lr_decay_step, gamma=config.lr_gamma)

    all_step = len(train_loader)
    logger.info('train dataset has {} samples, {} iters in dataloader'.format(train_data.__len__(), all_step))

    epoch = 0
    f1 = 0
    for epoch in range(start_epoch, config.epochs):
        start = time.time()
        train_loss, lr = train_epoch(model, optimizer, scheduler, train_loader, device,
                                     criterion, epoch, all_step, logger)
        logger.info('[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.format(
            epoch, config.epochs, train_loss, time.time() - start, lr))

        if epoch % config.save_interval == 0:
            save_path = '{}/epoch_{}.pth'.format(config.workspace, epoch)
            latest_path = '{}/latest.pth'.format(config.workspace)
            save_checkpoint(save_path, model, optimizer, epoch, logger)
            save_checkpoint(latest_path, model, optimizer, epoch, logger)
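# --- Illustration only (not part of the original scripts): every variant above relies on
# save_checkpoint(path, model, optimizer, epoch, logger) and
# load_checkpoint(path, model, logger, device, optimizer), which are defined elsewhere in
# the repo. A minimal sketch consistent with how they are called (load_checkpoint returns
# the stored epoch); the real helpers may persist additional state.
import torch

def save_checkpoint_sketch(checkpoint_path, model, optimizer, epoch, logger):
    state = {'state_dict': model.state_dict(),   # keys carry a 'module.' prefix under DataParallel
             'optimizer': optimizer.state_dict(),
             'epoch': epoch}
    torch.save(state, checkpoint_path)
    logger.info('model saved to {}'.format(checkpoint_path))

def load_checkpoint_sketch(checkpoint_path, model, logger, device, optimizer):
    state = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    logger.info('model loaded from {}'.format(checkpoint_path))
    return state['epoch']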