        loss.backward()
        optimizer.step()
        print('Train batch loss: ', train_batch_loss[-1])

    train_loss.append(sum(train_batch_loss) / len(train_batch_loss))
    print('***Train loss***: ', train_loss[-1])

    # Validation pass for this epoch
    val_batch_loss = list()
    for X_batch_val, gt_score, gt_geo in val_dataloader:
        X_batch_val = X_batch_val.to(device)
        gt_score = gt_score.to(device)
        gt_geo = gt_geo.to(device)
        # Forward pass (usually wrapped in torch.no_grad() for validation)
        pred_score, pred_geo = model(X_batch_val)
        loss = loss_fn(gt_score, pred_score, gt_geo, pred_geo)
        val_batch_loss.append(loss.item())
        torch.save(model, './east.pt')  # full pickled model, overwritten every validation batch
        print('Val batch loss: ', val_batch_loss[-1])

    val_loss.append(sum(val_batch_loss) / len(val_batch_loss))
    print('***Validation loss***: ', val_loss[-1])

    # Keep only the state_dict with the lowest validation loss so far
    if best_val_loss > val_loss[-1]:
        best_val_loss = val_loss[-1]
        torch.save(model.state_dict(), './east1.pt')
        print('Save!')

print('best val loss: ', best_val_loss)
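# Illustrative sketch, not part of the training loop above: reloading the best
# weights for inference. './east1.pt' holds only a state_dict, so the model
# architecture has to be rebuilt first; './east.pt' holds the full pickled
# model and could be restored with torch.load() directly. `East` and `device`
# are assumed to match the names used by the training code.
import torch

def load_best_model(weights_path='./east1.pt'):
    model = East()
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.to(device)
    model.eval()  # inference mode: no dropout, frozen batch-norm statistics
    return model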
def main():
    hmean = 0.0
    is_best = False

    warnings.simplefilter('ignore', np.RankWarning)

    # Prepare the dataset
    print('EAST <==> Prepare <==> DataLoader <==> Begin')
    # train_root_path = os.path.abspath(os.path.join('./dataset/', 'train'))
    train_root_path = cfg.dataroot
    train_img = os.path.join(train_root_path, 'img')
    train_gt = os.path.join(train_root_path, 'gt')

    trainset = custom_dset(train_img, train_gt)
    train_loader = DataLoader(trainset,
                              batch_size=cfg.train_batch_size_per_gpu * cfg.gpu,
                              shuffle=True,
                              collate_fn=collate_fn,
                              num_workers=cfg.num_workers)
    print('EAST <==> Prepare <==> Batch_size:{} <==> Begin'.format(
        cfg.train_batch_size_per_gpu * cfg.gpu))
    print('EAST <==> Prepare <==> DataLoader <==> Done')

    # Test the dataloader (debug only)
    """
    for i in range(100000):
        for j, (a, b, c, d) in enumerate(train_loader):
            print(i, j, '/', len(train_loader))
    """

    # Model
    print('EAST <==> Prepare <==> Network <==> Begin')
    model = East()
    model = nn.DataParallel(model, device_ids=cfg.gpu_ids)
    model = model.cuda()
    init_weights(model, init_type=cfg.init_type)
    cudnn.benchmark = True

    criterion = LossFunc()
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10000, gamma=0.94)

    # Initialize from scratch or resume from a checkpoint
    if cfg.resume and os.path.isfile(cfg.checkpoint):
        weightpath = os.path.abspath(cfg.checkpoint)
        print("EAST <==> Prepare <==> Loading checkpoint '{}' <==> Begin".format(weightpath))
        checkpoint = torch.load(weightpath)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("EAST <==> Prepare <==> Loading checkpoint '{}' <==> Done".format(weightpath))
    else:
        start_epoch = 0
    print('EAST <==> Prepare <==> Network <==> Done')

    for epoch in range(start_epoch, cfg.max_epochs):
        train(train_loader, model, criterion, scheduler, optimizer, epoch)

        if epoch % cfg.eval_iteration == 0:
            # Create res_file and img_with_box
            output_txt_dir_path = predict(model, criterion, epoch)
            # Zip the result files
            submit_path = MyZip(output_txt_dir_path, epoch)
            # Submit and compute Hmean
            hmean_ = compute_hmean(submit_path)

            # Flag whether this evaluation improves on the best Hmean so far
            is_best = hmean_ > hmean
            if is_best:
                hmean = hmean_

            state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'is_best': is_best,
            }
            save_checkpoint(state, epoch)
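# Illustrative sketch, not part of the original scripts: the minimal set of
# config attributes that main() above reads from `cfg`. The attribute names
# come from the code; every value below is a placeholder.
from types import SimpleNamespace

cfg = SimpleNamespace(
    dataroot='./dataset/train',      # expects img/ and gt/ sub-directories
    train_batch_size_per_gpu=8,
    gpu=1,                           # number of GPUs; multiplies the batch size
    gpu_ids=[0],                     # device ids for nn.DataParallel
    num_workers=4,
    init_type='xavier',              # passed to init_weights()
    lr=1e-4,
    resume=False,
    checkpoint='',                   # path to a saved checkpoint when resume=True
    max_epochs=600,
    eval_iteration=10,               # run predict()/compute_hmean() every N epochs
)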
def main():
    warnings.simplefilter('ignore', np.RankWarning)

    # Dataset: one sub-directory per video clip, e.g. 'Video_1', 'Video_2', ...
    video_root_path = os.path.abspath('./dataset/train/')
    video_name_list = sorted(
        [p for p in os.listdir(video_root_path) if p.split('_')[0] == 'Video'])
    # print('video_name_list', video_name_list)

    # Model
    print('EAST <==> Prepare <==> Network <==> Begin')
    model = East()
    AGD_model = AGD()
    model = nn.DataParallel(model, device_ids=cfg.gpu_ids)
    # AGD_model = nn.DataParallel(AGD_model, device_ids=cfg.gpu_ids)
    model = model.cuda()
    AGD_model = AGD_model.cuda()
    init_weights(model, init_type=cfg.init_type)
    cudnn.benchmark = True

    criterion1 = LossFunc()
    criterion2 = Ass_loss()
    optimizer1 = torch.optim.Adam(model.parameters(), lr=cfg.lr)
    optimizer2 = torch.optim.Adam(AGD_model.parameters(), lr=cfg.lr)
    scheduler = lr_scheduler.StepLR(optimizer1, step_size=10000, gamma=0.94)

    # Initialize from scratch or resume from a checkpoint
    if cfg.resume and os.path.isfile(cfg.checkpoint):
        weightpath = os.path.abspath(cfg.checkpoint)
        print("EAST <==> Prepare <==> Loading checkpoint '{}' <==> Begin".format(weightpath))
        checkpoint = torch.load(weightpath)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        # AGD_model.load_state_dict(checkpoint['model2.state_dict'])
        optimizer1.load_state_dict(checkpoint['optimizer'])
        # optimizer2.load_state_dict(checkpoint['optimizer2'])
        print("EAST <==> Prepare <==> Loading checkpoint '{}' <==> Done".format(weightpath))
    else:
        start_epoch = 0
    print('EAST <==> Prepare <==> Network <==> Done')

    for epoch in range(start_epoch + 1, cfg.max_epochs):
        # Build one dataloader per video clip and train on the clips in order
        for video_name in video_name_list:
            print('EAST <==> epoch:{} <==> Prepare <==> DataLoader <==> {} Begin'.format(
                epoch, video_name))
            trainset = custom_dset(os.path.join(video_root_path, video_name))
            # sampler = sampler_for_video_clip(len(trainset))
            train_loader = DataLoader(trainset,
                                      batch_size=cfg.train_batch_size_per_gpu * cfg.gpu,
                                      shuffle=False,
                                      collate_fn=collate_fn,
                                      num_workers=cfg.num_workers,
                                      drop_last=True)
            print('EAST <==> Prepare <==> Batch_size:{} <==> Begin'.format(
                cfg.train_batch_size_per_gpu * cfg.gpu))
            print('EAST <==> epoch:{} <==> Prepare <==> DataLoader <==> {} Done'.format(
                epoch, video_name))

            train(train_loader, model, AGD_model, criterion1, criterion2,
                  scheduler, optimizer1, optimizer2, epoch)
            '''
            for i, (img, score_map, geo_map, training_mask, coord_ids) in enumerate(train_loader):
                print('i{} img.shape:{} geo_map.shape{} training_mask.shape{} coord_ids.len{}'.format(
                    i, score_map.shape, geo_map.shape, training_mask.shape, len(coord_ids)))
            '''

        if epoch % cfg.eval_iteration == 0:
            state = {
                'epoch': epoch,
                'model1.state_dict': model.state_dict(),
                'model2.state_dict': AGD_model.state_dict(),
                'optimizer1': optimizer1.state_dict(),
                'optimizer2': optimizer2.state_dict(),
            }
            save_checkpoint(state, epoch)
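# Illustrative sketch, not part of the script above: restoring both networks
# from the checkpoint dict saved by main(). The keys mirror the `state` dict
# built there; reloading the AGD model and optimizer2 is an assumption about
# the intended behaviour, since the resume branch in main() restores only the
# EAST model and optimizer1.
def resume_from_checkpoint(path, model, AGD_model, optimizer1, optimizer2):
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['model1.state_dict'])
    AGD_model.load_state_dict(checkpoint['model2.state_dict'])
    optimizer1.load_state_dict(checkpoint['optimizer1'])
    optimizer2.load_state_dict(checkpoint['optimizer2'])
    return checkpoint['epoch']  # epoch at which the checkpoint was saved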