def merge(args):
    """Average all per-fold probability files (``*.npy`` in the CWD) and
    write both the averaged probabilities and a submission CSV.

    Args:
        args: parsed CLI namespace; uses ``dataset``, ``split``, ``no_gt``
            and ``seed``.

    Raises:
        FileNotFoundError: if no ``*.npy`` fold files are found.
    """
    # Setup Dataloader (only needed for its length and class names).
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path,
                         split=args.split,
                         is_transform=True,
                         no_gt=args.no_gt,
                         seed=args.seed)
    n_classes = loader.n_classes

    # Sum the per-fold probability matrices; one .npy file per fold.
    avg_y_prob = np.zeros((len(loader), n_classes), dtype=np.float32)
    fold_list = []
    for prob_file_name in glob.glob('*.npy'):
        prob = np.load(prob_file_name)
        avg_y_prob = avg_y_prob + prob
        fold_list.append(prob_file_name)

    # FIX: the original divided by len(fold_list) unconditionally, raising
    # ZeroDivisionError when the directory contained no fold files.
    if not fold_list:
        raise FileNotFoundError('No *.npy probability files found to merge')
    avg_y_prob = avg_y_prob / len(fold_list)

    avgprob_file_name = 'prob_{}_avg.npy'.format(len(fold_list))
    np.save(avgprob_file_name, avg_y_prob)

    # Create submission
    csv_file_name = 'submission.csv'
    sub = pd.read_csv(os.path.join(data_path, 'sample_submission.csv'),
                      index_col=0)
    sub[loader.class_names] = avg_y_prob
    sub.to_csv(csv_file_name)
def validate(cfg, model_path):
    """Load a checkpoint into the configured model and run one validation pass.

    Args:
        cfg: config mapping; reads the "cuda" key ("all", a device string,
            or absent) and whatever get_loader/get_loss_fn/get_model consume.
        model_path: path to a torch checkpoint containing "state_dict".
    """
    assert model_path is not None, 'Not assert model path'
    use_cuda = False
    if cfg.get("cuda", None) is not None:
        # A specific device string (anything but "all") restricts visibility
        # before torch queries the devices.
        if cfg.get("cuda", None) != "all":
            os.environ["CUDA_VISIBLE_DEVICES"] = cfg.get("cuda", None)
        use_cuda = torch.cuda.is_available()
    # Setup Dataloader
    train_loader, val_loader = get_loader(cfg)
    loss_fn = get_loss_fn(cfg)
    # Load Model
    model = get_model(cfg)
    if use_cuda:
        model.cuda()
        loss_fn.cuda()
        checkpoint = torch.load(model_path)
        if torch.cuda.device_count() > 1:
            # multi gpus
            # Checkpoint was saved from a DataParallel model, so the raw
            # state_dict (with "module." prefixes) is loaded as-is.
            model = torch.nn.DataParallel(
                model, device_ids=list(range(torch.cuda.device_count())))
            state = checkpoint["state_dict"]
        else:
            # 1 gpu
            # Strip DataParallel "module." prefixes for a plain model.
            state = convert_state_dict(checkpoint["state_dict"])
    else:
        # cpu
        checkpoint = torch.load(model_path, map_location='cpu')
        state = convert_state_dict(checkpoint["state_dict"])
    model.load_state_dict(state)
    validate_epoch(val_loader, model, loss_fn, use_cuda)
def read(self, reader: BinaryIO, platform: GamePlatform):
    """Parse one archive directory entry from `reader` and attach a loader.

    PlayStation archives use a 24-byte name field; other platforms use 20.
    """
    name_length = 24 if platform == GamePlatform.playstation else 20
    # Name field is NUL-padded; keep only the part before the first '\0'.
    self.name = reader.read_str(name_length).split('\0')[
        0]  # important to re-add this when writing
    # Four 32-bit unsigned fields: offset, byte length, creation timestamp,
    # and one field of unknown purpose.
    self.location, self.length, timestamp, self.unknown = reader.read_fmt(
        'IIII')
    self.create_date = datetime.date.fromtimestamp(timestamp)
    # Choose the content loader from the file extension and bind it to
    # this entry.
    ext = self.name.split('.')[-1].lower()
    self.loader = get_loader(ext)(self)
def read(self, reader: BinaryIO, platform: GamePlatform):
    """Read a single directory entry and wire up its content loader.

    PlayStation archives store 24-byte names; everything else stores 20.
    """
    if platform == GamePlatform.playstation:
        name_length = 24
    else:
        name_length = 20
    raw_name = reader.read_str(name_length)
    # Name is NUL-padded on disk; drop the padding here.
    # important to re-add this when writing
    self.name = raw_name.split('\0')[0]
    fields = reader.read_fmt('IIII')
    self.location, self.length, timestamp, self.unknown = fields
    self.create_date = datetime.date.fromtimestamp(timestamp)
    # The loader is selected by file extension and bound to this entry.
    ext = self.name.split('.')[-1].lower()
    self.loader = get_loader(ext)(self)
def main():
    """Run detection + SORT tracking over a video and save gallery images.

    Triggers come from a JSON config when --config is given; otherwise two
    hard-coded VectorTriggers are used.
    """
    args = init_args()
    detector = FasterRCNN()
    attribute_extractor = MgnWrapper(args.weights_path)
    dataloader = loaders.get_loader(args.video_path, args.loader,
                                    args.interval)

    # TODO: (nhendy) do this mapping in a config file
    if args.config:
        json_dict = config_parser.parse_json_filename(args.config)
        trigger_causes = config_parser.extract_line_trigger_list(json_dict)
    else:
        trigger_causes = [
            VectorTrigger(
                # TODO: (nhendy) weird hardcoded name
                "NE_Moiz",
                np.array([227, 470, 227 + 244, 470]),
                np.array([917, 537]),
                500,
                5),
            VectorTrigger(
                # TODO: (nhendy) weird hardcoded name
                "NE_Moiz",
                np.array([1433, 384, 1900, 384]),
                np.array([917, 537]),
                500,
                5)
        ]
    gallery = galleries.TriggerLineGallery(attribute_extractor,
                                           trigger_causes)

    temp_dir = tempfile.mkdtemp()

    # create trackers for each video/camera
    sort_trackers = {
        vidnames: Sort()
        for vidnames in dataloader.get_vid_names()
    }
    # One per-camera text file for MOT output, kept open for the whole run.
    output_files = {
        vidnames: open(os.path.join(temp_dir, "{}.txt".format(vidnames)), "w")
        for vidnames in dataloader.get_vid_names()
    }

    # Run detector, Sort and fill up gallery
    run_mot_and_fill_gallery(dataloader, gallery, detector, sort_trackers,
                             output_files)

    # Save images from gallery captured throughout video
    # NOTE(review): this calls gallery.people(); the sibling main() below
    # reads gallery.people as an attribute -- one of the two is likely
    # wrong, verify against the gallery classes.
    write_gallery_imgs(gallery.people(), args.gallery_path)


# NOTE(review): dangling triple-quote below appears to open a commented-out
# region continuing past this chunk -- do not remove without checking.
"""
def main():
    """CLI entry point: load a .tsv dataset, extract features, export result."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--path",
        default=None,
        type=str,
        required=True,
        help="The input data dir. Should contain the path to .tsv file.",
    )
    parser.add_argument(
        "--processing_method",
        type=str,
        default='pandas',
        help=
        "Method for loading data. Options: db – use database, pandas – use dataframe."
    )
    parser.add_argument("--sep",
                        type=str,
                        default='\t',
                        help="Separator for file processing.")
    parser.add_argument("--output_file",
                        type=str,
                        default="test_proc.tsv",
                        help="Name or full path for output data.")
    parser.add_argument("--norm_function",
                        type=str,
                        default='zscore',
                        help="Function for processing.")
    # NOTE(review): default 'zscore' looks copy-pasted from --norm_function;
    # the help text says the default should be :memory:. Confirm what
    # process_args/get_loader expect before changing this value.
    parser.add_argument(
        "--host",
        type=str,
        default='zscore',
        help=
        "Optional. Path to SQLite DB. Example: sqlite:///your_filename.db. By default: :memory:"
    )
    args = process_args(parser.parse_args())
    # output_file is consumed here; all remaining args configure the loader.
    output_file = args.pop('output_file')
    loader = get_loader(**args)
    loader.load_data()
    loader.extract_features()
    loader.export(output_file)
def test(args):
    """Run classification inference (optionally with hflip TTA), report
    MAP@3 / accuracy metrics when ground truth is available, and write a
    submission CSV.

    Args:
        args: parsed CLI namespace; uses model_path, dataset, split,
            img_rows/img_cols, batch_size, seed, tta and no_gt.
    """
    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find('_')]

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path,
                         split=args.split,
                         is_transform=True,
                         img_size=(args.img_rows, args.img_cols),
                         no_gt=args.no_gt,
                         seed=args.seed)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    n_classes = loader.n_classes
    testloader = data.DataLoader(loader,
                                 batch_size=args.batch_size,
                                 num_workers=4,
                                 pin_memory=True)

    # Setup Model: mobilenet_v2 backbone with a replaced classifier head.
    model = torchvision.models.mobilenet_v2(pretrained=True)
    num_ftrs = model.last_channel
    model.classifier = nn.Sequential(
        nn.Dropout(0.2),
        nn.Linear(num_ftrs, n_classes),
    )
    model.cuda()

    checkpoint = torch.load(args.model_path)
    state = convert_state_dict(checkpoint['model_state'])
    model_dict = model.state_dict()
    model_dict.update(state)
    model.load_state_dict(model_dict)
    print(
        "Loaded checkpoint '{}' (epoch {}, mapk {:.5f}, top1_acc {:7.3f}, top2_acc {:7.3f} top3_acc {:7.3f})"
        .format(args.model_path, checkpoint['epoch'], checkpoint['mapk'],
                checkpoint['top1_acc'], checkpoint['top2_acc'],
                checkpoint['top3_acc']))

    running_metrics = runningScore(n_classes)
    pred_dict = collections.OrderedDict()
    # BUGFIX: the meter was previously named `mapk`, shadowing the
    # module-level mapk() metric function called below (which made
    # `mapk(labels, pred, k=3)` call the AverageMeter instance instead).
    mapk_meter = AverageMeter()

    model.eval()
    with torch.no_grad():
        for i, (images, labels, _, names) in tqdm(enumerate(testloader)):
            # NOTE: removed leftover plt.imshow/plt.show debug code that
            # blocked batch inference on every iteration.
            images = images.cuda()
            if args.tta:
                images_flip = flip(images, dim=3)

            outputs = model(images)
            if args.tta:
                outputs_flip = model(images_flip)

            prob = F.softmax(outputs, dim=1)
            if args.tta:
                # Average probabilities of the original and flipped views.
                prob_flip = F.softmax(outputs_flip, dim=1)
                prob = (prob + prob_flip) / 2.0

            _, pred = prob.topk(k=3, dim=1, largest=True, sorted=True)
            for k in range(images.size(0)):
                pred_dict[int(names[0][k])] = loader.encode_pred_name(
                    pred[k, :])
            if not args.no_gt:
                running_metrics.update(labels, pred)
                mapk_val = mapk(labels, pred, k=3)
                mapk_meter.update(mapk_val, n=images.size(0))

    print('Mean Average Precision (MAP) @ 3: {:.5f}'.format(mapk_meter.avg))

    if not args.no_gt:
        print('Mean Average Precision (MAP) @ 3: {:.5f}'.format(
            mapk_meter.avg))
        score, class_iou = running_metrics.get_scores()
        for k, v in score.items():
            print(k, v)
        #for i in range(n_classes):
        #    print(i, class_iou[i])
        running_metrics.reset()
        mapk_meter.reset()

    # Create submission
    sub = pd.DataFrame.from_dict(pred_dict, orient='index')
    sub.index.names = ['key_id']
    sub.columns = ['word']
    sub.to_csv('{}_{}x{}.csv'.format(args.split, args.img_rows,
                                     args.img_cols))
def test(args):
    """Two-pass segmentation inference with optional hflip TTA.

    Pass 1 caches per-image probabilities and predicted-mask pixel counts.
    A global threshold (mask_sum_thresh) is then derived so that only the
    top `non_empty_ratio` fraction of images keep a non-empty mask.
    Pass 2 re-thresholds, zeroes the pruned masks, updates metrics and
    optionally dumps the probability volume for later fold merging.
    """
    if not os.path.exists(args.root_results):
        os.makedirs(args.root_results)
    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find('_')]
    # Setup Transforms
    # GroupNorm + pretrained weights use Caffe-style mean/std (0..255 scale);
    # otherwise standard ImageNet statistics.
    rgb_mean = [122.7717 / 255., 115.9465 / 255., 102.9801 / 255.
                ] if args.norm_type == 'gn' and args.load_pretrained else [
                    0.485, 0.456, 0.406
                ]
    rgb_std = [1. / 255., 1. / 255., 1. / 255.
               ] if args.norm_type == 'gn' and args.load_pretrained else [
                   0.229, 0.224, 0.225
               ]
    data_trans = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(size=(args.img_rows, args.img_cols)),
        transforms.ToTensor(),
        transforms.Normalize(mean=rgb_mean, std=rgb_std),
    ])
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path,
                         split=args.split,
                         in_channels=args.in_channels,
                         transforms=data_trans,
                         fold_num=args.fold_num,
                         num_folds=args.num_folds,
                         no_gt=args.no_gt,
                         seed=args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    n_classes = loader.n_classes
    testloader = data.DataLoader(
        loader,
        batch_size=args.batch_size)  #, num_workers=2, pin_memory=True)
    # Setup Model
    model = get_model(model_name,
                      n_classes=1,
                      in_channels=args.in_channels,
                      norm_type=args.norm_type,
                      use_cbam=args.use_cbam)
    model.cuda()
    checkpoint = torch.load(args.model_path)  #, encoding="latin1")
    state = convert_state_dict(checkpoint['model_state'])
    model_dict = model.state_dict()
    model_dict.update(state)
    model.load_state_dict(model_dict)
    saved_iter = checkpoint.get('iter', -1)
    dice_val = checkpoint.get('dice', -1)
    wacc_val = checkpoint.get('wacc', -1)
    print("Loaded checkpoint '{}' (iter {}, dice {:.5f}, wAcc {:.5f})".format(
        args.model_path, saved_iter, dice_val, wacc_val))
    running_metrics = runningScore(
        n_classes=2, weight_acc_non_empty=args.weight_acc_non_empty)
    # Full probability volume is cached in host memory so that the second
    # pass can re-threshold without re-running the network.
    # NOTE(review): mask size 1024x1024 is hard-coded here -- assumed to
    # match the loader's output resolution; confirm.
    y_prob = np.zeros((loader.__len__(), 1, 1024, 1024), dtype=np.float32)
    y_pred_sum = np.zeros((loader.__len__(), ), dtype=np.int32)
    pred_dict = collections.OrderedDict()
    num_non_empty_masks = 0
    model.eval()
    with torch.no_grad():
        # Pass 1: collect probabilities and predicted mask areas.
        for i, (images, labels, _) in tqdm(enumerate(testloader)):
            images = images.cuda()
            labels = labels.cuda()
            if args.tta:
                bs, c, h, w = images.size()
                images = torch.cat(
                    [images, torch.flip(images, dims=[3])], dim=0)  # hflip
            outputs = model(images, return_aux=False)
            prob = F.sigmoid(outputs)
            if args.tta:
                # Un-flip the flipped half and average the two views.
                prob = prob.view(-1, bs, 1, h, w)
                prob[1, :, :, :, :] = torch.flip(prob[1, :, :, :, :],
                                                 dims=[3])
                prob = prob.mean(0)
            pred = (prob > args.thresh).long()
            pred_sum = pred.sum(3).sum(2).sum(1)
            y_prob[i * args.batch_size:i * args.batch_size +
                   labels.size(0), :, :, :] = prob.cpu().numpy()
            y_pred_sum[i * args.batch_size:i * args.batch_size +
                       labels.size(0)] = pred_sum.cpu().numpy()
        # Derive the area threshold that keeps the largest-area
        # non_empty_ratio fraction of masks.
        y_pred_sum_argsorted = np.argsort(y_pred_sum)[::-1]
        pruned_idx = int(y_pred_sum_argsorted.shape[0] * args.non_empty_ratio)
        mask_sum_thresh = int(
            y_pred_sum[y_pred_sum_argsorted[pruned_idx]]
        ) if pruned_idx < y_pred_sum_argsorted.shape[0] else 0
        # Pass 2: re-threshold cached probabilities, prune small masks.
        for i, (_, labels, names) in tqdm(enumerate(testloader)):
            labels = labels.cuda()
            prob = torch.from_numpy(
                y_prob[i * args.batch_size:i * args.batch_size +
                       labels.size(0), :, :, :]).float().cuda()
            pred = (prob > args.thresh).long()
            pred_sum = pred.sum(3).sum(2).sum(1)
            for k in range(labels.size(0)):
                if pred_sum[k] > mask_sum_thresh:
                    num_non_empty_masks += 1
                else:
                    # Pruned: replace by an empty mask (or a single pixel
                    # when only_non_empty is set).
                    pred[k, :, :, :] = torch.zeros_like(pred[k, :, :, :])
                    if args.only_non_empty:
                        pred[k, :, 0, 0] = 1
            if not args.no_gt:
                running_metrics.update(labels.long(), pred.long())
            """
            if args.split == 'test':
                for k in range(labels.size(0)):
                    name = names[0][k]
                    if pred_dict.get(name, None) is None:
                        mask = pred[k, 0, :, :].cpu().numpy()
                        rle = loader.mask2rle(mask)
                        pred_dict[name] = rle
            #"""
    print(
        '# non-empty masks: {:5d} (non_empty_ratio: {:.5f} / mask_sum_thresh: {:6d})'
        .format(num_non_empty_masks, args.non_empty_ratio, mask_sum_thresh))
    if not args.no_gt:
        dice, dice_empty, dice_non_empty, miou, wacc, acc_empty, acc_non_empty = running_metrics.get_scores(
        )
        print('Dice (per image): {:.5f} (empty: {:.5f} / non-empty: {:.5f})'.
              format(dice, dice_empty, dice_non_empty))
        print('wAcc: {:.5f} (empty: {:.5f} / non-empty: {:.5f})'.format(
            wacc, acc_empty, acc_non_empty))
        print('Overall mIoU: {:.5f}'.format(miou))
        running_metrics.reset()
    if args.split == 'test':
        # Checkpoint filenames encode "<fold>-<num_folds>" in field 4.
        fold_num, num_folds = model_file_name.split('_')[4].split('-')
        prob_file_name = 'prob-{}_{}x{}_{}_{}_{}-{}'.format(
            args.split, args.img_rows, args.img_cols, model_name, saved_iter,
            fold_num, num_folds)
        np.save(
            os.path.join(args.root_results, '{}.npy'.format(prob_file_name)),
            y_prob)


# NOTE(review): dangling triple-quote below appears to open a commented-out
# region continuing past this chunk -- do not remove without checking.
"""
import argparse

from loaders import get_loader

# CLI for running REST test cases stored in a file.
parser = argparse.ArgumentParser(description='Test-REST')
parser.add_argument('filename', type=str, help='test cases file')
parser.add_argument('--format',
                    '-f',
                    type=str,
                    help='file format, default = yaml')
args = parser.parse_args()

# An explicit --format wins; otherwise the format is inferred from the
# file extension.
fmt = args.format if args.format else args.filename.split('.')[-1]
loader = get_loader(fmt)
if not loader:
    raise ValueError('format is not defined')

actions = loader(args.filename).load()
actions.run()
def train(args):
    """Jointly fine-tune the world-coordinate (wc) and backward-mapping (bm)
    networks of a DewarpNet-style pipeline.

    Both networks are initialised from mandatory pre-trained checkpoints
    (shape_net_loc / texture_mapping_net_loc) and optimised together with a
    single Adam optimiser. Total loss per batch:
        alpha * (wc_L1 + LClambda * grad_loss)
        + beta * (10 * bm_L1 + 0.5 * reconstruction_loss)
    """
    # Setup Dataloader
    wc_data_loader = get_loader('doc3dwc')
    data_path = args.data_path
    wc_t_loader = wc_data_loader(data_path,
                                 is_transform=True,
                                 img_size=(args.wc_img_rows,
                                           args.wc_img_cols),
                                 augmentations=args.augmentation)
    wc_v_loader = wc_data_loader(data_path,
                                 is_transform=True,
                                 split='val',
                                 img_size=(args.wc_img_rows,
                                           args.wc_img_cols))
    wc_n_classes = wc_t_loader.n_classes
    wc_trainloader = data.DataLoader(wc_t_loader,
                                     batch_size=args.batch_size,
                                     num_workers=8,
                                     shuffle=True)
    wc_valloader = data.DataLoader(wc_v_loader,
                                   batch_size=args.batch_size,
                                   num_workers=8)
    # Setup Model
    model_wc = get_model('unetnc', wc_n_classes, in_channels=3)
    model_wc = torch.nn.DataParallel(model_wc,
                                     device_ids=range(
                                         torch.cuda.device_count()))
    model_wc.cuda()
    # Setup Dataloader
    bm_data_loader = get_loader('doc3dbmnic')
    bm_t_loader = bm_data_loader(data_path,
                                 is_transform=True,
                                 img_size=(args.bm_img_rows,
                                           args.bm_img_cols))
    bm_v_loader = bm_data_loader(data_path,
                                 is_transform=True,
                                 split='val',
                                 img_size=(args.bm_img_rows,
                                           args.bm_img_cols))
    bm_n_classes = bm_t_loader.n_classes
    bm_trainloader = data.DataLoader(bm_t_loader,
                                     batch_size=args.batch_size,
                                     num_workers=8,
                                     shuffle=True)
    bm_valloader = data.DataLoader(bm_v_loader,
                                   batch_size=args.batch_size,
                                   num_workers=8)
    # Setup Model
    model_bm = get_model('dnetccnl', bm_n_classes, in_channels=3)
    model_bm = torch.nn.DataParallel(model_bm,
                                     device_ids=range(
                                         torch.cuda.device_count()))
    model_bm.cuda()
    # Both checkpoints are mandatory: joint training fine-tunes
    # pre-trained networks rather than training from scratch.
    if os.path.isfile(args.shape_net_loc):
        print("Loading model_wc from checkpoint '{}'".format(
            args.shape_net_loc))
        checkpoint = torch.load(args.shape_net_loc)
        model_wc.load_state_dict(checkpoint['model_state'])
        print("Loaded checkpoint '{}' (epoch {})".format(
            args.shape_net_loc, checkpoint['epoch']))
    else:
        print("No model_wc checkpoint found at '{}'".format(
            args.shape_net_loc))
        exit(1)
    if os.path.isfile(args.texture_mapping_net_loc):
        print("Loading model_bm from checkpoint '{}'".format(
            args.texture_mapping_net_loc))
        checkpoint = torch.load(args.texture_mapping_net_loc)
        model_bm.load_state_dict(checkpoint['model_state'])
        print("Loaded checkpoint '{}' (epoch {})".format(
            args.texture_mapping_net_loc, checkpoint['epoch']))
    else:
        print("No model_bm checkpoint found at '{}'".format(
            args.texture_mapping_net_loc))
        exit(1)
    # Activation
    htan = nn.Hardtanh(0, 1.0)
    # Optimizer
    optimizer = torch.optim.Adam(list(model_wc.parameters()) +
                                 list(model_bm.parameters()),
                                 lr=args.l_rate,
                                 weight_decay=5e-4,
                                 amsgrad=True)
    # LR Scheduler
    sched = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       mode='min',
                                                       factor=0.5,
                                                       patience=5,
                                                       verbose=True)
    # Losses
    MSE = nn.MSELoss()
    loss_fn = nn.L1Loss()
    gloss = grad_loss.Gradloss(window_size=5, padding=2)
    reconst_loss = recon_lossc.Unwarploss()
    epoch_start = 0
    # Log file:
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    experiment_name = 'joint train'
    log_file_name = os.path.join(args.logdir, experiment_name + '.txt')
    if os.path.isfile(log_file_name):
        log_file = open(log_file_name, 'a')
    else:
        log_file = open(log_file_name, 'w+')
    log_file.write('\n--------------- ' + experiment_name +
                   ' ---------------\n')
    log_file.close()
    # Setup tensorboard for visualization
    if args.tboard:
        # save logs in runs/<experiment_name>
        writer = SummaryWriter(comment=experiment_name)
    best_val_mse = 99999.0
    global_step = 0
    # Weight of the gradient-loss term; ramped up (see epoch == 50 below).
    LClambda = 0.2
    bm_img_size = (128, 128)
    # Relative weights of the wc and bm sub-losses.
    alpha = 0.5
    beta = 0.5
    for epoch in range(epoch_start, args.n_epoch):
        avg_loss = 0.0
        wc_avg_l1loss = 0.0
        wc_avg_gloss = 0.0
        wc_train_mse = 0.0
        bm_avgl1loss = 0.0
        bm_avgrloss = 0.0
        bm_avgssimloss = 0.0
        bm_train_mse = 0.0
        model_wc.train()
        model_bm.train()
        # NOTE(review): this bumps LClambda only at epoch 50 exactly, not
        # every epoch after -- presumably intentional, confirm.
        if epoch == 50 and LClambda < 1.0:
            LClambda += 0.2
        # The two loaders are iterated in lockstep; zip stops at the
        # shorter of the two.
        for (i, (wc_images, wc_labels)), (i, (bm_images, bm_labels)) in zip(
                enumerate(wc_trainloader), enumerate(bm_trainloader)):
            wc_images = Variable(wc_images.cuda())
            wc_labels = Variable(wc_labels.cuda())
            optimizer.zero_grad()
            wc_outputs = model_wc(wc_images)
            pred_wc = htan(wc_outputs)
            g_loss = gloss(pred_wc, wc_labels)
            wc_l1loss = loss_fn(pred_wc, wc_labels)
            loss = alpha * (wc_l1loss + LClambda * g_loss)
            bm_images = Variable(bm_images.cuda())
            bm_labels = Variable(bm_labels.cuda())
            # The bm net consumes the (downsampled) wc prediction, which
            # couples the two networks' gradients.
            bm_input = F.interpolate(pred_wc, bm_img_size)
            target = model_bm(bm_input)
            target_nhwc = target.transpose(1, 2).transpose(2, 3)
            bm_val_l1loss = loss_fn(target_nhwc, bm_labels)
            rloss, ssim, uworg, uwpred = reconst_loss(
                bm_images[:, :-1, :, :], target_nhwc, bm_labels)
            loss += beta * ((10.0 * bm_val_l1loss) + (0.5 * rloss))
            avg_loss += float(loss)
            wc_avg_l1loss += float(wc_l1loss)
            wc_avg_gloss += float(g_loss)
            wc_train_mse += float(MSE(pred_wc, wc_labels).item())
            bm_avgl1loss += float(bm_val_l1loss)
            bm_avgrloss += float(rloss)
            bm_avgssimloss += float(ssim)
            bm_train_mse += float(MSE(target_nhwc, bm_labels).item())
            loss.backward()
            optimizer.step()
            global_step += 1
            if (i + 1) % 50 == 0:
                print("Epoch[%d/%d] Batch [%d/%d] Loss: %.4f" %
                      (epoch + 1, args.n_epoch, i + 1, len(wc_trainloader),
                       avg_loss / 50.0))
                avg_loss = 0.0
            if args.tboard and (i + 1) % 20 == 0:
                show_wc_tnsboard(global_step, writer, wc_images, wc_labels,
                                 pred_wc, 8, 'Train Inputs', 'Train WCs',
                                 'Train pred_wc. WCs')
                writer.add_scalar('WC: L1 Loss/train',
                                  wc_avg_l1loss / (i + 1), global_step)
                writer.add_scalar('WC: Grad Loss/train',
                                  wc_avg_gloss / (i + 1), global_step)
                show_unwarp_tnsboard(bm_images, global_step, writer, uwpred,
                                     uworg, 8, 'Train GT unwarp',
                                     'Train Pred Unwarp')
                writer.add_scalar('BM: L1 Loss/train', bm_avgl1loss / (i + 1),
                                  global_step)
                writer.add_scalar('CB: Recon Loss/train',
                                  bm_avgrloss / (i + 1), global_step)
                writer.add_scalar('CB: SSIM Loss/train',
                                  bm_avgssimloss / (i + 1), global_step)
        wc_train_mse = wc_train_mse / len(wc_trainloader)
        wc_avg_l1loss = wc_avg_l1loss / len(wc_trainloader)
        wc_avg_gloss = wc_avg_gloss / len(wc_trainloader)
        print("wc Training L1:%4f" % (wc_avg_l1loss))
        print("wc Training MSE:'{}'".format(wc_train_mse))
        wc_train_losses = [wc_avg_l1loss, wc_train_mse, wc_avg_gloss]
        lrate = get_lr(optimizer)
        write_log_file(log_file_name, wc_train_losses, epoch + 1, lrate,
                       'Train', 'wc')
        bm_avgssimloss = bm_avgssimloss / len(bm_trainloader)
        bm_avgrloss = bm_avgrloss / len(bm_trainloader)
        bm_avgl1loss = bm_avgl1loss / len(bm_trainloader)
        bm_train_mse = bm_train_mse / len(bm_trainloader)
        print("bm Training L1:%4f" % (bm_avgl1loss))
        print("bm Training MSE:'{}'".format(bm_train_mse))
        bm_train_losses = [
            bm_avgl1loss, bm_train_mse, bm_avgrloss, bm_avgssimloss
        ]
        write_log_file(log_file_name, bm_train_losses, epoch + 1, lrate,
                       'Train', 'bm')
        model_wc.eval()
        model_bm.eval()
        val_mse = 0.0
        val_loss = 0.0
        wc_val_loss = 0.0
        wc_val_gloss = 0.0
        wc_val_mse = 0.0
        bm_val_l1loss = 0.0
        val_rloss = 0.0
        val_ssimloss = 0.0
        bm_val_mse = 0.0
        for (i_val, (wc_images_val,
                     wc_labels_val)), (i_val, (bm_images_val,
                                               bm_labels_val)) in tqdm(
                                                   zip(
                                                       enumerate(
                                                           wc_valloader),
                                                       enumerate(
                                                           bm_valloader))):
            with torch.no_grad():
                wc_images_val = Variable(wc_images_val.cuda())
                wc_labels_val = Variable(wc_labels_val.cuda())
                wc_outputs = model_wc(wc_images_val)
                pred_val = htan(wc_outputs)
                wc_g_loss = gloss(pred_val, wc_labels_val).cpu()
                pred_val = pred_val.cpu()
                wc_labels_val = wc_labels_val.cpu()
                wc_val_loss += loss_fn(pred_val, wc_labels_val)
                wc_val_mse += float(MSE(pred_val, wc_labels_val))
                wc_val_gloss += float(wc_g_loss)
                bm_images_val = Variable(bm_images_val.cuda())
                bm_labels_val = Variable(bm_labels_val.cuda())
                bm_input = F.interpolate(pred_val, bm_img_size)
                target = model_bm(bm_input)
                target_nhwc = target.transpose(1, 2).transpose(2, 3)
                pred = target_nhwc.data.cpu()
                gt = bm_labels_val.cpu()
                bm_val_l1loss += loss_fn(target_nhwc, bm_labels_val)
                rloss, ssim, uworg, uwpred = reconst_loss(
                    bm_images_val[:, :-1, :, :], target_nhwc, bm_labels_val)
                val_rloss += float(rloss.cpu())
                val_ssimloss += float(ssim.cpu())
                bm_val_mse += float(MSE(pred, gt))
                # NOTE(review): these add the *running totals* each batch
                # (not per-batch values), inflating val_loss/val_mse --
                # looks like a latent bug, confirm before relying on them.
                val_loss += (alpha * wc_val_loss + beta * bm_val_l1loss)
                val_mse += (wc_val_mse + bm_val_mse)
            if args.tboard:
                show_unwarp_tnsboard(bm_images_val, epoch + 1, writer,
                                     uwpred, uworg, 8, 'Val GT unwarp',
                                     'Val Pred Unwarp')
        if args.tboard:
            show_wc_tnsboard(epoch + 1, writer, wc_images_val, wc_labels_val,
                             pred_val, 8, 'Val Inputs', 'Val WCs',
                             'Val Pred. WCs')
            writer.add_scalar('WC: L1 Loss/val', wc_val_loss, epoch + 1)
            writer.add_scalar('WC: Grad Loss/val', wc_val_gloss, epoch + 1)
            writer.add_scalar('BM: L1 Loss/val', bm_val_l1loss, epoch + 1)
            writer.add_scalar('CB: Recon Loss/val', val_rloss, epoch + 1)
            writer.add_scalar('CB: SSIM Loss/val', val_ssimloss, epoch + 1)
            writer.add_scalar('total val loss', val_loss, epoch + 1)
        wc_val_loss = wc_val_loss / len(wc_valloader)
        wc_val_mse = wc_val_mse / len(wc_valloader)
        wc_val_gloss = wc_val_gloss / len(wc_valloader)
        print("wc val loss at epoch {}:: {}".format(epoch + 1, wc_val_loss))
        print("wc val MSE: {}".format(wc_val_mse))
        bm_val_l1loss = bm_val_l1loss / len(bm_valloader)
        bm_val_mse = bm_val_mse / len(bm_valloader)
        val_ssimloss = val_ssimloss / len(bm_valloader)
        val_rloss = val_rloss / len(bm_valloader)
        print("bm val loss at epoch {}:: {}".format(epoch + 1,
                                                    bm_val_l1loss))
        print("bm val mse: {}".format(bm_val_mse))
        val_loss /= len(wc_valloader)
        val_mse /= len(wc_valloader)
        print("val loss at epoch {}:: {}".format(epoch + 1, val_loss))
        print("val mse: {}".format(val_mse))
        bm_val_losses = [bm_val_l1loss, bm_val_mse, val_rloss, val_ssimloss]
        wc_val_losses = [wc_val_loss, wc_val_mse, wc_val_gloss]
        total_val_losses = [val_loss, val_mse]
        write_log_file(log_file_name, wc_val_losses, epoch + 1, lrate, 'Val',
                       'wc')
        write_log_file(log_file_name, bm_val_losses, epoch + 1, lrate, 'Val',
                       'bm')
        write_log_file(log_file_name, total_val_losses, epoch + 1, lrate,
                       'Val', 'total')
        # reduce learning rate
        sched.step(val_mse)
        # Checkpoint both networks whenever validation MSE improves.
        if val_mse < best_val_mse:
            best_val_mse = val_mse
            state_wc = {
                'epoch': epoch + 1,
                'model_state': model_wc.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state_wc, args.logdir +
                "{}_{}_{}_{}_{}_best_wc_model.pkl".format(
                    'unetnc', epoch + 1, wc_val_mse, wc_train_mse,
                    experiment_name))
            state_bm = {
                'epoch': epoch + 1,
                'model_state': model_bm.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state_bm, args.logdir +
                "{}_{}_{}_{}_{}_best_bm_model.pkl".format(
                    'dnetccnl', epoch + 1, bm_val_mse, bm_train_mse,
                    experiment_name))
        # Additionally snapshot every 10th epoch late in training.
        if (epoch + 1) % 10 == 0 and epoch > 70:
            state_wc = {
                'epoch': epoch + 1,
                'model_state': model_wc.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state_wc, args.logdir + "{}_{}_{}_{}_{}_wc_model.pkl".format(
                    'unetnc', epoch + 1, wc_val_mse, wc_train_mse,
                    experiment_name))
            state_bm = {
                'epoch': epoch + 1,
                'model_state': model_bm.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state_bm, args.logdir + "{}_{}_{}_{}_{}_bm_model.pkl".format(
                    'dnetccnl', epoch + 1, bm_val_mse, bm_train_mse,
                    experiment_name))
def train(args):
    """Train a mobilenet_v2 classifier with per-sample loss weighting by the
    'recognized' flag, gradient accumulation (args.iter_size), MAP@3 / top-k
    validation and per-epoch checkpointing.
    """
    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')
    # Setup Augmentations
    data_aug = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomAffine(degrees=10,
                                translate=(0.05, 0.05),
                                scale=(0.95, 1.05)),
    ])
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path,
                           is_transform=True,
                           split='train',
                           version='simplified',
                           img_size=(args.img_rows, args.img_cols),
                           augmentations=data_aug,
                           train_fold_num=args.train_fold_num,
                           num_train_folds=args.num_train_folds,
                           seed=args.seed)
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='val',
                           version='simplified',
                           img_size=(args.img_rows, args.img_cols),
                           num_val=args.num_val,
                           seed=args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=2,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True)
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=2,
                                pin_memory=True)
    # Setup Metrics
    running_metrics = runningScore(n_classes)
    # Setup Model
    # model = get_model(args.arch, n_classes, use_cbam=args.use_cbam)
    # ImageNet-pretrained backbone with a replaced classification head.
    model = torchvision.models.mobilenet_v2(pretrained=True)
    num_ftrs = model.last_channel
    model.classifier = nn.Sequential(
        nn.Dropout(0.2),
        nn.Linear(num_ftrs, n_classes),
    )
    model.cuda()
    # Check if model has custom optimizer / loss
    if hasattr(model, 'optimizer'):
        optimizer = model.optimizer
    else:
        ##optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=args.l_rate, momentum=args.momentum, weight_decay=args.weight_decay)
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.l_rate,
                                     weight_decay=args.weight_decay)
    # if args.num_cycles > 0:
    #     len_trainloader = int(5e6)  # 4960414
    #     scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.num_train_folds*len_trainloader//args.num_cycles, eta_min=args.l_rate*1e-1)
    # else:
    #     scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2, 4, 6, 8], gamma=0.5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.5,
                                                           patience=5,
                                                           cooldown=5,
                                                           min_lr=1e-7)
    if hasattr(model, 'loss'):
        print('Using custom loss')
        loss_fn = model.loss
    else:
        loss_fn = F.cross_entropy
    start_epoch = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            model_dict = model.state_dict()
            # Checkpoints may either be a bare state_dict or a full
            # training-state dict with a 'model_state' key.
            if checkpoint.get('model_state', -1) == -1:
                model_dict.update(
                    convert_state_dict(checkpoint,
                                       load_classifier=args.load_classifier))
            else:
                model_dict.update(
                    convert_state_dict(checkpoint['model_state'],
                                       load_classifier=args.load_classifier))
                print(
                    "Loaded checkpoint '{}' (epoch {}, mapk {:.5f}, top1_acc {:7.3f}, top2_acc {:7.3f} top3_acc {:7.3f})"
                    .format(args.resume, checkpoint['epoch'],
                            checkpoint['mapk'], checkpoint['top1_acc'],
                            checkpoint['top2_acc'], checkpoint['top3_acc']))
            model.load_state_dict(model_dict)
            if checkpoint.get('optimizer_state', None) is not None:
                optimizer.load_state_dict(checkpoint['optimizer_state'])
            start_epoch = checkpoint['epoch']
        else:
            print("No checkpoint found at '{}'".format(args.resume))
    loss_sum = 0.0
    for epoch in range(start_epoch, args.n_epoch):
        start_train_time = timeit.default_timer()
        model.train()
        optimizer.zero_grad()
        for i, (images, labels, recognized, _) in enumerate(trainloader):
            images = images.cuda()
            labels = labels.cuda()
            recognized = recognized.cuda()
            outputs = model(images)
            # Per-sample cross-entropy weighted by the 'recognized' flag so
            # unrecognized drawings contribute less (or nothing).
            loss = (loss_fn(outputs,
                            labels.view(-1),
                            ignore_index=t_loader.ignore_index,
                            reduction='none') * recognized.view(-1)).mean()
            # loss = loss / float(args.iter_size)  # Accumulated gradients
            loss_sum = loss_sum + loss
            loss.backward()
            if (i + 1) % args.print_train_freq == 0:
                print("Epoch [%d/%d] Iter [%6d/%6d] Loss: %.4f" %
                      (epoch + 1, args.n_epoch, i + 1, len(trainloader),
                       loss_sum))
            # Step only every iter_size batches (gradient accumulation).
            if (i + 1) % args.iter_size == 0 or i == len(trainloader) - 1:
                optimizer.step()
                optimizer.zero_grad()
                loss_sum = 0.0
        mapk_val = AverageMeter()
        top1_acc_val = AverageMeter()
        top2_acc_val = AverageMeter()
        top3_acc_val = AverageMeter()
        mean_loss_val = AverageMeter()
        model.eval()
        with torch.no_grad():
            for i_val, (images_val, labels_val, recognized_val,
                        _) in tqdm(enumerate(valloader)):
                images_val = images_val.cuda()
                labels_val = labels_val.cuda()
                recognized_val = recognized_val.cuda()
                outputs_val = model(images_val)
                loss_val = (loss_fn(outputs_val,
                                    labels_val.view(-1),
                                    ignore_index=v_loader.ignore_index,
                                    reduction='none') *
                            recognized_val.view(-1)).mean()
                mean_loss_val.update(loss_val, n=images_val.size(0))
                _, pred = outputs_val.topk(k=3,
                                           dim=1,
                                           largest=True,
                                           sorted=True)
                running_metrics.update(labels_val, pred[:, 0])
                acc1, acc2, acc3 = accuracy(outputs_val,
                                            labels_val,
                                            topk=(1, 2, 3))
                top1_acc_val.update(acc1, n=images_val.size(0))
                top2_acc_val.update(acc2, n=images_val.size(0))
                top3_acc_val.update(acc3, n=images_val.size(0))
                mapk_v = mapk(labels_val, pred, k=3)
                mapk_val.update(mapk_v, n=images_val.size(0))
        print('Mean Average Precision (MAP) @ 3: {:.5f}'.format(
            mapk_val.avg))
        print('Top 3 accuracy: {:7.3f} / {:7.3f} / {:7.3f}'.format(
            top1_acc_val.avg, top2_acc_val.avg, top3_acc_val.avg))
        print('Mean val loss: {:.4f}'.format(mean_loss_val.avg))
        score, class_iou = running_metrics.get_scores()
        for k, v in score.items():
            print(k, v)
        #for i in range(n_classes):
        #    print(i, class_iou[i])
        # Plateau scheduler keys off the mean validation loss.
        scheduler.step(mean_loss_val.avg)
        state = {
            'epoch': epoch + 1,
            'model_state': model.state_dict(),
            'optimizer_state': optimizer.state_dict(),
            'mapk': mapk_val.avg,
            'top1_acc': top1_acc_val.avg,
            'top2_acc': top2_acc_val.avg,
            'top3_acc': top3_acc_val.avg,
        }
        torch.save(
            state, "checkpoints/{}_{}_{}_{}x{}_{}-{}-{}_model.pth".format(
                args.arch, args.dataset, epoch + 1, args.img_rows,
                args.img_cols, args.train_fold_num, args.num_train_folds,
                args.num_val))
        running_metrics.reset()
        mapk_val.reset()
        top1_acc_val.reset()
        top2_acc_val.reset()
        top3_acc_val.reset()
        mean_loss_val.reset()
        elapsed_train_time = timeit.default_timer() - start_train_time
        print('Training time (epoch {0:5d}): {1:10.5f} seconds'.format(
            epoch + 1, elapsed_train_time))
def train(n_epoch=50, batch_size=32, resume=False, tboard=False, ne_path=''):
    """Train the shading-estimation (SE) refinement U-Net on doc3d data.

    Optimises an L1 loss (MSE is tracked for reporting only), reduces the
    learning rate on validation-L1 plateau, checkpoints the best model and
    snapshots every 10th epoch.

    Args:
        n_epoch: total number of epochs.
        batch_size: batch size for both train and val loaders.
        resume: when True, restore model/optimizer/epoch from ne_path.
        tboard: when True, log losses to TensorBoard.
        ne_path: checkpoint path used when resume is True.
    """
    model_name = 'unetnc'
    # Setup dataloader
    # NOTE(review): dataset root is hard-coded to a local Windows path.
    data_path = 'C:/Users/yuttapichai.lam/dev-environment/doc3d'
    data_loader = get_loader('se_refine')
    t_loader = data_loader(data_path, is_transform=True, img_size=(256, 256))
    v_loader = data_loader(data_path,
                           split='val',
                           is_transform=True,
                           img_size=(256, 256))
    trainloader = data.DataLoader(t_loader,
                                  batch_size=batch_size,
                                  num_workers=8,
                                  shuffle=True)
    valloader = data.DataLoader(v_loader,
                                batch_size=batch_size,
                                num_workers=8)
    # Load models
    print('Loading')
    # 6 input channels: presumably image + an auxiliary 3-channel input
    # stacked by the se_refine loader -- confirm against the loader.
    model = get_model(model_name, n_classes=3, in_channels=6)
    model = torch.nn.DataParallel(model,
                                  device_ids=range(
                                      torch.cuda.device_count()))
    model.cuda()
    # print(model)
    # print(len(list(model.parameters)))
    # Setup optimizer and learning rate reduction
    print('Setting optimizer')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-4,
                                 weight_decay=5e-4,
                                 amsgrad=True)
    schedule = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                          mode='min',
                                                          factor=0.5,
                                                          patience=3,
                                                          verbose=True)
    # Set Activation function
    htan = nn.Hardtanh(-1.0, 1.0)
    # sigmoid = nn.Sigmoid()
    # Setup losses
    MSE = nn.MSELoss()
    loss_fn = nn.L1Loss()
    epoch_start = 0
    global_step = 0
    if tboard:
        writer = SummaryWriter(comment='Refinement_SE')
    if resume:
        print('Resume from previous state')
        ne_chkpnt = torch.load(ne_path)
        model.load_state_dict(ne_chkpnt['model_state'])
        epoch_start = ne_chkpnt['epoch']
        optimizer.load_state_dict(ne_chkpnt['optimizer_state'])
    best_val_mse = 9999999.0
    print(f'Start from epoch {epoch_start} of {n_epoch}')
    print('Starting')
    for epoch in range(epoch_start, n_epoch):
        print(f'Epoch: {epoch}')
        # Loss initialization
        avg_loss = 0.0
        avg_l1loss = 0.0
        train_mse = 0.0
        # Start training
        model.train()
        print('Training')
        for i, (imgs, shades) in enumerate(trainloader):
            images = Variable(imgs.cuda())
            shade_labels = Variable(shades.cuda())
            optimizer.zero_grad()
            # Train SE network
            se_out = model(images)
            # ne_out = F.interpolate(ne_out, size=(
            #     256, 256), mode='bilinear', align_corners=True)
            # ne_pred = sigmoid(ne_out)
            se_pred = htan(se_out)
            # im = ne_pred.cpu().detach().numpy()
            # # print(im.shape)
            # im = im.transpose(0, 2, 3, 1)
            # print(im[0])
            # plt.imshow(im[0])
            # plt.show()
            l1_loss = loss_fn(se_pred, shade_labels)
            mse = MSE(se_pred, shade_labels)
            # SE Loss
            avg_l1loss += float(l1_loss)
            train_mse += float(mse)
            avg_loss += float(l1_loss)
            global_step += 1
            if (i + 1) % 20 == 0:
                print(
                    f'Epoch[{epoch}/{n_epoch}] Batch[{i+1}/{len(trainloader)}] Loss: {avg_loss/(i+1):.6f} MSE: {train_mse/(i+1)}'
                )
            if tboard and (i + 1) % 20 == 0:
                writer.add_scalars(
                    'Train', {
                        'L1_Loss/train': avg_loss / (i + 1),
                        'MSE_Loss/train': train_mse / (i + 1)
                    }, global_step)
            # Only the L1 loss is backpropagated; MSE is reporting-only.
            # mse.backward()
            l1_loss.backward()
            optimizer.step()
        len_trainset = len(trainloader)
        loss = avg_loss / len_trainset
        train_mse = train_mse / len_trainset
        train_losses = [loss, train_mse]
        print(f'SE L1 loss: {train_losses[0]} SE MSE: {train_losses[1]}')
        model.eval()
        val_l1 = 0.0
        val_mse = 0.0
        print('Validating')
        for i_val, (imgs_val, shades_val) in tqdm(enumerate(valloader)):
            with torch.no_grad():
                images_val = Variable(imgs_val.cuda())
                shade_labels_val = Variable(shades_val.cuda())
                # Val SE Network
                se_out_val = model(images_val)
                # ne_out_val = F.interpolate(ne_out_val, size=(
                #     256, 256), mode='bilinear', align_corners=True)
                # ne_pred_val = sigmoid(ne_out_val)
                se_pred_val = htan(se_out_val)
                ne_l1 = loss_fn(se_pred_val, shade_labels_val)
                ne_mse = MSE(se_pred_val, shade_labels_val)
                # Val SE Loss
                val_l1 += float(ne_l1.cpu())
                val_mse += float(ne_mse.cpu())
        len_valset = len(valloader)
        val_l1 = val_l1 / len_valset
        val_mse = val_mse / len_valset
        val_losses = [val_l1, val_mse]
        print(f'SE L1 loss: {val_losses[0]} SE MSE: {val_losses[1]}')
        if tboard:
            writer.add_scalars('L1', {'train': loss, 'val': val_l1}, epoch)
            writer.add_scalars('MSE', {
                'train': train_mse,
                'val': val_mse
            }, epoch)
        # Reduce learning rate
        schedule.step(val_l1)
        # NOTE(review): despite the name, best_val_mse tracks validation L1.
        if val_l1 < best_val_mse:
            best_val_mse = val_l1
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict()
            }
            torch.save(
                state,
                f'./checkpoints-se/unetnc_{epoch}_ne_{val_l1}_{loss}_best_model.pkl'
            )
        if (epoch + 1) % 10 == 0:
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict()
            }
            torch.save(
                state,
                f'./checkpoints-se/unetnc_{epoch}_ne_auto_saving_every_ten_epochs_with_{val_l1}_{loss}_loss.pkl'
            )
def main():
    """Entry point: build the detection/re-id pipeline, run multi-object
    tracking over the input video(s), then assign re-id identities to the
    resulting SORT tracks."""
    args = init_args()
    detector = FasterRCNN()
    attribute_extractor = MgnWrapper(args.weights_path)
    dataloader = loaders.get_loader(args.video_path, args.loader, args.interval)
    ref_img = cv2.imread(args.ref_image_path)

    # TODO: (nhendy) do this mapping in a config file
    # Two door-watch triggers on the same (hardcoded) camera, one per ROI pair.
    trigger_causes = []
    for check_coords, roi_coords in ((CHECK_OPEN_COORDS_TWO, TRIGGER_ROI_COORDS_TWO),
                                     (CHECK_OPEN_COORDS_ONE, TRIGGER_ROI_COORDS_ONE)):
        trigger_causes.append(
            BboxTrigger(
                # TODO: (nhendy) weird hardcoded name
                "NE_Moiz",
                ref_img,
                DOOR_CLOSED_THRESHOLD,
                DOOR_OPEN_THRESHOLD,
                check_coords,
                roi_coords,
                detector,
            ))
    gallery = galleries.TriggerGallery(attribute_extractor, trigger_causes)

    temp_dir = tempfile.mkdtemp()

    # One SORT tracker per video/camera...
    sort_trackers = {}
    for vid_name in dataloader.get_vid_names():
        sort_trackers[vid_name] = Sort()
    # ...and one raw MOT output file per video/camera.
    output_files = {}
    for vid_name in dataloader.get_vid_names():
        output_files[vid_name] = open(
            os.path.join(temp_dir, "{}.txt".format(vid_name)), "w")

    # Run detector, Sort and fill up gallery
    run_mot_and_fill_gallery(dataloader, gallery, detector, sort_trackers,
                             output_files)

    # Save images from gallery captured throughout video
    write_gallery_imgs(gallery.people, args.gallery_path)

    # Load up the gallery feature vectors
    gallery_feature_vectors = load_gallery_feat_vectors(
        args.gallery_path, attribute_extractor)

    # TODO: (nhendy) this is needed because downstream functions
    # assume the dict contain numpy arrays not files. Remove later
    convert_files_to_numpy(temp_dir, output_files)

    # Run reid model and map track ids to reid ids
    run_reid_model_and_assign_ids(sort_trackers, attribute_extractor,
                                  output_files, gallery_feature_vectors)
def train(n_epoch=50, batch_size=32, resume=False, wc_path='', bm_path=''):
    """Jointly train the world-coordinate (WC) and backward-mapping (BM)
    networks for document unwarping on the doc3d joint dataset.

    Args:
        n_epoch: total number of epochs to run.
        batch_size: mini-batch size for both train and val loaders.
        resume: when True, restore model weights from ``wc_path``/``bm_path``.
        wc_path: WC-model checkpoint path (used only when ``resume`` is True).
        bm_path: BM-model checkpoint path (used only when ``resume`` is True).

    Side effects: saves checkpoints under ./checkpoints-se/, pops up
    matplotlib windows every 10 training batches, and prints progress.
    """
    wc_model_name = 'unetnc'
    bm_model_name = 'dnetccnl'
    # Setup dataloader
    # NOTE(review): hard-coded local Windows path — should come from config.
    data_path = 'C:/Users/yuttapichai.lam/dev-environment/doc3d'
    data_loader = get_loader('doc3djoint')
    t_loader = data_loader(data_path,
                           is_transform=True,
                           img_size=(256, 256),
                           bm_size=(128, 128))
    v_loader = data_loader(data_path,
                           split='val',
                           is_transform=True,
                           img_size=(256, 256),
                           bm_size=(128, 128))
    trainloader = data.DataLoader(t_loader,
                                  batch_size=batch_size,
                                  num_workers=8,
                                  shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=batch_size, num_workers=8)
    # Last layer activation: clamps predictions into [0, 1].
    htan = nn.Hardtanh(0, 1.0)
    # Load models
    print('Loading')
    wc_model = get_model(wc_model_name, n_classes=3, in_channels=3)
    wc_model = torch.nn.DataParallel(wc_model,
                                     device_ids=range(
                                         torch.cuda.device_count()))
    wc_model.cuda()
    bm_model = get_model(bm_model_name, n_classes=2, in_channels=3)
    bm_model = torch.nn.DataParallel(bm_model,
                                     device_ids=range(
                                         torch.cuda.device_count()))
    bm_model.cuda()
    # Setup optimizer and learning rate reduction — one Adam over both
    # models' parameters so the joint loss updates them together.
    print('Setting optimizer')
    optimizer = torch.optim.Adam([{
        'params': wc_model.parameters()
    }, {
        'params': bm_model.parameters()
    }],
                                 lr=1e-4,
                                 weight_decay=5e-4,
                                 amsgrad=True)
    schedule = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                          mode='min',
                                                          factor=0.5,
                                                          patience=3,
                                                          verbose=True)
    # Setup losses
    MSE = nn.MSELoss()
    loss_fn = nn.L1Loss()
    reconst_loss = recon_lossc.Unwarploss()
    g_loss = grad_loss.Gradloss(window_size=5, padding=2)
    epoch_start = 0
    if resume:
        print('Resume from previous state')
        wc_chkpnt = torch.load(wc_path)
        wc_model.load_state_dict(wc_chkpnt['model_state'])
        bm_chkpnt = torch.load(bm_path)
        bm_model.load_state_dict(bm_chkpnt['model_state'])
        # optimizer.load_state_dict(
        #     [wc_chkpnt['optimizer_state'], bm_chkpnt['optimizer_state']])
        # NOTE(review): optimizer state is intentionally not restored;
        # only the BM checkpoint's epoch counter is reused.
        epoch_start = bm_chkpnt['epoch']
    best_valwc_mse = 9999999.0
    best_valbm_mse = 9999999.0
    print(f'Start from epoch {epoch_start} of {n_epoch}')
    print('Starting')
    for epoch in range(epoch_start, n_epoch):
        print(f'Epoch: {epoch}')
        # Loss initialization (running sums over the epoch)
        avg_loss = 0.0
        avg_wcloss = 0.0
        avgwcl1loss = 0.0
        avg_gloss = 0.0
        train_wcmse = 0.0
        avg_bmloss = 0.0
        avgbml1loss = 0.0
        avgrloss = 0.0
        avgssimloss = 0.0
        train_bmmse = 0.0
        avg_const_l1 = 0.0
        avg_const_mse = 0.0
        # Start training
        wc_model.train()
        bm_model.train()
        print('Training')
        for i, (imgs, wcs, bms, recons, ims, lbls) in enumerate(trainloader):
            images = Variable(imgs.cuda())
            wc_labels = Variable(wcs.cuda())
            bm_labels = Variable(bms.cuda())
            recon_labels = Variable(recons.cuda())
            im_inputs = Variable(ims.cuda())
            labels = Variable(lbls.cuda())
            optimizer.zero_grad()
            # Train WC network: predict world coordinates at 256x256, and
            # produce a 128x128 version to feed the BM network.
            wc_out = wc_model(images)
            wc_out = F.interpolate(wc_out,
                                   size=(256, 256),
                                   mode='bilinear',
                                   align_corners=True)
            bm_inp = F.interpolate(wc_out,
                                   size=(128, 128),
                                   mode='bilinear',
                                   align_corners=True)
            bm_inp = htan(bm_inp)
            wc_pred = htan(wc_out)
            wc_l1loss = loss_fn(wc_pred, wc_labels)
            wc_gloss = g_loss(wc_pred, wc_labels)
            wc_mse = MSE(wc_pred, wc_labels)
            wc_loss = wc_l1loss + (0.2 * wc_gloss)
            # WC Loss accumulation
            avgwcl1loss += float(wc_l1loss)
            avg_gloss += float(wc_gloss)
            train_wcmse += float(wc_mse)
            avg_wcloss += float(wc_loss)
            # Train BM network (input is the WC prediction, so gradients
            # flow back through the WC model as well).
            bm_out = bm_model(bm_inp)
            # NCHW -> NHWC so the flow field matches the label layout.
            bm_out = bm_out.transpose(1, 2).transpose(2, 3)
            bm_l1loss = loss_fn(bm_out, bm_labels)
            rloss, ssim, _, _ = reconst_loss(recon_labels, bm_out, bm_labels)
            bm_mse = MSE(bm_out, bm_labels)
            bm_loss = (10.0 * bm_l1loss) + (0.5 * rloss)
            # Loss between unwarped GT and unwarped Predict
            im_ins = im_inputs[:, :3, :, :]
            bm_out = bm_out.double()
            label_in = labels[:, :3, :, :]
            bm_labels = bm_labels.double()
            uwpred = unwarp(im_ins, bm_out)
            uworg = unwarp(label_in, bm_labels)
            const_l1 = loss_fn(uwpred, uworg)
            const_mse = MSE(uwpred, uworg)
            # BM Loss accumulation
            avg_const_l1 += float(const_l1)
            avg_const_mse += float(const_mse)
            avgbml1loss += float(bm_l1loss)
            avgrloss += float(rloss)
            avgssimloss += float(ssim)
            train_bmmse += float(bm_mse)
            avg_bmloss += float(bm_loss)
            # Step loss: equal-weighted sum of the two sub-network losses.
            loss = (0.5 * wc_loss) + (0.5 * bm_loss)
            avg_loss += float(loss)
            # print(f'Epoch[{epoch}/{n_epoch}] Loss: {loss:.6f} Const Loss: {const_l1:.6f}')
            if (i + 1) % 10 == 0:
                # Show image: GT unwarp vs predicted unwarp, side by side.
                # NOTE(review): plt.show() blocks until the window is closed.
                _, ax = plt.subplots(1, 2)
                ax[0].imshow(uworg[0].cpu().detach().numpy().transpose(
                    (1, 2, 0)))
                ax[1].imshow(uwpred[0].cpu().detach().numpy().transpose(
                    (1, 2, 0)))
                plt.show()
                print(
                    f'Epoch[{epoch}/{n_epoch}] Batch[{i+1}/{len(trainloader)}] Loss: {avg_loss/(i+1):.6f} Const Loss: {avg_const_l1/(i+1):.6f}'
                )
            loss.backward()
            # const_l1.backward()
            optimizer.step()
        len_trainset = len(trainloader)
        avg_const_l1 = avg_const_l1 / len_trainset
        train_wcmse = train_wcmse / len_trainset
        train_bmmse = train_bmmse / len_trainset
        train_losses = [
            avgwcl1loss / len_trainset, train_wcmse,
            avg_gloss / len_trainset, avgbml1loss / len_trainset,
            train_bmmse, avgrloss / len_trainset,
            avgssimloss / len_trainset, avg_const_l1,
            avg_const_mse / len_trainset
        ]
        print(
            f'WC L1 loss: {train_losses[0]} WC MSE: {train_losses[1]} WC GLoss: {train_losses[2]}'
        )
        print(
            f'BM L1 Loss: {train_losses[3]} BM MSE: {train_losses[4]} BM RLoss: {train_losses[5]} BM SSIM Loss: {train_losses[6]}'
        )
        print(
            f'Reconstruction against GT => Loss: {train_losses[7]} MSE" {train_losses[8]}'
        )
        # ---- Validation ----
        wc_model.eval()
        bm_model.eval()
        wc_val_l1 = 0.0
        wc_val_mse = 0.0
        wc_val_gloss = 0.0
        bm_val_l1 = 0.0
        bm_val_mse = 0.0
        bm_val_rloss = 0.0
        bm_val_ssim = 0.0
        avg_const_l1_val = 0.0
        avg_const_mse_val = 0.0
        print('Validating')
        for i_val, (imgs_val, wcs_val, bms_val, recons_val, ims_val,
                    lbls_val) in tqdm(enumerate(valloader)):
            with torch.no_grad():
                images_val = Variable(imgs_val.cuda())
                wc_labels_val = Variable(wcs_val.cuda())
                bm_labels_val = Variable(bms_val.cuda())
                recon_labels_val = Variable(recons_val.cuda())
                ims_labels_val = Variable(ims_val.cuda())
                labels_val = Variable(lbls_val.cuda())
                # Val WC Network
                wc_out_val = wc_model(images_val)
                wc_out_val = F.interpolate(wc_out_val,
                                           size=(256, 256),
                                           mode='bilinear',
                                           align_corners=True)
                bm_inp_val = F.interpolate(wc_out_val,
                                           size=(128, 128),
                                           mode='bilinear',
                                           align_corners=True)
                bm_inp_val = htan(bm_inp_val)
                wc_pred_val = htan(wc_out_val)
                wc_l1 = loss_fn(wc_pred_val, wc_labels_val)
                wc_gloss = g_loss(wc_pred_val, wc_labels_val)
                wc_mse = MSE(wc_pred_val, wc_labels_val)
                # Val BM network
                bm_out_val = bm_model(bm_inp_val)
                bm_out_val = bm_out_val.transpose(1, 2).transpose(2, 3)
                bm_l1 = loss_fn(bm_out_val, bm_labels_val)
                rloss, ssim, _, _ = reconst_loss(recon_labels_val, bm_out_val,
                                                 bm_labels_val)
                bm_mse = MSE(bm_out_val, bm_labels_val)
                # Loss between unwarped GT and unwarped Predict
                im_ins_val = ims_labels_val[:, :3, :, :]
                bm_out_val = bm_out_val.double()
                lbl_ins_val = labels_val[:, :3, :, :]
                bm_labels_val = bm_labels_val.double()
                uwpred_val = unwarp(im_ins_val, bm_out_val)
                uworg_val = unwarp(lbl_ins_val, bm_labels_val)
                const_l1_val = loss_fn(uwpred_val, uworg_val)
                const_mse_val = MSE(uwpred_val, uworg_val)
                # Val Loss accumulation
                avg_const_l1_val += float(const_l1_val)
                avg_const_mse_val += float(const_mse_val)
                wc_val_l1 += float(wc_l1.cpu())
                wc_val_gloss += float(wc_gloss.cpu())
                wc_val_mse += float(wc_mse.cpu())
                bm_val_l1 += float(bm_l1.cpu())
                bm_val_mse += float(bm_mse.cpu())
                bm_val_rloss += float(rloss.cpu())
                bm_val_ssim += float(ssim.cpu())
        len_valset = len(valloader)
        avg_const_l1_val = avg_const_l1_val / len_valset
        wc_val_mse = wc_val_mse / len_valset
        bm_val_mse = bm_val_mse / len_valset
        val_losses = [
            wc_val_l1 / len_valset, wc_val_mse, wc_val_gloss / len_valset,
            bm_val_l1 / len_valset, bm_val_mse, bm_val_rloss / len_valset,
            bm_val_ssim / len_valset, avg_const_l1_val,
            avg_const_mse_val / len_valset
        ]
        print(
            f'WC L1 loss: {val_losses[0]} WC MSE: {val_losses[1]} WC GLoss: {val_losses[2]}'
        )
        print(
            f'BM L1 Loss: {val_losses[3]} BM MSE: {val_losses[4]} BM RLoss: {val_losses[5]} BM SSIM Loss: {val_losses[6]}'
        )
        print(
            f'Reconstruction against GT => Loss: {val_losses[7]} MSE" {val_losses[8]}'
        )
        # Reduce learning rate on plateau of the BM validation MSE.
        schedule.step(bm_val_mse)
        # Checkpoint best models independently per sub-network.
        if wc_val_mse < best_valwc_mse:
            best_valwc_mse = wc_val_mse
            state = {'epoch': epoch, 'model_state': wc_model.state_dict()}
            torch.save(
                state,
                f'./checkpoints-wc/unetnc_{epoch}_wc_{wc_val_mse}_{train_wcmse}_best_model.pkl'
            )
        if bm_val_mse < best_valbm_mse:
            best_valbm_mse = bm_val_mse
            state = {'epoch': epoch, 'model_state': bm_model.state_dict()}
            torch.save(
                state,
                f'./checkpoints-bm/dnetccnl_{epoch}_bm_{bm_val_mse}_{train_bmmse}_best_model.pkl'
            )
def load(args):
    """Download the test cases of the given problem into TESTS_DIR.

    The site-specific loader implementation is resolved from ``args.site_id``.
    """
    site_loader = loaders.get_loader(args.site_id)
    site_loader.load_tests(args.contest_id, args.problem_id, TESTS_DIR)
def test(args):
    """Run multi-label inference (protein-atlas style) with optional TTA and
    leak matching, report per-class stats, and write a submission CSV.

    Side effects: saves the per-sample probability matrix to
    ``prob-<...>.npy`` and the predictions to a ``*.csv`` submission file.
    """
    # Model name is the prefix of the checkpoint filename (before first '_').
    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find('_')]

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path,
                         split=args.split,
                         is_transform=True,
                         img_size=(args.img_rows, args.img_cols),
                         no_gt=args.no_gt,
                         seed=args.seed,
                         use_external=args.use_external)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    n_classes = loader.n_classes
    testloader = data.DataLoader(loader,
                                 batch_size=args.batch_size,
                                 num_workers=4,
                                 pin_memory=True)

    # Setup Model
    model = get_model(model_name, n_classes, use_cbam=args.use_cbam)
    model.cuda()

    checkpoint = torch.load(args.model_path)
    state = convert_state_dict(checkpoint['model_state'])
    model_dict = model.state_dict()
    model_dict.update(state)
    model.load_state_dict(model_dict)

    if checkpoint.get('f1_score', None) is not None:
        print("Loaded checkpoint '{}' (epoch {}, f1_score {:.5f})"
              .format(args.model_path, checkpoint['epoch'],
                      checkpoint['f1_score']))
    else:
        print("Loaded checkpoint '{}' (epoch {})"
              .format(args.model_path, checkpoint['epoch']))

    weak_samples = 0  # samples where no class cleared the threshold
    y_true = np.zeros((loader.__len__(), n_classes), dtype=np.int32)
    y_pred = np.zeros((loader.__len__(), n_classes), dtype=np.int32)
    y_prob = np.zeros((loader.__len__(), n_classes), dtype=np.float32)
    # y_pow encodes each sample's label set as a bitmask integer.
    y_pow = np.zeros((loader.__len__(),), dtype=np.int64)
    pow_base = 2 ** np.arange(n_classes)
    pred_dict = collections.OrderedDict()
    if args.use_leak:
        # Known train/test duplicates ("leak"): map a test image to its
        # matching train image's labels when the similarity is high enough.
        leak_df = pd.read_csv(os.path.join(
            data_path, 'TestEtraMatchingUnder_259_R14_G12_B10.csv'),
            index_col='Test')[['Extra', 'SimR', 'SimG', 'SimB']]
        leak_dict = leak_df.to_dict('index')

    model.eval()
    with torch.no_grad():
        for i, (images, labels, names) in tqdm(enumerate(testloader)):
            images = images.cuda()

            outputs = model(images)
            prob = F.sigmoid(outputs)
            if args.tta:
                # 8-way TTA: alternate transpose / horizontal flip covers the
                # dihedral group; average the 8 probability maps.
                sum_prob = prob
                for j in range(7):
                    images = torch.transpose(images, 2, 3) if j % 2 == 0 \
                        else flip(images, dim=3)
                    outputs = model(images)
                    prob = F.sigmoid(outputs)
                    sum_prob = sum_prob + prob
                prob = sum_prob / 8.

            if not args.no_gt:
                y_true[i*args.batch_size:i*args.batch_size+images.size(0), :] = \
                    labels.long().cpu().numpy()
            y_prob[i*args.batch_size:i*args.batch_size+images.size(0), :] = \
                prob.cpu().numpy()
            y_pred[i*args.batch_size:i*args.batch_size+images.size(0), :] = \
                (prob >= args.thresh).long().cpu().numpy()

            for k in range(images.size(0)):
                pred = np.where(y_pred[i*args.batch_size+k, :] == 1)[0].tolist()
                if len(pred) == 0:
                    # No class cleared the threshold: fall back to the argmax.
                    pred = [prob.max(1)[1][k].cpu().numpy()]
                    y_pred[i*args.batch_size+k, pred] = 1
                    weak_samples += 1
                name = names[0][k]
                if args.use_leak:
                    if leak_dict.get(name, None) is not None:
                        sum_sim = leak_dict[name]['SimR'] + \
                            leak_dict[name]['SimG'] + leak_dict[name]['SimB']
                        if sum_sim <= 16:  # 4:
                            extra_label_name = '_'.join(
                                leak_dict[name]['Extra'].split('_')[1:])
                            if loader.train_labels.get(extra_label_name,
                                                       None) is not None:
                                pred_dict[name] = \
                                    loader.train_labels[extra_label_name]['Target']
                if pred_dict.get(name, None) is None:
                    pred_dict[name] = ' '.join(map(str, pred))

            y_pow[i*args.batch_size:i*args.batch_size+images.size(0)] = \
                (y_pred[i*args.batch_size:i*args.batch_size+images.size(0), :]
                 * pow_base).sum(1)

    # Checkpoint filename encodes fold info: "..._<fold>-<num_folds>_...".
    fold_num, num_folds = model_file_name.split('_')[4].split('-')
    prob_file_name = '{}_{}x{}_{}_{}_{}-{}'.format(
        args.split, args.img_rows, args.img_cols, model_name,
        checkpoint['epoch'], fold_num, num_folds)
    np.save('prob-{}.npy'.format(prob_file_name), y_prob)

    if not args.no_gt:
        f1_score_val = f1_score(y_true, y_pred,
                                labels=[l for l in range(n_classes)],
                                average='macro')
        print('F1-score (macro): {:.5f}'.format(f1_score_val))
        for i in range(n_classes):
            num = y_pred[:, i].sum()
            print('{:2d}: {:5d} ({:.5f}) | {:5d} ({:.5f})'.format(
                i, num, float(num)/y_pred.sum(),
                loader.class_num_samples_train[i].long(),
                loader.class_num_samples_train[i] /
                loader.class_num_samples_train.sum()))
        print('# of weak samples: {}'.format(weak_samples))

    # Report the most frequent label combinations (top decile by count).
    uni, cnt = np.unique(y_pow, return_counts=True)
    sorted_idx = np.argsort(cnt)
    for i in range(len(uni)*9//10, len(uni)):
        uni_b = '{:028b}'.format(uni[sorted_idx[i]])
        cls = []
        for j in range(n_classes):
            if int(uni_b[n_classes-1-j]) == 1:
                cls.append(j)
        # BUG FIX: `cls` is a list, and '{:20s}'.format(list) raises
        # TypeError ("unsupported format string passed to list.__format__").
        # Format its string representation instead.
        print('{:20s} {:5d}'.format(str(cls), cnt[sorted_idx[i]]))

    # Create submission
    csv_file_name = '{}_{}x{}_{}_{}_{}-{}_{}'.format(
        args.split, args.img_rows, args.img_cols, model_name,
        checkpoint['epoch'], fold_num, num_folds, args.thresh)
    csv_file_name = csv_file_name + '_leak.csv' if args.use_leak \
        else csv_file_name + '.csv'
    sub = pd.DataFrame.from_dict(pred_dict, orient='index')
    sub.index.names = ['Id']
    sub.columns = ['Predicted']
    sub.to_csv(csv_file_name)
ap.add_argument("--loader", required=False, help="data loader name [eg. '--laoder mnist']", default="mnist") args = vars(ap.parse_args()) DB_DIR = args['db'] NETWORK_FILE_NAME = args['network'] DATA_LOADER = args['loader'] print("PARAMS:") print("DB_DIR", DB_DIR) print("NETWORK_FILE_NAME", NETWORK_FILE_NAME) print("DATA_LOADER", DATA_LOADER) loader = loaders.get_loader(DATA_LOADER) input_size, output_size = loader.get_network_constrains() # load validation samples v_features, v_labels = loader.load(DB_DIR, kind="test") # load newtwork from file network = ffn.Builder.load_ffn(NETWORK_FILE_NAME) validation_performance = 0 for i in tqdm(range(len(v_labels))): x = v_features[i] prediction = network.predict(x) predicted_label = helper.get_class(prediction) if v_labels[i] == predicted_label:
def test(args):
    """Run audio-tagging inference (Freesound 2019 style): load a checkpoint,
    apply the PCEN front-end, optionally average TTA crops, and write both a
    probability matrix (.npy) and a submission CSV.
    """
    # Model name is the prefix of the checkpoint filename (before first '_').
    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find('_')]

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path,
                         split=args.split,
                         is_transform=True,
                         no_gt=args.no_gt,
                         seed=args.seed,
                         sampling_rate=args.sampling_rate,
                         tta=args.tta)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.use_cuda:
        torch.cuda.manual_seed(args.seed)

    n_classes = loader.n_classes
    testloader = data.DataLoader(loader,
                                 batch_size=args.batch_size,
                                 num_workers=1,
                                 pin_memory=True)

    # Setup Model (single-channel input: spectrogram-like features)
    model = get_model(model_name,
                      n_classes,
                      use_cbam=args.use_cbam,
                      in_channels=1)
    if args.use_cuda:
        model.cuda()
    """
    mel_spec_layer = Melspectrogram(num_bands=loader.n_mels,
                                    sample_rate=loader.sampling_rate,
                                    min_freq=loader.fmin,
                                    max_freq=loader.fmax,
                                    fft_len=loader.n_fft,
                                    hop_len=loader.hop_length,
                                    power=1.,)
    if args.use_cuda:
        mel_spec_layer.cuda()
    #"""
    #"""
    # PCEN front-end (alternative to log-mel), see:
    # https://www.kaggle.com/c/freesound-audio-tagging-2019/discussion/91859#529792
    pcen_layer = Pcen(
        sr=loader.sampling_rate,
        hop_length=loader.hop_length,
        num_bands=loader.n_mels,
        gain=0.5,
        bias=0.001,
        power=0.2,
        time_constant=0.4,
        eps=1e-9,
        trainable=args.pcen_trainable,
    )
    if args.use_cuda:
        pcen_layer.cuda()
    #"""

    checkpoint = torch.load(args.model_path,
                            map_location=None if args.use_cuda else 'cpu',
                            encoding="latin1")
    state = convert_state_dict(checkpoint['model_state'])
    model_dict = model.state_dict()
    model_dict.update(state)
    model.load_state_dict(model_dict)
    if args.pcen_trainable:
        # PCEN parameters were learned jointly; restore them and report the
        # per-band ranges of the (exponentiated) learned parameters.
        pcen_state = convert_state_dict(checkpoint['pcen_state'])
        pcen_layer_dict = pcen_layer.state_dict()
        pcen_layer_dict.update(pcen_state)
        pcen_layer.load_state_dict(pcen_layer_dict)
        print(
            '-- PCEN --\n gain = {:.5f}/{:.5f}\n bias = {:.5f}/{:.5f}\n power = {:.5f}/{:.5f}\n b = {:.5f}/{:.5f}'
            .format(pcen_layer.log_gain.exp().min().item(),
                    pcen_layer.log_gain.exp().max().item(),
                    pcen_layer.log_bias.exp().min().item(),
                    pcen_layer.log_bias.exp().max().item(),
                    pcen_layer.log_power.exp().min().item(),
                    pcen_layer.log_power.exp().max().item(),
                    pcen_layer.log_b.exp().min().item(),
                    pcen_layer.log_b.exp().max().item()))

    if checkpoint.get('lwlrap', None) is not None:
        print("Loaded checkpoint '{}' (iter {}, lwlrap {:.5f})".format(
            args.model_path, checkpoint['iter'], checkpoint['lwlrap']))
    else:
        print("Loaded checkpoint '{}' (iter {})".format(
            args.model_path, checkpoint['iter']))

    y_true = np.zeros((loader.__len__(), n_classes), dtype=np.int32)
    y_prob = np.zeros((loader.__len__(), n_classes), dtype=np.float32)

    model.eval()
    ##mel_spec_layer.eval()
    pcen_layer.eval()
    with torch.no_grad():
        for i, (images, labels, names) in tqdm(enumerate(testloader)):
            if args.use_cuda:
                images = images.cuda()
            if args.tta > 1:
                # Flatten TTA crops into the batch dim for a single forward
                # pass; probabilities are averaged back per sample below.
                bs, num_tta, c, h, w = images.size()
                images = images.view(-1, c, h, w)
            ##images = mel_spec_layer(images)
            images = pcen_layer(images)

            outputs = model(images)
            prob = F.sigmoid(outputs)
            if args.tta > 1:
                prob = prob.view(bs, num_tta, -1)
                prob = prob.mean(1)

            if not args.no_gt:
                y_true[i * args.batch_size:i * args.batch_size +
                       labels.size(0), :] = labels.long().cpu().numpy(
                       ) if args.use_cuda else labels.long().numpy()
            y_prob[i * args.batch_size:i * args.batch_size +
                   labels.size(0), :] = prob.cpu().numpy(
                   ) if args.use_cuda else prob.numpy()

    # Checkpoint filename encodes iteration and fold: "..._<iter>_..._<f>-<nf>_...".
    n_iter = model_file_name.split('_')[2]
    fold_num, num_folds = model_file_name.split('_')[-2].split('-')
    prob_file_name = '{}_{}x{}_{}_{}_{}_{}-{}'.format(
        args.split, args.img_rows, args.img_cols, model_name, n_iter,
        args.sampling_rate, fold_num, num_folds)
    np.save('prob-{}.npy'.format(prob_file_name), y_prob)

    if not args.no_gt:
        lwlrap_val = calculate_overall_lwlrap_sklearn(y_true, y_prob)
        print('lwlrap: {:.5f}'.format(lwlrap_val))

    # Create submission
    csv_file_name = '{}_{}x{}_{}_{}_{}_{}-{}.csv'.format(
        args.split, args.img_rows, args.img_cols, model_name, n_iter,
        args.sampling_rate, fold_num, num_folds)
    sub = pd.read_csv(os.path.join(data_path, 'sample_submission.csv'),
                      index_col=0)
    sub[loader.class_names] = y_prob
    sub.to_csv(csv_file_name)
def test_get_loader(self): self.assertEqual(get_loader('yaml'), YAMLLoader) self.assertEqual(get_loader('yml'), YAMLLoader) self.assertIsNone(get_loader('xml'))
def merge(args):
    """Average per-fold probability maps, prune predicted masks to the
    requested non-empty ratio, score against GT (optional), and write an
    RLE-encoded submission CSV.

    Reads every ``*.npy`` probability file under ``args.root_results`` and
    writes ``merged_<...>.csv`` to the same directory.
    """
    if not os.path.exists(args.root_results):
        os.makedirs(args.root_results)

    # Setup Dataloader (labels/names only; images themselves are not loaded)
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path,
                         split=args.split,
                         transforms=None,
                         fold_num=0,
                         num_folds=1,
                         no_gt=args.no_gt,
                         seed=args.seed,
                         no_load_images=True)
    n_classes = loader.n_classes
    testloader = data.DataLoader(loader, batch_size=args.batch_size)#, num_workers=2, pin_memory=True)

    # Accumulate fold probabilities sample-by-sample (mmap keeps memory low).
    avg_y_prob = np.zeros((loader.__len__(), 1, 1024, 1024), dtype=np.float32)
    avg_y_pred_sum = np.zeros((loader.__len__(),), dtype=np.int32)
    fold_list = []
    for prob_file_name in glob.glob(os.path.join(args.root_results, '*.npy')):
        prob = np.load(prob_file_name, mmap_mode='r')
        for i in range(loader.__len__()):
            avg_y_prob[i, :, :, :] += prob[i, :, :, :]
        fold_list.append(prob_file_name)
        print(prob_file_name)
    avg_y_prob = avg_y_prob / len(fold_list)
    ##avgprob_file_name = 'prob_{}_avg'.format(len(fold_list))
    ##np.save(os.path.join(args.root_results, '{}.npy'.format(avgprob_file_name)), avg_y_prob)

    # BUG FIX: np.int was removed in NumPy 1.24 (deprecated since 1.20);
    # use the explicit np.int64 it used to alias on 64-bit platforms.
    avg_y_pred = (avg_y_prob > args.thresh).astype(np.int64)
    avg_y_pred_sum = avg_y_pred.sum(3).sum(2).sum(1)
    # Keep only the top `non_empty_ratio` fraction of masks by pixel count:
    # masks at or below the derived pixel-sum threshold are emptied below.
    avg_y_pred_sum_argsorted = np.argsort(avg_y_pred_sum)[::-1]
    pruned_idx = int(avg_y_pred_sum_argsorted.shape[0] * args.non_empty_ratio)
    mask_sum_thresh = int(
        avg_y_pred_sum[avg_y_pred_sum_argsorted[pruned_idx]]
    ) if pruned_idx < avg_y_pred_sum_argsorted.shape[0] else 0

    running_metrics = runningScore(
        n_classes=2, weight_acc_non_empty=args.weight_acc_non_empty)
    pred_dict = collections.OrderedDict()
    num_non_empty_masks = 0
    for i, (_, labels, names) in tqdm(enumerate(testloader)):
        labels = labels.cuda()
        prob = avg_y_prob[i*args.batch_size:i*args.batch_size+labels.size(0), :, :, :]
        pred = (prob > args.thresh).astype(np.int64)  # was np.int (removed alias)
        pred = torch.from_numpy(pred).long().cuda()
        pred_sum = pred.sum(3).sum(2).sum(1)
        for k in range(labels.size(0)):
            if pred_sum[k] > mask_sum_thresh:
                num_non_empty_masks += 1
            else:
                pred[k, :, :, :] = torch.zeros_like(pred[k, :, :, :])
                if args.only_non_empty:
                    # Guarantee a non-empty RLE when every image is known to
                    # have a mask.
                    pred[k, :, 0, 0] = 1
        if not args.no_gt:
            running_metrics.update(labels.long(), pred.long())
        for k in range(labels.size(0)):
            name = names[0][k]
            if pred_dict.get(name, None) is None:
                mask = pred[k, 0, :, :].cpu().numpy()
                rle = loader.mask2rle(mask)
                pred_dict[name] = rle

    print('# non-empty masks: {:5d} (non_empty_ratio: {:.5f} / mask_sum_thresh: {:6d})'.format(num_non_empty_masks, args.non_empty_ratio, mask_sum_thresh))
    if not args.no_gt:
        dice, dice_empty, dice_non_empty, miou, acc, acc_empty, acc_non_empty = running_metrics.get_scores()
        print('Dice (per image): {:.5f} (empty: {:.5f} / non-empty: {:.5f})'.format(dice, dice_empty, dice_non_empty))
        print('Classification accuracy: {:.5f} (empty: {:.5f} / non-empty: {:.5f})'.format(acc, acc_empty, acc_non_empty))
        print('Overall mIoU: {:.5f}'.format(miou))
        running_metrics.reset()

    # Create submission
    csv_file_name = 'merged_{}_{}_{}_{}'.format(args.split, len(fold_list), args.thresh, args.non_empty_ratio)
    sub = pd.DataFrame.from_dict(pred_dict, orient='index')
    sub.index.names = ['ImageId']
    sub.columns = ['EncodedPixels']
    sub.to_csv(os.path.join(args.root_results, '{}.csv'.format(csv_file_name)))
def train(args):
    """Train a binary-mask segmentation model with gradient accumulation,
    LR warmup + (cosine or multi-step) scheduling, periodic validation, and
    best-dice / best-wacc checkpointing.

    Checkpoints are written under ./checkpoints/. Training stops early
    (after saving an optimizer-bearing checkpoint) once
    ``args.saving_last_time`` seconds have elapsed, when that limit is set.
    """
    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')

    # Setup Augmentations & Transforms
    # GN + pretrained backbones expect Caffe-style BGR-mean/255-scale stats;
    # otherwise use the standard ImageNet normalization.
    rgb_mean = [122.7717 / 255., 115.9465 / 255., 102.9801 / 255.
                ] if args.norm_type == 'gn' and args.load_pretrained else [
                    0.485, 0.456, 0.406
                ]
    rgb_std = [1. / 255., 1. / 255., 1. / 255.
               ] if args.norm_type == 'gn' and args.load_pretrained else [
                   0.229, 0.224, 0.225
               ]
    data_trans = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(size=(args.img_rows, args.img_cols)),
        transforms.ToTensor(),
        transforms.Normalize(mean=rgb_mean, std=rgb_std),
    ])

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path,
                           transforms=data_trans,
                           in_channels=args.in_channels,
                           split='train',
                           augmentations=True,
                           fold_num=args.fold_num,
                           num_folds=args.num_folds,
                           only_non_empty=args.only_non_empty,
                           seed=args.seed,
                           mask_dilation_size=args.mask_dilation_size)
    v_loader = data_loader(data_path,
                           transforms=data_trans,
                           in_channels=args.in_channels,
                           split='val',
                           fold_num=args.fold_num,
                           num_folds=args.num_folds,
                           only_non_empty=args.only_non_empty,
                           seed=args.seed)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # NOTE(review): shuffle/drop_last are tied to only_non_empty — otherwise
    # the loader presumably orders batches itself via __gen_batchs__ below.
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=2,
                                  pin_memory=True,
                                  shuffle=args.only_non_empty,
                                  drop_last=args.only_non_empty)
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=2,
                                pin_memory=True)

    # Setup Model
    model = get_model(args.arch,
                      n_classes=1,
                      in_channels=args.in_channels,
                      norm_type=args.norm_type,
                      load_pretrained=args.load_pretrained,
                      use_cbam=args.use_cbam)
    model.to(torch.device(args.device))
    running_metrics = runningScore(
        n_classes=2,
        weight_acc_non_empty=args.weight_acc_non_empty,
        device=args.device)

    # Check if model has custom optimizer / loss
    if hasattr(model, 'optimizer'):
        optimizer = model.optimizer
    else:
        # 5% of iterations are LR warmup; milestones are relative to the
        # post-warmup schedule.
        warmup_iter = int(args.n_iter * 5. / 100.)
        milestones = [
            int(args.n_iter * 30. / 100.) - warmup_iter,
            int(args.n_iter * 60. / 100.) - warmup_iter,
            int(args.n_iter * 90. / 100.) - warmup_iter
        ]  # [30, 60, 90]
        gamma = 0.5  #0.1
        if args.optimizer_type == 'sgd':
            optimizer = torch.optim.SGD(group_weight(model),
                                        lr=args.l_rate,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
        elif args.optimizer_type == 'adam':
            optimizer = torch.optim.Adam(group_weight(model),
                                         lr=args.l_rate,
                                         weight_decay=args.weight_decay)
        else:  #if args.optimizer_type == 'radam':
            optimizer = RAdam(group_weight(model),
                              lr=args.l_rate,
                              weight_decay=args.weight_decay)

        if args.num_cycles > 0:
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer,
                T_max=(args.n_iter - warmup_iter) // args.num_cycles,
                eta_min=args.l_rate * 0.1)
        else:
            scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer, milestones=milestones, gamma=gamma)
        scheduler_warmup = GradualWarmupScheduler(optimizer,
                                                  total_epoch=warmup_iter,
                                                  min_lr_mul=0.1,
                                                  after_scheduler=scheduler)

    start_iter = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume,
                                    map_location=torch.device(
                                        args.device))  #, encoding="latin1")
            model_dict = model.state_dict()
            # Checkpoints may be a raw state_dict or a wrapped dict.
            if checkpoint.get('model_state', None) is not None:
                model_dict.update(convert_state_dict(
                    checkpoint['model_state']))
            else:
                model_dict.update(convert_state_dict(checkpoint))
            start_iter = checkpoint.get('iter', -1)
            dice_val = checkpoint.get('dice', -1)
            wacc_val = checkpoint.get('wacc', -1)
            print("Loaded checkpoint '{}' (iter {}, dice {:.5f}, wAcc {:.5f})".
                  format(args.resume, start_iter, dice_val, wacc_val))
            model.load_state_dict(model_dict)
            if checkpoint.get('optimizer_state', None) is not None:
                optimizer.load_state_dict(checkpoint['optimizer_state'])
            # Free the (potentially large) checkpoint buffers immediately.
            del model_dict
            del checkpoint
            torch.cuda.empty_cache()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
    # An explicit --start_iter overrides the checkpoint's iteration counter.
    start_iter = args.start_iter if args.start_iter >= 0 else start_iter

    # Per-scale loss weights for the multi-scale deep supervision heads.
    scale_weight = torch.tensor([1.0, 0.4, 0.4,
                                 0.4]).to(torch.device(args.device))
    dice_weight = [args.dice_weight0, args.dice_weight1]
    lv_margin = [args.lv_margin0, args.lv_margin1]

    total_loss_sum = 0.0
    ms_loss_sum = 0.0
    cls_loss_sum = 0.0
    t_loader.__gen_batchs__(args.batch_size, ratio=args.ratio)
    trainloader_iter = iter(trainloader)
    optimizer.zero_grad()
    start_train_time = timeit.default_timer()
    elapsed_train_time = 0.0
    best_dice = -100.0
    best_wacc = -100.0
    for i in range(start_iter, args.n_iter):
        #"""
        model.train()
        # Scheduler advances once per effective (accumulated) step.
        if i % args.iter_size == 0:
            if args.num_cycles == 0:
                scheduler_warmup.step(i)
            else:
                scheduler_warmup.step(i // args.num_cycles)

        # Pull the next batch; on exhaustion, regenerate the loader's batch
        # plan and restart the iterator.
        try:
            images, labels, _ = next(trainloader_iter)
        except:
            t_loader.__gen_batchs__(args.batch_size, ratio=args.ratio)
            trainloader_iter = iter(trainloader)
            images, labels, _ = next(trainloader_iter)

        images = images.to(torch.device(args.device))
        labels = labels.to(torch.device(args.device))

        outputs, outputs_gap = model(images)

        # Per-image emptiness label for the auxiliary classification head:
        # 1 when the mask has any positive pixel, else 0.
        labels_gap = torch.where(
            labels.sum(3, keepdim=True).sum(2, keepdim=True) > 0,
            torch.ones(labels.size(0), 1, 1,
                       1).to(torch.device(args.device)),
            torch.zeros(labels.size(0), 1, 1,
                        1).to(torch.device(args.device)))
        cls_loss = F.binary_cross_entropy_with_logits(
            outputs_gap,
            labels_gap) if args.lambda_cls > 0 else torch.tensor(0.0).to(
                labels.device)
        ms_loss = multi_scale_loss(outputs,
                                   labels,
                                   scale_weight=scale_weight,
                                   reduction='mean',
                                   alpha=args.alpha,
                                   gamma=args.gamma,
                                   dice_weight=dice_weight,
                                   lv_margin=lv_margin,
                                   lambda_fl=args.lambda_fl,
                                   lambda_dc=args.lambda_dc,
                                   lambda_lv=args.lambda_lv)
        total_loss = ms_loss + args.lambda_cls * cls_loss
        # Gradient accumulation: scale each partial loss by 1/iter_size.
        total_loss = total_loss / float(args.iter_size)
        total_loss.backward()
        total_loss_sum = total_loss_sum + total_loss.item()
        ms_loss_sum = ms_loss_sum + ms_loss.item()
        cls_loss_sum = cls_loss_sum + cls_loss.item()

        if (i + 1) % args.print_train_freq == 0:
            print("Iter [%7d/%7d] Loss: %7.4f (MS: %7.4f / CLS: %7.4f)" %
                  (i + 1, args.n_iter, total_loss_sum, ms_loss_sum,
                   cls_loss_sum))

        # Apply accumulated gradients every iter_size iterations.
        if (i + 1) % args.iter_size == 0:
            optimizer.step()
            optimizer.zero_grad()
            total_loss_sum = 0.0
            ms_loss_sum = 0.0
            cls_loss_sum = 0.0
        #"""
        if args.eval_freq > 0 and (i + 1) % args.eval_freq == 0:
            state = {
                'iter': i + 1,
                'model_state': model.state_dict(),
            }  #'optimizer_state': optimizer.state_dict(),}
            if (i + 1) % int(args.eval_freq / args.save_freq) == 0:
                torch.save(
                    state, "checkpoints/{}_{}_{}_{}x{}_{}-{}_model.pth".format(
                        args.arch, args.dataset, i + 1, args.img_rows,
                        args.img_cols, args.fold_num, args.num_folds))

            dice_val = 0.0
            thresh = 0.5
            mask_sum_thresh = 0
            mean_loss_val = AverageMeter()
            model.eval()
            with torch.no_grad():
                for i_val, (images_val, labels_val,
                            _) in enumerate(valloader):
                    images_val = images_val.to(torch.device(args.device))
                    labels_val = labels_val.to(torch.device(args.device))

                    outputs_val, outputs_gap_val = model(images_val)
                    pred_val = (F.sigmoid(outputs_val if not isinstance(
                        outputs_val, tuple) else outputs_val[0]) >
                                thresh).long()  #outputs_val.max(1)[1]
                    # Zero out masks smaller than mask_sum_thresh pixels
                    # (threshold is 0 here, so effectively a no-op hook).
                    pred_val_sum = pred_val.sum(3).sum(2).sum(1)
                    for k in range(labels_val.size(0)):
                        if pred_val_sum[k] < mask_sum_thresh:
                            pred_val[k, :, :, :] = torch.zeros_like(
                                pred_val[k, :, :, :])

                    labels_gap_val = torch.where(
                        labels_val.sum(3, keepdim=True).sum(2, keepdim=True)
                        > 0,
                        torch.ones(labels_val.size(0), 1, 1,
                                   1).to(torch.device(args.device)),
                        torch.zeros(labels_val.size(0), 1, 1,
                                    1).to(torch.device(args.device)))
                    cls_loss_val = F.binary_cross_entropy_with_logits(
                        outputs_gap_val, labels_gap_val
                    ) if args.lambda_cls > 0 else torch.tensor(0.0).to(
                        labels_val.device)
                    ms_loss_val = multi_scale_loss(outputs_val,
                                                   labels_val,
                                                   scale_weight=scale_weight,
                                                   reduction='mean',
                                                   alpha=args.alpha,
                                                   gamma=args.gamma,
                                                   dice_weight=dice_weight,
                                                   lv_margin=lv_margin,
                                                   lambda_fl=args.lambda_fl,
                                                   lambda_dc=args.lambda_dc,
                                                   lambda_lv=args.lambda_lv)
                    loss_val = ms_loss_val + args.lambda_cls * cls_loss_val
                    mean_loss_val.update(loss_val.item(),
                                         n=labels_val.size(0))
                    running_metrics.update(labels_val.long(),
                                           pred_val.long())

            dice_val, dice_empty_val, dice_non_empty_val, miou_val, wacc_val, acc_empty_val, acc_non_empty_val = running_metrics.get_scores(
            )
            print(
                'Dice (per image): {:.5f} (empty: {:.5f} / non-empty: {:.5f})'.
                format(dice_val, dice_empty_val, dice_non_empty_val))
            print('wAcc: {:.5f} (empty: {:.5f} / non-empty: {:.5f})'.format(
                wacc_val, acc_empty_val, acc_non_empty_val))
            print('Overall mIoU: {:.5f}'.format(miou_val))
            print('Mean val loss: {:.4f}'.format(mean_loss_val.avg))
            state['dice'] = dice_val
            state['wacc'] = wacc_val
            state['miou'] = miou_val
            running_metrics.reset()
            mean_loss_val.reset()

            # Re-save the periodic checkpoint now enriched with val metrics.
            if (i + 1) % int(args.eval_freq / args.save_freq) == 0:
                torch.save(
                    state, "checkpoints/{}_{}_{}_{}x{}_{}-{}_model.pth".format(
                        args.arch, args.dataset, i + 1, args.img_rows,
                        args.img_cols, args.fold_num, args.num_folds))
            if best_dice <= dice_val:
                best_dice = dice_val
                torch.save(
                    state, "checkpoints/{}_{}_{}_{}x{}_{}-{}_model.pth".format(
                        args.arch, args.dataset, 'best-dice', args.img_rows,
                        args.img_cols, args.fold_num, args.num_folds))
            if best_wacc <= wacc_val:
                best_wacc = wacc_val
                torch.save(
                    state, "checkpoints/{}_{}_{}_{}x{}_{}-{}_model.pth".format(
                        args.arch, args.dataset, 'best-wacc', args.img_rows,
                        args.img_cols, args.fold_num, args.num_folds))

            elapsed_train_time = timeit.default_timer() - start_train_time
            print('Training time (iter {0:5d}): {1:10.5f} seconds'.format(
                i + 1, elapsed_train_time))

        # Wall-clock budget: save a resumable checkpoint (with optimizer
        # state) and stop once the time limit has been exceeded.
        if args.saving_last_time > 0 and (i + 1) % args.iter_size == 0 and (
                timeit.default_timer() -
                start_train_time) > args.saving_last_time:
            state = {
                'iter': i + 1,
                'model_state': model.state_dict(),  #}
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state, "checkpoints/{}_{}_{}_{}x{}_{}-{}_model.pth".format(
                    args.arch, args.dataset, i + 1, args.img_rows,
                    args.img_cols, args.fold_num, args.num_folds))
            return
    print('best_dice: {:.5f}; best_wacc: {:.5f}'.format(best_dice, best_wacc))
def train(args):
    """Train an audio-tagging model (Freesound-2019-style, lwlrap metric).

    Iteration-based loop: trainable/fixed PCEN front-end -> model ->
    sigmoid-focal + lovasz-hinge loss, with optional mix-up and SpecAugment.
    Checkpoints periodically to ./checkpoints and evaluates lwlrap on the
    validation fold every `args.eval_freq` iterations.

    NOTE(review): assumes CUDA is available (unconditional .cuda() calls);
    `args` looks like an argparse namespace — confirm against the caller.
    """
    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')

    # Setup Augmentations
    data_aug = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomCrop(size=(args.img_rows, args.img_cols)),
    ])

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path,
                           is_transform=True,
                           split=args.split,
                           fold_num=args.fold_num,
                           num_folds=args.num_folds,
                           seed=args.seed,
                           augmentations=data_aug,
                           sampling_rate=args.sampling_rate,
                           mode='npy')
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split=args.split.replace('train', 'val'),
                           fold_num=args.fold_num,
                           num_folds=args.num_folds,
                           seed=args.seed,
                           sampling_rate=args.sampling_rate,
                           mode='npy')

    # Seed every RNG used below so folds are reproducible.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=4,
                                  pin_memory=True,
                                  shuffle=True,
                                  drop_last=True)
    # Validation uses batch_size=1 because clips are chunked per-sample below.
    valloader = data.DataLoader(v_loader,
                                batch_size=1,
                                num_workers=4,
                                pin_memory=True)

    # Setup Model
    model = get_model(args.arch,
                      n_classes,
                      use_cbam=args.use_cbam,
                      in_channels=1,
                      dropout_rate=args.dropout_rate)
    model.cuda()
    """
    mel_spec_layer = Melspectrogram(num_bands=t_loader.n_mels,
                                    sample_rate=t_loader.sampling_rate,
                                    min_freq=t_loader.fmin,
                                    max_freq=t_loader.fmax,
                                    fft_len=t_loader.n_fft,
                                    hop_len=t_loader.hop_length,
                                    power=1.,)
    mel_spec_layer.cuda()
    #"""
    #"""
    # https://www.kaggle.com/c/freesound-audio-tagging-2019/discussion/91859#529792
    pcen_layer = Pcen(
        sr=t_loader.sampling_rate,
        hop_length=t_loader.hop_length,
        num_bands=t_loader.n_mels,
        gain=0.5,
        bias=0.001,
        power=0.2,
        time_constant=0.4,
        eps=1e-9,
        trainable=args.pcen_trainable,
    )
    pcen_layer.cuda()
    #"""

    # Check if model has custom optimizer / loss
    if hasattr(model, 'optimizer'):
        # NOTE(review): in this branch no warmup scheduler is built, yet
        # scheduler_warmup is stepped in the loop below — verify models with a
        # custom optimizer are actually used here.
        optimizer = model.optimizer
    else:
        # 5% of iterations as warmup; LR drops at 30/60/90% of the schedule
        # (shifted left by the warmup length).
        warmup_iter = int(args.n_iter * 5. / 100.)
        milestones = [
            int(args.n_iter * 30. / 100.) - warmup_iter,
            int(args.n_iter * 60. / 100.) - warmup_iter,
            int(args.n_iter * 90. / 100.) - warmup_iter
        ]  # [30, 60, 90]
        gamma = 0.1
        if args.pcen_trainable:
            # Jointly optimize the PCEN front-end with the model.
            optimizer = torch.optim.SGD(group_weight(model) +
                                        group_weight(pcen_layer),
                                        lr=args.l_rate,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.SGD(group_weight(model),
                                        lr=args.l_rate,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
        if args.num_cycles > 0:
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer,
                T_max=args.n_iter // args.num_cycles,
                eta_min=args.l_rate * 0.01)
        else:
            scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer, milestones=milestones, gamma=gamma)
        scheduler_warmup = GradualWarmupScheduler(optimizer,
                                                 total_epoch=warmup_iter,
                                                 min_lr_mul=0.1,
                                                 after_scheduler=scheduler)

    start_iter = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume, encoding="latin1")
            model_dict = model.state_dict()
            # Checkpoints are either {'model_state': ...} wrappers or raw
            # state dicts; handle both.
            if checkpoint.get('model_state', None) is not None:
                model_dict.update(
                    convert_state_dict(checkpoint['model_state'],
                                       load_classifier=args.load_classifier))
            else:
                model_dict.update(
                    convert_state_dict(checkpoint,
                                       load_classifier=args.load_classifier))
            model.load_state_dict(model_dict)
            if args.pcen_trainable:
                pcen_layer_dict = pcen_layer.state_dict()
                if checkpoint.get('pcen_state', None) is not None:
                    pcen_layer_dict.update(
                        convert_state_dict(
                            checkpoint['pcen_state'],
                            load_classifier=args.load_classifier))
                pcen_layer.load_state_dict(pcen_layer_dict)
            if checkpoint.get('lwlrap', None) is not None:
                start_iter = checkpoint['iter']
                print("Loaded checkpoint '{}' (iter {}, lwlrap {:.5f})".format(
                    args.resume, checkpoint['iter'], checkpoint['lwlrap']))
            elif checkpoint.get('iter', None) is not None:
                start_iter = checkpoint['iter']
                print("Loaded checkpoint '{}' (iter {})".format(
                    args.resume, checkpoint['iter']))
            if checkpoint.get('optimizer_state', None) is not None:
                optimizer.load_state_dict(checkpoint['optimizer_state'])
            # Release the loaded tensors before training starts.
            del model_dict
            del checkpoint
            torch.cuda.empty_cache()
        else:
            print("No checkpoint found at '{}'".format(args.resume))

    # An explicit --start_iter overrides the checkpoint's iteration counter.
    start_iter = args.start_iter if args.start_iter >= 0 else start_iter

    trainloader_iter = iter(trainloader)
    optimizer.zero_grad()
    loss_sum = 0.0
    spec_augment = SpecAugment(time_warp_rate=0.1,
                               freq_mask_rate=0.2,
                               time_mask_rate=0.2,
                               num_masks=2) if args.use_spec_aug else None
    best_lwlrap = 0.0
    start_train_time = timeit.default_timer()
    for i in range(start_iter, args.n_iter):
        model.train()
        ##mel_spec_layer.train()
        pcen_layer.train()
        if args.num_cycles == 0:
            scheduler_warmup.step(i)
        else:
            scheduler_warmup.step(i // args.num_cycles)

        # Endless stream over the train set: restart the iterator on
        # exhaustion (the bare except also swallows worker errors — beware).
        try:
            images, labels, _ = next(trainloader_iter)
        except:
            trainloader_iter = iter(trainloader)
            images, labels, _ = next(trainloader_iter)

        images = images.cuda()
        labels = labels.cuda()
        ##images = mel_spec_layer(images)
        images = pcen_layer(images)

        if args.use_mix_up:
            # Mix-up: convex-combine each sample (and its label vector) with
            # a randomly chosen partner from the same batch.
            beta_ab = 0.4
            mix_up_alpha = np.random.beta(size=labels.size(0),
                                          a=beta_ab,
                                          b=beta_ab)
            # Keep the dominant weight on the original sample.
            mix_up_alpha = np.maximum(mix_up_alpha, 1. - mix_up_alpha)
            mix_up_alpha = torch.from_numpy(mix_up_alpha).float().cuda()
            rand_indices = np.arange(labels.size(0))
            np.random.shuffle(rand_indices)
            rand_indices = torch.from_numpy(rand_indices).long().cuda()
            images2 = torch.index_select(images, dim=0, index=rand_indices)
            labels2 = torch.index_select(labels, dim=0, index=rand_indices)
            images = images * mix_up_alpha.unsqueeze(1).unsqueeze(2).unsqueeze(
                3) + images2 * (
                    1. - mix_up_alpha.unsqueeze(1).unsqueeze(2).unsqueeze(3))
            labels = labels * mix_up_alpha.unsqueeze(1) + labels2 * (
                1. - mix_up_alpha.unsqueeze(1))

        if args.use_spec_aug:
            images = spec_augment(images, augs=['freq_mask', 'time_mask'])

        outputs = model(images)
        focal_loss = sigmoid_focal_loss_with_logits(outputs,
                                                    labels,
                                                    gamma=args.gamma_fl)
        lovasz_loss = lovasz_hinge(outputs, labels)
        loss = focal_loss + lovasz_loss
        # Scale down so gradients accumulated over iter_size steps average.
        loss = loss / float(args.iter_size)
        loss.backward()
        loss_sum = loss_sum + loss.item()

        if (i + 1) % args.print_train_freq == 0:
            print("Iter [%7d/%7d] Loss: %7.4f" %
                  (i + 1, args.n_iter, loss_sum))

        # Gradient accumulation: apply the update every iter_size iterations.
        if (i + 1) % args.iter_size == 0:
            optimizer.step()
            optimizer.zero_grad()
            loss_sum = 0.0

        if args.eval_freq > 0 and (i + 1) % (args.eval_freq //
                                             args.save_freq) == 0:
            state = {
                'iter': i + 1,
                'model_state': model.state_dict(),
            }  #'optimizer_state': optimizer.state_dict(),}
            if args.pcen_trainable:
                state['pcen_state'] = pcen_layer.state_dict()
            torch.save(
                state, "checkpoints/{}_{}_{}_{}x{}_{}_{}-{}_model.pth".format(
                    args.arch, args.dataset, i + 1, args.img_rows,
                    args.img_cols, args.sampling_rate, args.fold_num,
                    args.num_folds))

        if args.eval_freq > 0 and (i + 1) % args.eval_freq == 0:
            y_true = np.zeros((v_loader.__len__(), n_classes), dtype=np.int32)
            y_prob = np.zeros((v_loader.__len__(), n_classes),
                              dtype=np.float32)
            mean_loss_val = AverageMeter()
            model.eval()
            ##mel_spec_layer.eval()
            pcen_layer.eval()
            with torch.no_grad():
                for i_val, (images_val, labels_val,
                            _) in tqdm(enumerate(valloader)):
                    images_val = images_val.cuda()
                    labels_val = labels_val.cuda()
                    ##images_val = mel_spec_layer(images_val)
                    images_val = pcen_layer(images_val)
                    if images_val.size(
                            -1
                    ) > args.img_cols:  # split into overlapped chunks
                        stride = (args.img_cols // args.img_cols_div) if (
                            images_val.size(-1) - args.img_cols) > (
                                args.img_cols // args.img_cols_div) else (
                                    images_val.size(-1) - args.img_cols)
                        images_val = torch.cat([
                            images_val[:, :, :, w:w + args.img_cols]
                            for w in range(
                                0,
                                images_val.size(-1) - args.img_cols + 1,
                                stride)
                        ],
                                               dim=0)
                    outputs_val = model(images_val)
                    prob_val = F.sigmoid(outputs_val)
                    # Average the chunk predictions back into one clip score.
                    outputs_val = outputs_val.mean(0, keepdim=True)
                    prob_val = prob_val.mean(0, keepdim=True)
                    focal_loss_val = sigmoid_focal_loss_with_logits(
                        outputs_val, labels_val, gamma=args.gamma_fl)
                    lovasz_loss_val = lovasz_hinge(outputs_val, labels_val)
                    loss_val = focal_loss_val + lovasz_loss_val
                    mean_loss_val.update(loss_val, n=labels_val.size(0))
                    y_true[i_val:i_val +
                           labels_val.size(0), :] = labels_val.long().cpu(
                           ).numpy()
                    y_prob[i_val:i_val +
                           labels_val.size(0), :] = prob_val.cpu().numpy()

            per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(
                y_true, y_prob)
            lwlrap_val = np.sum(per_class_lwlrap * weight_per_class)
            print('lwlrap: {:.5f}'.format(lwlrap_val))
            print('Mean val loss: {:.4f}'.format(mean_loss_val.avg))
            # `state` was created by the save branch above; with eval_freq a
            # multiple of (eval_freq // save_freq) it is always defined here.
            state['lwlrap'] = lwlrap_val
            mean_loss_val.reset()

            if (i + 1) == args.n_iter:
                # Final iteration: dump the per-class lwlrap breakdown.
                print('per_class_lwlrap: {:.5f} ~ {:.5f}'.format(
                    per_class_lwlrap.min(), per_class_lwlrap.max()))
                for c in range(n_classes):
                    print('{:50s}: {:.5f} ({:.5f})'.format(
                        v_loader.class_names[c], per_class_lwlrap[c],
                        weight_per_class[c]))

            torch.save(
                state, "checkpoints/{}_{}_{}_{}x{}_{}_{}-{}_model.pth".format(
                    args.arch, args.dataset, i + 1, args.img_rows,
                    args.img_cols, args.sampling_rate, args.fold_num,
                    args.num_folds))
            if best_lwlrap <= lwlrap_val:
                best_lwlrap = lwlrap_val
                torch.save(
                    state,
                    "checkpoints/{}_{}_{}_{}x{}_{}_{}-{}_model.pth".format(
                        args.arch, args.dataset, 'best', args.img_rows,
                        args.img_cols, args.sampling_rate, args.fold_num,
                        args.num_folds))

            # Report the learned PCEN parameter ranges (stored as logs).
            print(
                '-- PCEN --\n gain = {:.5f}/{:.5f}\n bias = {:.5f}/{:.5f}\n power = {:.5f}/{:.5f}\n b = {:.5f}/{:.5f}'
                .format(pcen_layer.log_gain.exp().min().item(),
                        pcen_layer.log_gain.exp().max().item(),
                        pcen_layer.log_bias.exp().min().item(),
                        pcen_layer.log_bias.exp().max().item(),
                        pcen_layer.log_power.exp().min().item(),
                        pcen_layer.log_power.exp().max().item(),
                        pcen_layer.log_b.exp().min().item(),
                        pcen_layer.log_b.exp().max().item()))

            elapsed_train_time = timeit.default_timer() - start_train_time
            print('Training time (iter {0:5d}): {1:10.5f} seconds'.format(
                i + 1, elapsed_train_time))
            start_train_time = timeit.default_timer()

    print('best_lwlrap: {:.5f}'.format(best_lwlrap))
def train(args):
    """Train a DewarpNet-style backward-mapping (BM) regressor on doc3dbmnic.

    Per epoch: L1 + unwarp-reconstruction loss on the training split, then
    L1/MSE/SSIM validation; LR is reduced on val-MSE plateaus and the best
    val-MSE checkpoint is written to `args.logdir`.

    NOTE(review): the input slicing below assumes the loader stacks extra
    channels with the RGB image (model sees `images[:, 3:, ...]`, the
    reconstruction loss sees `images[:, :-1, ...]`) — confirm the loader's
    channel layout.
    """
    # Setup Dataloader
    data_loader = get_loader('doc3dbmnic')
    data_path = args.data_path
    print('Starting . . .')
    t_loader = data_loader(data_path,
                           is_transform=True,
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='val',
                           img_size=(args.img_rows, args.img_cols))
    n_classes = t_loader.n_classes
    print('Loading training data . . .')
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=8,
                                  shuffle=True)
    print('Loading validation data . . .')
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=8)

    # Setup Model
    print('Loading model . . .')
    model = get_model(args.arch, n_classes, in_channels=3)
    model = torch.nn.DataParallel(model,
                                  device_ids=range(
                                      torch.cuda.device_count()))
    model.cuda()

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.l_rate,
                                 weight_decay=5e-4,
                                 amsgrad=True)
    # LR Scheduler: halve the LR after 3 epochs without val-MSE improvement.
    sched = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       mode='min',
                                                       factor=0.5,
                                                       patience=3,
                                                       verbose=True)
    # Losses
    MSE = nn.MSELoss()
    loss_fn = nn.L1Loss()
    reconst_loss = recon_lossc.Unwarploss()

    epoch_start = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            epoch_start = checkpoint['epoch']
        else:
            print("No checkpoint found at '{}'".format(args.resume))

    # Log file:
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    # network_activation(t=[-1,1])_dataset_lossparams_augmentations_trainstart
    experiment_name = 'dnetccnl_htan_swat3dmini1kbm_l1_noaug_scratch'
    log_file_name = os.path.join(args.logdir, experiment_name + '.txt')
    # Append when the log already exists, otherwise create it.
    if os.path.isfile(log_file_name):
        log_file = open(log_file_name, 'a')
    else:
        log_file = open(log_file_name, 'w+')
    log_file.write('\n--------------- ' + experiment_name +
                   ' ---------------\n')
    log_file.close()

    # Setup tensorboard for visualization
    if args.tboard:
        # save logs in runs/<experiment_name>
        writer = SummaryWriter(comment=experiment_name)

    best_val_uwarpssim = 99999.0  # NOTE(review): tracked nowhere below
    best_val_mse = 99999.0
    global_step = 0

    for epoch in range(epoch_start, args.n_epoch):
        avg_loss = 0.0
        avgl1loss = 0.0
        avgrloss = 0.0
        avgssimloss = 0.0
        train_mse = 0.0
        model.train()

        for i, (images, labels) in enumerate(trainloader):
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            optimizer.zero_grad()
            # Model consumes the channels after the first 3 (RGB).
            target = model(images[:, 3:, :, :])
            # NCHW -> NHWC to match the label layout.
            target_nhwc = target.transpose(1, 2).transpose(2, 3)
            l1loss = loss_fn(target_nhwc, labels)
            rloss, ssim, uworg, uwpred = reconst_loss(images[:, :-1, :, :],
                                                      target_nhwc, labels)
            # Weighted sum; SSIM term currently disabled.
            loss = (10.0 * l1loss) + (0.5 * rloss)  # + (0.3*ssim)
            # loss=l1loss
            avgl1loss += float(l1loss)
            avg_loss += float(loss)
            avgrloss += float(rloss)
            avgssimloss += float(ssim)
            train_mse += MSE(target_nhwc, labels).item()

            loss.backward()
            optimizer.step()
            global_step += 1

            if (i + 1) % 10 == 0:
                # Running average over the last 10 batches.
                avg_loss = avg_loss / 10
                print("Epoch[%d/%d] Batch [%d/%d] Loss: %.4f" %
                      (epoch + 1, args.n_epoch, i + 1, len(trainloader),
                       avg_loss))
                avg_loss = 0.0
            if args.tboard and (i + 1) % 10 == 0:
                show_unwarp_tnsboard(global_step, writer, uwpred, uworg, 8,
                                     'Train GT unwarp', 'Train Pred Unwarp')
                writer.add_scalars(
                    'Train', {
                        'BM_L1 Loss/train': avgl1loss / (i + 1),
                        'CB_Recon Loss/train': avgrloss / (i + 1),
                        'CB_SSIM Loss/train': avgssimloss / (i + 1)
                    }, global_step)
                # writer.add_scalar('BM: L1 Loss/train',
                #                   avgl1loss/(i+1), global_step)
                # writer.add_scalar('CB: Recon Loss/train',
                #                   avgrloss/(i+1), global_step)
                # writer.add_scalar('CB: SSIM Loss/train',
                #                   avgssimloss/(i+1), global_step)

        # Epoch averages over the whole training set.
        avgssimloss = avgssimloss / len(trainloader)
        avgrloss = avgrloss / len(trainloader)
        avgl1loss = avgl1loss / len(trainloader)
        train_mse = train_mse / len(trainloader)
        print("Training L1:%4f" % (avgl1loss))
        print("Training MSE:'{}'".format(train_mse))
        train_losses = [avgl1loss, train_mse, avgrloss, avgssimloss]
        lrate = get_lr(optimizer)
        write_log_file(log_file_name, train_losses, epoch + 1, lrate, 'Train')
        if args.tboard:
            writer.add_scalar('BM: L1 Loss/train', avgl1loss, epoch + 1)
            writer.add_scalar('CB: Recon Loss/train', avgrloss, epoch + 1)
            writer.add_scalar('CB: SSIM Loss/train', avgssimloss, epoch + 1)
            writer.add_scalar('MSE: MSE/train', train_mse, epoch + 1)

        # ---- Validation ----
        model.eval()
        val_loss = 0.0
        val_l1loss = 0.0
        val_mse = 0.0
        val_rloss = 0.0
        val_ssimloss = 0.0
        for i_val, (images_val, labels_val) in tqdm(enumerate(valloader)):
            with torch.no_grad():
                images_val = Variable(images_val.cuda())
                labels_val = Variable(labels_val.cuda())
                target = model(images_val[:, 3:, :, :])
                target_nhwc = target.transpose(1, 2).transpose(2, 3)
                pred = target_nhwc.data.cpu()
                gt = labels_val.cpu()
                l1loss = loss_fn(target_nhwc, labels_val)
                rloss, ssim, uworg, uwpred = reconst_loss(
                    images_val[:, :-1, :, :], target_nhwc, labels_val)
                val_l1loss += float(l1loss.cpu())
                val_rloss += float(rloss.cpu())
                val_ssimloss += float(ssim.cpu())
                val_mse += float(MSE(pred, gt))
        if args.tboard:
            # Logs the unwarp images of the last validation batch.
            # NOTE(review): placement after the loop reconstructed from
            # collapsed source — confirm against the original file.
            show_unwarp_tnsboard(epoch + 1, writer, uwpred, uworg, 8,
                                 'Val GT unwarp', 'Val Pred Unwarp')

        val_l1loss = val_l1loss / len(valloader)
        val_mse = val_mse / len(valloader)
        val_ssimloss = val_ssimloss / len(valloader)
        val_rloss = val_rloss / len(valloader)
        print("val loss at epoch {}:: {}".format(epoch + 1, val_l1loss))
        print("val mse: {}".format(val_mse))

        val_losses = [val_l1loss, val_mse, val_rloss, val_ssimloss]
        write_log_file(log_file_name, val_losses, epoch + 1, lrate, 'Val')
        if args.tboard:
            # log the val losses
            writer.add_scalar('BM: L1 Loss/val', val_l1loss, epoch + 1)
            writer.add_scalar('CB: Recon Loss/val', val_rloss, epoch + 1)
            writer.add_scalar('CB: SSIM Loss/val', val_ssimloss, epoch + 1)
            writer.add_scalar('MSE: MSE/val', val_mse, epoch + 1)
        if args.tboard:
            # plot train against val
            writer.add_scalars('BM_L1_Loss', {
                'train': avgl1loss,
                'val': val_l1loss
            }, epoch + 1)
            writer.add_scalars('CB_Recon_Loss', {
                'train': avgrloss,
                'val': val_rloss
            }, epoch + 1)
            writer.add_scalars('CB_SSIM_Loss', {
                'train': avgssimloss,
                'val': val_ssimloss
            }, epoch + 1)
            writer.add_scalars('MSE_Mean_square_error', {
                'train': train_mse,
                'val': val_mse
            }, epoch + 1)

        # reduce learning rate
        sched.step(val_mse)

        # Save the best-so-far (by val MSE) checkpoint.
        if val_mse < best_val_mse:
            best_val_mse = val_mse
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state, args.logdir + "{}_{}_{}_{}_{}_best_model.pkl".format(
                    args.arch, epoch + 1, val_mse, train_mse,
                    experiment_name))

        # Periodic snapshot every 10 epochs regardless of quality.
        if (epoch + 1) % 10 == 0:
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state, args.logdir + "{}_{}_{}_{}_{}_model.pkl".format(
                    args.arch, epoch + 1, val_mse, train_mse,
                    experiment_name))
def merge(args):
    """Average per-fold probability files and build a submission CSV.

    Loads every existing `prob-*.npy` for the requested folds, averages them,
    thresholds to multilabel predictions (falling back to the arg-max class
    when nothing clears the threshold), optionally overrides predictions with
    leak-matched train labels, prints diagnostics, and writes the submission.

    Fixes vs. the previous version:
      * the label-combination report used '{:20s}'.format(cls) with `cls` a
        list, which raises TypeError (the 's' presentation type requires a
        string) — the list is now stringified first;
      * the bit-pattern width was hard-coded to 28; it now follows n_classes
        (identical behavior for the 28-class dataset this was written for).
    """
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path,
                         split=args.split,
                         is_transform=True,
                         img_size=(args.img_rows, args.img_cols),
                         no_gt=args.no_gt,
                         seed=args.seed,
                         use_external=args.use_external)

    n_classes = loader.n_classes
    testloader = data.DataLoader(loader,
                                 batch_size=args.batch_size,
                                 num_workers=4,
                                 pin_memory=True)

    # Sum the probability matrices of every fold whose file exists.
    avg_y_prob = np.zeros((loader.__len__(), n_classes), dtype=np.float32)
    fold_list = []
    for fold_num in range(args.num_folds):
        prob_file_name = 'prob-{}_{}x{}_{}_{}_{}-{}.npy'.format(
            args.split, args.img_rows, args.img_cols, args.model_name,
            args.n_epoch, fold_num, args.num_folds)
        if os.path.exists(prob_file_name):
            prob = np.load(prob_file_name)
            avg_y_prob = avg_y_prob + prob
            fold_list.append(fold_num)
    # Mean over the folds actually found (raises ZeroDivisionError-style
    # behavior only via numpy warning if none matched — caller's contract).
    avg_y_prob = avg_y_prob / len(fold_list)
    avgprob_file_name = 'prob-{}_{}x{}_{}_{}_[{}]-{}_avg.npy'.format(
        args.split, args.img_rows, args.img_cols, args.model_name,
        args.n_epoch, ','.join(map(str, fold_list)), args.num_folds)
    np.save(avgprob_file_name, avg_y_prob)

    weak_samples = 0  # samples where no class cleared the threshold
    y_true = np.zeros((loader.__len__(), n_classes), dtype=np.int32)
    y_pred = np.zeros((loader.__len__(), n_classes), dtype=np.int32)
    # Each row's label set encoded as a bitmask for co-occurrence stats.
    y_pow = np.zeros((loader.__len__(),), dtype=np.int64)
    pow_base = 2 ** np.arange(n_classes)
    pred_dict = collections.OrderedDict()

    if args.use_leak:
        # External test<->train visual-similarity matches ("leak").
        leak_df = pd.read_csv(os.path.join(
            data_path, 'TestEtraMatchingUnder_259_R14_G12_B10.csv'),
                              index_col='Test')[[
                                  'Extra', 'SimR', 'SimG', 'SimB'
                              ]]
        leak_dict = leak_df.to_dict('index')

    for i, (images, labels, names) in tqdm(enumerate(testloader)):
        prob = avg_y_prob[i * args.batch_size:i * args.batch_size +
                          images.size(0), :]
        if not args.no_gt:
            y_true[i * args.batch_size:i * args.batch_size +
                   images.size(0), :] = labels.long().cpu().numpy()
        y_pred[i * args.batch_size:i * args.batch_size +
               images.size(0), :] = (prob >= args.thresh).astype(np.int32)
        for k in range(images.size(0)):
            pred = np.where(y_pred[i * args.batch_size + k, :] ==
                            1)[0].tolist()
            if len(pred) == 0:
                # Nothing cleared the threshold: take the single best class.
                pred = [np.argmax(prob, axis=1)[k]]
                y_pred[i * args.batch_size + k, pred] = 1
                weak_samples += 1
            # NOTE(review): assumes the loader yields names as a 1-tuple of a
            # batch of ids — confirm against the dataset's __getitem__.
            name = names[0][k]
            if args.use_leak:
                if leak_dict.get(name, None) is not None:
                    sum_sim = (leak_dict[name]['SimR'] +
                               leak_dict[name]['SimG'] +
                               leak_dict[name]['SimB'])
                    if sum_sim <= 16:  #4:
                        # Strip the leading split prefix from the match id.
                        extra_label_name = '_'.join(
                            leak_dict[name]['Extra'].split('_')[1:])
                        if loader.train_labels.get(extra_label_name,
                                                   None) is not None:
                            pred_dict[name] = loader.train_labels[
                                extra_label_name]['Target']
            if pred_dict.get(name, None) is None:
                pred_dict[name] = ' '.join(map(str, pred))
        y_pow[i * args.batch_size:i * args.batch_size + images.size(0)] = (
            y_pred[i * args.batch_size:i * args.batch_size +
                   images.size(0), :] * pow_base).sum(1)

    if not args.no_gt:
        f1_score_val = f1_score(y_true,
                                y_pred,
                                labels=[l for l in range(n_classes)],
                                average='macro')
        print('F1-score (macro): {:.5f}'.format(f1_score_val))
        for i in range(n_classes):
            num = y_pred[:, i].sum()
            print('{:2d}: {:5d} ({:.5f}) | {:5d} ({:.5f})'.format(
                i, num,
                float(num) / y_pred.sum(),
                loader.class_num_samples_train[i].long(),
                loader.class_num_samples_train[i] /
                loader.class_num_samples_train.sum()))
        print('# of weak samples: {}'.format(weak_samples))

    # Report the most frequent predicted label combinations (top decile).
    uni, cnt = np.unique(y_pow, return_counts=True)
    sorted_idx = np.argsort(cnt)
    for i in range(len(uni) * 9 // 10, len(uni)):
        # Decode the bitmask back into the list of class indices.
        uni_b = '{:0{}b}'.format(uni[sorted_idx[i]], n_classes)
        cls = [j for j in range(n_classes) if uni_b[n_classes - 1 - j] == '1']
        # str(cls): the 's' format spec rejects non-string arguments.
        print('{:20s} {:5d}'.format(str(cls), cnt[sorted_idx[i]]))

    # Create submission
    csv_file_name = '{}_{}x{}_{}_{}_[{}]-{}_{}'.format(
        args.split, args.img_rows, args.img_cols, args.model_name,
        args.n_epoch, ','.join(map(str, fold_list)), args.num_folds,
        args.thresh)
    csv_file_name = (csv_file_name + '_avg_leak.csv'
                     if args.use_leak else csv_file_name + '_avg.csv')
    sub = pd.DataFrame.from_dict(pred_dict, orient='index')
    sub.index.names = ['Id']
    sub.columns = ['Predicted']
    sub.to_csv(csv_file_name)
def main():
    """Entry point: parse args, build loaders/model, train and validate.

    Tracks the best validation MAE across epochs, saves a checkpoint when it
    improves, writes plots and an HTML report every epoch.

    Bug fix: `best_score` is initialized to the sentinel -1, so the old
    comparison `val_mae < best_score` could never be True on a fresh run
    (MAE is non-negative) and the best checkpoint was never saved. The
    sentinel is now treated as "no score recorded yet"; a `best_score`
    restored from a checkpoint (>= 0) is compared as before.
    """
    global args, best_score, best_epoch
    best_score, best_epoch = -1, -1  # -1 == "no validation score yet"
    if len(sys.argv) > 1:
        args = parse_args()
        print('----- Experiments parameters -----')
        for k, v in args.__dict__.items():
            print(k, ':', v)
    else:
        print(
            'Please provide some parameters for the current experiment. Check-out arg.py for more info!'
        )
        sys.exit()

    # init random seeds
    utils.setup_env(args)

    # init tensorboard summary is asked
    tb_writer = SummaryWriter(f'{args.data_dir}/runs/{args.name}/tensorboard'
                              ) if args.tensorboard else None

    # init data loaders
    loader = get_loader(args)
    train_loader = torch.utils.data.DataLoader(loader(
        path_to_data=args.data_dir, mode='TRAIN'),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(loader(
        path_to_data=args.data_dir, mode='VAL'),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    exp_logger, lr = None, None
    model = get_model(args)
    criterion = losses.get_criterion(args)

    # optionally resume from a checkpoint
    if args.resume:
        # load_checkpoint may restore a previous best_score (>= 0).
        model, exp_logger, args.start_epoch, best_score, best_epoch, lr = load_checkpoint(
            args, model)
        args.lr = lr
    else:
        # create all output folders
        utils.init_output_env(args)
    if exp_logger is None:
        exp_logger = init_logger(args, model)

    optimizer, scheduler = optimizers.get_optimizer(args, model)
    print(' + Number of params: {}'.format(utils.count_params(model)))
    model.to(args.device)
    criterion.to(args.device)

    if args.test:
        # Evaluation-only mode: run on the test split and exit.
        test_loader = torch.utils.data.DataLoader(
            loader(path_to_data=args.data_dir, mode='TEST'),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)
        trainer.test(args,
                     test_loader,
                     model,
                     criterion,
                     args.start_epoch,
                     eval_score=metrics.accuracy_regression,
                     output_dir=args.out_pred_dir,
                     has_gt=True)
        sys.exit()

    is_best = True
    for epoch in range(args.start_epoch, args.epochs + 1):
        print('Current epoch: ', epoch)
        trainer.train(args,
                      train_loader,
                      model,
                      criterion,
                      optimizer,
                      exp_logger,
                      epoch,
                      eval_score=metrics.accuracy_regression,
                      tb_writer=tb_writer)

        # evaluate on validation set
        val_mae, val_squared_mse, val_loss = trainer.validate(
            args,
            val_loader,
            model,
            criterion,
            exp_logger,
            epoch,
            eval_score=metrics.accuracy_regression,
            tb_writer=tb_writer)

        # update learning rate
        if scheduler is None:
            trainer.adjust_learning_rate(args, optimizer, epoch)
        else:
            prev_lr = optimizer.param_groups[0]['lr']
            if 'ReduceLROnPlateau' == args.scheduler:
                scheduler.step(val_loss)
            else:
                scheduler.step()
            print(
                f"Updating learning rate from {prev_lr} to {optimizer.param_groups[0]['lr']}"
            )

        # remember best (lowest) val MAE and save checkpoint.
        # best_score < 0 means no score yet, so the first epoch is best.
        is_best = best_score < 0 or val_mae < best_score
        if is_best:
            best_score = val_mae
            best_epoch = epoch
            save_checkpoint(
                args, {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_score': best_score,
                    'best_epoch': best_epoch,
                    'exp_logger': exp_logger,
                }, is_best)

        # write plots to disk
        generate_plots(args, exp_logger, is_best=is_best)

        # generate html report
        logger.export_logs(args, epoch, best_epoch)

    if args.tensorboard:
        tb_writer.close()
    print("That's all folks!")
def train(cfg, writer, logger):
    """Config-driven classification training loop.

    Builds loaders/model/optimizer/scheduler/loss from `cfg`, optionally
    resumes from a checkpoint stored in the tensorboard logdir, then trains
    for `cfg["training"]["epochs"]` epochs, logging scalars and parameter
    histograms to `writer` and checkpointing best/periodic models.

    Args:
        cfg: dict-like config (keys: "cuda", "training", model/loader specs).
        writer: tensorboard SummaryWriter; its logdir also hosts checkpoints.
        logger: logging.Logger-style object used for progress messages.
    """
    # This statement must be declared before using pytorch
    use_cuda = False
    if cfg.get("cuda", None) is not None:
        if cfg.get("cuda", None) != "all":
            # Restrict visible GPUs before any CUDA initialization.
            os.environ["CUDA_VISIBLE_DEVICES"] = cfg.get("cuda", None)
        use_cuda = torch.cuda.is_available()

    # Setup random seed
    seed = cfg["training"].get("seed", random.randint(1, 10000))
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Setup Dataloader
    train_loader, val_loader = get_loader(cfg)

    # Setup Model
    model = get_model(cfg)
    # writer.add_graph(model, torch.rand([1, 3, 224, 224]))
    if use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(
                                          range(torch.cuda.device_count())))

    # Setup optimizer, lr_scheduler and loss function
    optimizer = get_optimizer(model.parameters(), cfg)
    scheduler = get_scheduler(optimizer, cfg)
    loss_fn = get_loss_fn(cfg)

    # Setup Metrics
    epochs = cfg["training"]["epochs"]
    recorder = RecorderMeter(epochs)
    start_epoch = 0
    # save model parameters every <n> epochs
    save_interval = cfg["training"]["save_interval"]

    if use_cuda:
        model.cuda()
        loss_fn.cuda()

    # Resume Trained Model — checkpoints live inside the writer's logdir.
    resume_path = os.path.join(writer.file_writer.get_logdir(),
                               cfg["training"]["resume"])
    best_path = os.path.join(writer.file_writer.get_logdir(),
                             cfg["training"]["best_model"])
    if cfg["training"]["resume"] is not None:
        if os.path.isfile(resume_path):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    resume_path))
            checkpoint = torch.load(resume_path)
            state = checkpoint["state_dict"]
            if torch.cuda.device_count() <= 1:
                # Strip DataParallel's "module." prefixes for single-GPU/CPU.
                state = convert_state_dict(state)
            model.load_state_dict(state)
            optimizer.load_state_dict(checkpoint["optimizer"])
            scheduler.load_state_dict(checkpoint["scheduler"])
            start_epoch = checkpoint["epoch"]
            recorder = checkpoint['recorder']
            logger.info("Loaded checkpoint '{}' (epoch {})".format(
                resume_path, checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(resume_path))

    epoch_time = AverageMeter()
    for epoch in range(start_epoch, epochs):
        start_time = time.time()
        # ETA estimate from the running mean epoch duration.
        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg *
                                                            (epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)
        logger.info(
            '\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:8.6f}]'.
            format(time_string(), epoch, epochs, need_time, optimizer.
                   param_groups[0]['lr']) +  # scheduler.get_last_lr() >=1.4
            ' [Best : Accuracy={:.2f}]'.format(recorder.max_accuracy(False)))

        train_acc, train_los = train_epoch(train_loader, model, loss_fn,
                                           optimizer, use_cuda, logger)
        val_acc, val_los = validate_epoch(val_loader, model, loss_fn,
                                          use_cuda, logger)
        scheduler.step()

        is_best = recorder.update(epoch, train_los, train_acc, val_los,
                                  val_acc)
        if is_best or epoch % save_interval == 0 or epoch == epochs - 1:
            # save model (resume model and best model)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'recorder': recorder,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                }, is_best, best_path, resume_path)

        for name, param in model.named_parameters():  # save histogram
            writer.add_histogram(name, param.clone().cpu().data.numpy(),
                                 epoch)
        writer.add_scalar('Train/loss', train_los, epoch)  # save curves
        writer.add_scalar('Train/acc', train_acc, epoch)
        writer.add_scalar('Val/loss', val_los, epoch)
        writer.add_scalar('Val/acc', val_acc, epoch)

        epoch_time.update(time.time() - start_time)
    writer.close()
def __init__(self, fullname, source, obj):
    """Record the entry's metadata and resolve a loader for its extension.

    `obj` is expected to carry an 'ext' key naming the file extension;
    the matching loader class is looked up via get_loader.
    """
    self.fullname, self.source, self.obj = fullname, source, obj
    # Loader is chosen purely by the entry's declared extension.
    self.loader = get_loader(obj['ext'])
def train(args):
    """Train a multilabel image classifier (28-class protein-atlas style).

    Epoch loop with gradient accumulation; total loss = weighted BCE +
    label-count ("co-occurrence") cross-entropy + image-reconstruction MSE.
    Every `args.eval_freq` epochs the val split is scored with macro F1
    (threshold 0.5, arg-max fallback when nothing clears the threshold).

    NOTE(review): assumes CUDA is available (unconditional .cuda() calls).
    """
    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')

    # Setup Augmentations
    data_aug = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomAffine(degrees=20,
                                translate=(0.1, 0.1),
                                scale=(0.9, 1.0 / 0.9)),
    ])

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path,
                           is_transform=True,
                           split='train',
                           img_size=(args.img_rows, args.img_cols),
                           augmentations=data_aug,
                           fold_num=args.fold_num,
                           num_folds=args.num_folds,
                           seed=args.seed,
                           use_external=args.use_external)
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='val',
                           img_size=(args.img_rows, args.img_cols),
                           fold_num=args.fold_num,
                           num_folds=args.num_folds,
                           seed=args.seed,
                           use_external=args.use_external)

    # Seed every RNG used below for reproducible folds.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=2,
                                  pin_memory=True,
                                  shuffle=True,
                                  drop_last=True)
    # batch_size=1 on validation: y_true/y_pred below index by i_val directly.
    valloader = data.DataLoader(v_loader,
                                batch_size=1,
                                num_workers=2,
                                pin_memory=True)

    # Setup Model
    model = get_model(args.arch, n_classes, use_cbam=args.use_cbam)
    model.cuda()

    # Check if model has custom optimizer / loss
    if hasattr(model, 'optimizer'):
        optimizer = model.optimizer
    else:
        milestones = [5, 10, 15]
        gamma = 0.2
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                           model.parameters()),
                                    lr=args.l_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        ##optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.l_rate, weight_decay=args.weight_decay)
        if args.num_cycles > 0:
            # Cosine annealing with warm restarts (stepped per iteration
            # inside the epoch loop below).
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer,
                T_max=args.n_epoch * len(trainloader) // args.num_cycles,
                eta_min=args.l_rate * (gamma**len(milestones)))
        else:
            scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer, milestones=milestones, gamma=gamma)

    if hasattr(model, 'loss'):
        print('Using custom loss')
        loss_fn = model.loss
    else:
        loss_fn = F.binary_cross_entropy_with_logits

    start_epoch = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            model_dict = model.state_dict()
            # Checkpoints are either {'model_state': ...} wrappers or raw
            # state dicts; handle both.
            if checkpoint.get('model_state', None) is not None:
                model_dict.update(
                    convert_state_dict(checkpoint['model_state'],
                                       load_classifier=args.load_classifier))
            else:
                model_dict.update(
                    convert_state_dict(checkpoint,
                                       load_classifier=args.load_classifier))
            if checkpoint.get('f1_score', None) is not None:
                start_epoch = checkpoint['epoch']
                print("Loaded checkpoint '{}' (epoch {}, f1_score {:.5f})".
                      format(args.resume, checkpoint['epoch'],
                             checkpoint['f1_score']))
            elif checkpoint.get('epoch', None) is not None:
                start_epoch = checkpoint['epoch']
                print("Loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
            model.load_state_dict(model_dict)
            if checkpoint.get('optimizer_state', None) is not None:
                optimizer.load_state_dict(checkpoint['optimizer_state'])
        else:
            print("No checkpoint found at '{}'".format(args.resume))

    for epoch in range(start_epoch, args.n_epoch):
        start_train_time = timeit.default_timer()

        if args.num_cycles == 0:
            # Epoch-level step for the MultiStepLR schedule.
            scheduler.step(epoch)

        recon_scale = 4
        bce_loss_sum = 0.0
        cooc_loss_sum = 0.0
        mse_loss_sum = 0.0
        model.train()
        optimizer.zero_grad()
        for i, (images, labels, _) in enumerate(trainloader):
            if args.num_cycles > 0 and (i + 1) % args.iter_size == 0:
                iter_num = i + epoch * len(trainloader)
                scheduler.step(
                    iter_num % (args.n_epoch * len(trainloader) //
                                args.num_cycles))  # Cosine Annealing with Restarts

            images = images.cuda()
            labels = labels.cuda()
            images_ref = images.clone(
            )  # for image (4 channels) reconstruction
            if recon_scale != 4:
                images_ref = F.interpolate(images_ref,
                                           scale_factor=recon_scale / 4.,
                                           mode='bilinear',
                                           align_corners=False)

            # Auxiliary target: number of positive labels, clamped so that
            # counts > 4 fall into bucket 0.
            sum_labels = labels.sum(1).long()
            sum_labels = torch.where(sum_labels <= 4, sum_labels,
                                     torch.zeros_like(sum_labels))

            outputs, outputs_cooc, recons = model(images,
                                                  recon_scale=recon_scale)

            # Each partial loss is divided by iter_size so accumulated
            # gradients average over the virtual batch.
            bce_loss = loss_fn(outputs[:labels.size(0), :],
                               labels,
                               pos_weight=t_loader.loss_weights)
            bce_loss = bce_loss / float(args.iter_size)
            bce_loss_sum = bce_loss_sum + bce_loss

            cooc_loss = F.cross_entropy(outputs_cooc[:labels.size(0), :],
                                        sum_labels)
            cooc_loss = cooc_loss / float(args.iter_size)
            cooc_loss = args.lambda_cooc_loss * cooc_loss
            cooc_loss_sum = cooc_loss_sum + cooc_loss

            mse_loss = F.mse_loss(recons, images_ref)
            mse_loss = mse_loss / float(args.iter_size)
            mse_loss = args.lambda_mse_loss * mse_loss
            mse_loss_sum = mse_loss_sum + mse_loss

            loss = bce_loss + cooc_loss + mse_loss
            loss.backward()

            if (i + 1) % args.print_train_freq == 0:
                print(
                    "Epoch [%3d/%3d] Iter [%6d/%6d] Loss: BCE %.4f / COOC %.4f / MSE %.4f"
                    % (epoch + 1, args.n_epoch, i + 1, len(trainloader),
                       bce_loss_sum, cooc_loss_sum, mse_loss_sum))

            # Apply the accumulated update (also at the last partial batch).
            if (i + 1) % args.iter_size == 0 or i == len(trainloader) - 1:
                optimizer.step()
                optimizer.zero_grad()
                bce_loss_sum = 0.0
                cooc_loss_sum = 0.0
                mse_loss_sum = 0.0

        state = {
            'epoch': epoch + 1,
            'model_state': model.state_dict(),
        }  #'optimizer_state': optimizer.state_dict(),}
        torch.save(
            state, "checkpoints/{}_{}_{}_{}x{}_{}-{}_model.pth".format(
                args.arch, args.dataset, epoch + 1, args.img_rows,
                args.img_cols, args.fold_num, args.num_folds))

        if (epoch + 1) % args.eval_freq == 0:
            weak_samples = 0  # samples where no class cleared the threshold
            thresh = 0.5
            y_true = np.zeros((v_loader.__len__(), n_classes),
                              dtype=np.int32)
            y_pred = np.zeros((v_loader.__len__(), n_classes),
                              dtype=np.int32)
            mean_loss_val = AverageMeter()
            model.eval()
            with torch.no_grad():
                for i_val, (images_val, labels_val,
                            _) in tqdm(enumerate(valloader)):
                    images_val = images_val.cuda()
                    labels_val = labels_val.cuda()
                    outputs_val = model(images_val)
                    prob = F.sigmoid(outputs_val)
                    max_pred = prob.max(1)[1].cpu().numpy()
                    pred = (prob >= thresh)
                    pred_sum = pred.sum(1)
                    bce_loss_val = loss_fn(outputs_val,
                                           labels_val,
                                           pos_weight=v_loader.loss_weights)
                    loss_val = bce_loss_val
                    mean_loss_val.update(loss_val, n=images_val.size(0))
                    y_true[i_val, :] = labels_val.long().cpu().numpy()
                    y_pred[i_val, :] = pred.long().cpu().numpy()
                    for k in range(images_val.size(0)):
                        if pred_sum[k] == 0:
                            # Fallback: force the single most likely class.
                            y_pred[i_val, max_pred[k]] = 1
                            weak_samples += 1

            f1_score_val = f1_score(y_true,
                                    y_pred,
                                    labels=[l for l in range(n_classes)],
                                    average='macro')
            print('F1-score (macro): {:.5f}'.format(f1_score_val))
            print('Mean val loss: {:.4f}'.format(mean_loss_val.avg))
            state['f1_score'] = f1_score_val
            mean_loss_val.reset()

            # Per-class prediction counts vs. the val-set class distribution.
            for k in range(n_classes):
                num = y_pred[:, k].sum()
                print('{:2d}: {:5d} ({:.5f}) | {:5d} ({:.5f})'.format(
                    k, num,
                    float(num) / y_pred.sum(),
                    v_loader.class_num_samples[k].long(),
                    v_loader.class_num_samples[k] /
                    v_loader.class_num_samples.sum()))
            print('# of weak samples: {}'.format(weak_samples))

            # Re-save the checkpoint, now including the F1 score.
            torch.save(
                state, "checkpoints/{}_{}_{}_{}x{}_{}-{}_model.pth".format(
                    args.arch, args.dataset, epoch + 1, args.img_rows,
                    args.img_cols, args.fold_num, args.num_folds))

        elapsed_train_time = timeit.default_timer() - start_train_time
        print('Training time (epoch {0:5d}): {1:10.5f} seconds'.format(
            epoch + 1, elapsed_train_time))
def train(args):
    """Train a world-coordinate (WC) regression model on the doc3dwc dataset.

    Trains with an L1 loss on Hardtanh-clamped predictions (gradient loss is
    computed but currently not added — see the commented term), validates each
    epoch, appends metrics to a text log (and optionally TensorBoard), and
    checkpoints whenever validation MSE improves plus every 10th epoch.
    """
    # Setup Dataloader
    data_loader = get_loader('doc3dwc')
    data_path = args.data_path
    t_loader = data_loader(data_path, is_transform=True,
                           img_size=(args.img_rows, args.img_cols),
                           augmentations=False)
    v_loader = data_loader(data_path, is_transform=True, split='val',
                           img_size=(args.img_rows, args.img_cols))
    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=8, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=8)

    # Setup Model (data-parallel over all visible GPUs)
    model = get_model(args.arch, n_classes, in_channels=3)
    model = torch.nn.DataParallel(
        model, device_ids=range(torch.cuda.device_count()))
    model.cuda()

    # Activation: clamp network outputs into [0, 1]
    htan = nn.Hardtanh(0, 1.0)

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.l_rate,
                                 weight_decay=5e-4, amsgrad=True)
    # LR Scheduler: halve LR when val MSE plateaus
    sched = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5, verbose=True)

    # Losses: L1 drives training; MSE and gradient loss are monitored
    MSE = nn.MSELoss()
    loss_fn = nn.L1Loss()
    gloss = grad_loss.Gradloss(window_size=5, padding=2)

    epoch_start = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            # optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            epoch_start = checkpoint['epoch']
        else:
            print("No checkpoint found at '{}'".format(args.resume))

    # Log file:
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    # activation_dataset_lossparams_augmentations_trainstart
    experiment_name = 'htan_doc3d_l1grad_bghsaugk_scratch'
    log_file_name = os.path.join(args.logdir, experiment_name + '.txt')
    if os.path.isfile(log_file_name):
        log_file = open(log_file_name, 'a')
    else:
        log_file = open(log_file_name, 'w+')
    log_file.write('\n--------------- ' + experiment_name +
                   ' ---------------\n')
    log_file.close()

    # Setup tensorboard for visualization
    if args.tboard:
        # save logs in runs/<experiment_name>
        writer = SummaryWriter(comment=experiment_name)

    best_val_mse = 99999.0
    global_step = 0

    for epoch in range(epoch_start, args.n_epoch):
        avg_loss = 0.0
        avg_l1loss = 0.0
        avg_gloss = 0.0
        train_mse = 0.0
        model.train()

        for i, (images, labels) in enumerate(trainloader):
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            optimizer.zero_grad()
            outputs = model(images)
            pred = htan(outputs)
            g_loss = gloss(pred, labels)
            l1loss = loss_fn(pred, labels)
            loss = l1loss  # +(0.2*g_loss)
            avg_l1loss += float(l1loss)
            avg_gloss += float(g_loss)
            avg_loss += float(loss)
            train_mse += float(MSE(pred, labels).item())
            loss.backward()
            optimizer.step()
            global_step += 1

            if (i + 1) % 10 == 0:
                print("Epoch[%d/%d] Batch [%d/%d] Loss: %.4f" %
                      (epoch + 1, args.n_epoch, i + 1, len(trainloader),
                       avg_loss / 10.0))
                avg_loss = 0.0  # running 10-batch window, reset after report

            if args.tboard and (i + 1) % 10 == 0:
                show_wc_tnsboard(global_step, writer, images, labels, pred, 8,
                                 'Train Inputs', 'Train WCs',
                                 'Train Pred. WCs')
                writer.add_scalars(
                    'Train', {
                        'WC_L1 Loss/train': avg_l1loss / (i + 1),
                        'WC_Grad Loss/train': avg_gloss / (i + 1)
                    }, global_step)

        train_mse = train_mse / len(trainloader)
        avg_l1loss = avg_l1loss / len(trainloader)
        avg_gloss = avg_gloss / len(trainloader)
        print("Training L1:%4f" % (avg_l1loss))
        print("Training MSE:'{}'".format(train_mse))
        train_losses = [avg_l1loss, train_mse, avg_gloss]
        lrate = get_lr(optimizer)
        write_log_file(experiment_name, train_losses, epoch + 1, lrate,
                       'Train')

        # ---------------- validation ----------------
        model.eval()
        val_loss = 0.0
        val_mse = 0.0
        val_gloss = 0.0
        for i_val, (images_val, labels_val) in tqdm(enumerate(valloader)):
            with torch.no_grad():
                images_val = Variable(images_val.cuda())
                labels_val = Variable(labels_val.cuda())
                outputs = model(images_val)
                pred_val = htan(outputs)
                g_loss = gloss(pred_val, labels_val).cpu()
                pred_val = pred_val.cpu()
                labels_val = labels_val.cpu()
                loss = loss_fn(pred_val, labels_val)
                val_loss += float(loss)
                val_mse += float(MSE(pred_val, labels_val))
                val_gloss += float(g_loss)

        val_loss = val_loss / len(valloader)
        val_mse = val_mse / len(valloader)
        val_gloss = val_gloss / len(valloader)
        print("val loss at epoch {}:: {}".format(epoch + 1, val_loss))
        print("val MSE: {}".format(val_mse))

        if args.tboard:
            # BUG FIX: previously passed `pred` (the last *training* batch
            # prediction) — the val visualization must show `pred_val`.
            show_wc_tnsboard(epoch + 1, writer, images_val, labels_val,
                             pred_val, 8, 'Val Inputs', 'Val WCs',
                             'Val Pred. WCs')
            writer.add_scalars('L1', {
                'L1_Loss/train': avg_l1loss,
                'L1_Loss/val': val_loss
            }, epoch + 1)
            writer.add_scalars('GLoss', {
                'Grad Loss/train': avg_gloss,
                'Grad Loss/val': val_gloss
            }, epoch + 1)

        val_losses = [val_loss, val_mse, val_gloss]
        write_log_file(experiment_name, val_losses, epoch + 1, lrate, 'Val')

        # reduce learning rate
        sched.step(val_mse)

        if val_mse < best_val_mse:
            best_val_mse = val_mse
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state,
                args.logdir + "{}_{}_{}_{}_{}_best_model.pkl".format(
                    args.arch, epoch + 1, val_mse, train_mse,
                    experiment_name))

        if (epoch + 1) % 10 == 0:
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state,
                args.logdir + "{}_{}_{}_{}_{}_model.pkl".format(
                    args.arch, epoch + 1, val_mse, train_mse,
                    experiment_name))
def train(args):
    """Train a doodle classifier with gradient accumulation.

    Two LR schedules are supported: cosine annealing with warm restarts
    (args.num_cycles > 0), stepped per iteration, or a MultiStepLR stepped
    per epoch.  Gradients are accumulated over args.iter_size batches
    before each optimizer step.
    """
    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')

    # Setup Augmentations
    data_aug = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomAffine(degrees=10,
                                translate=(0.05, 0.05),
                                scale=(0.95, 1.05)),
    ])

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path,
                           is_transform=True,
                           split='train',
                           version='simplified',
                           img_size=(args.img_rows, args.img_cols),
                           augmentations=data_aug,
                           train_fold_num=args.train_fold_num,
                           num_train_folds=args.num_train_folds,
                           seed=args.seed)
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='val',
                           version='simplified',
                           img_size=(args.img_rows, args.img_cols),
                           num_val=args.num_val,
                           seed=args.seed)

    # Seed all RNGs for reproducibility
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=2,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True)
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=2,
                                pin_memory=True)

    # Setup Metrics
    running_metrics = runningScore(n_classes)

    # Setup Model
    v_demision = 300  # output/embedding dimension passed to the model
    model = get_model(args.arch, v_demision, use_cbam=args.use_cbam)
    model.cuda()

    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.l_rate,
                                 weight_decay=args.weight_decay)
    if args.num_cycles > 0:
        len_trainloader = int(5e6)  # 4960414
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=args.num_train_folds * len_trainloader // args.num_cycles,
            eta_min=args.l_rate * 1e-1)
    else:
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[2, 4, 6, 8], gamma=0.5)

    start_epoch = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            model_dict = model.state_dict()
            # Checkpoint may be a raw state dict or wrapped in 'model_state'
            if checkpoint.get('model_state', -1) == -1:
                model_dict.update(
                    convert_state_dict(checkpoint,
                                       load_classifier=args.load_classifier))
            else:
                model_dict.update(
                    convert_state_dict(checkpoint['model_state'],
                                       load_classifier=args.load_classifier))
            print(
                "Loaded checkpoint '{}' (epoch {}, mapk {:.5f}, top1_acc {:7.3f}, top2_acc {:7.3f} top3_acc {:7.3f})"
                .format(args.resume, checkpoint['epoch'], checkpoint['mapk'],
                        checkpoint['top1_acc'], checkpoint['top2_acc'],
                        checkpoint['top3_acc']))
            model.load_state_dict(model_dict)
            if checkpoint.get('optimizer_state', None) is not None:
                optimizer.load_state_dict(checkpoint['optimizer_state'])
            start_epoch = checkpoint['epoch']
        else:
            print("No checkpoint found at '{}'".format(args.resume))

    # PERF FIX: the adaptive loss module was re-instantiated (and moved to
    # GPU) on every batch inside the training loop; build it once here.
    a_loss = Adptive_loss().cuda()

    loss_sum = 0.0
    for epoch in range(start_epoch, args.n_epoch):
        start_train_time = timeit.default_timer()

        if args.num_cycles == 0:
            scheduler.step(epoch)

        model.train()
        optimizer.zero_grad()
        for i, (images, labels, recognized, _) in enumerate(trainloader):
            if args.num_cycles > 0:
                # Cosine Annealing with Restarts: per-iteration step,
                # wrapping the counter at each cycle boundary
                iter_num = i + epoch * len_trainloader
                scheduler.step(
                    iter_num % (args.num_train_folds * len_trainloader //
                                args.num_cycles))

            images = images.cuda()
            labels = labels.cuda()

            outputs = model(images)
            loss = a_loss(outputs, labels)
            loss = loss / float(args.iter_size)  # Accumulated gradients
            # Accumulate a plain float so no autograd graph is retained
            loss_sum = loss_sum + float(loss)
            loss.backward()

            if (i + 1) % args.print_train_freq == 0:
                print("Epoch [%d/%d] Iter [%6d/%6d] Loss: %.4f" %
                      (epoch + 1, args.n_epoch, i + 1, len(trainloader),
                       loss_sum))

            if (i + 1) % args.iter_size == 0 or i == len(trainloader) - 1:
                optimizer.step()
                optimizer.zero_grad()
                loss_sum = 0.0

        elapsed_train_time = timeit.default_timer() - start_train_time
        print('Training time (epoch {0:5d}): {1:10.5f} seconds'.format(
            epoch + 1, elapsed_train_time))
def forward(self, position_feature):
    """Map a [B, max_length] tensor of position indices to their encodings."""
    return self.position_encoding(position_feature)


if __name__ == '__main__':
    # Smoke test: push one tiny batch through the model, run a backward
    # pass, and dump the gradients of all non-VGG parameters.
    import ipdb
    from loaders import get_loader
    from vocabulary import Vocab
    from configs_transformer import DefaultConfig
    from tqdm import tqdm

    args = DefaultConfig
    args.batch_size = 2
    loader = get_loader('train', args.batch_size)
    vocab = Vocab()
    for batch in tqdm(loader):
        feature, captions = [item for item in batch]
        model = VGGTransformerNew(vocab, args)
        output_log_prob, output_token = model(feature, captions.long())
        token = model.greedy_search(feature[:, 0])
        loss = output_log_prob.sum()
        loss.backward()
        d = []
        for pname, param in model.named_parameters():
            if pname[:3] != 'vgg':
                print(pname)
                print(param.grad)
        input('next')
def main():
    """CLI entry point.

    Parses args, builds data loaders / model / criterion, then either runs a
    one-off test pass (args.test) or trains for args.epochs epochs with
    per-epoch validation, LR scheduling, and best-mAP checkpointing.
    """
    global args, best_score, best_epoch
    best_score, best_epoch = -1, -1

    if len(sys.argv) > 1:
        args = parse_args()
        print('----- Experiments parameters -----')
        for k, v in args.__dict__.items():
            print(k, ':', v)
    else:
        print('Please provide some parameters for the current experiment. Check-out args.py for more info!')
        sys.exit()

    # init random seeds
    utils.setup_env(args)

    # init tensorboard summary if asked
    tb_writer = SummaryWriter(f'{args.data_dir}/runs/{args.name}/tensorboard') if args.tensorboard else None

    # init data loaders
    loader = get_loader(args)
    train_loader = torch.utils.data.DataLoader(loader(data_dir=args.data_dir, split='train',
                                                      min_size=args.min_size_train, max_size=args.max_size_train,
                                                      dataset_size=args.dataset_size_train),
                                               batch_size=args.batch_size, shuffle=True,
                                               num_workers=args.workers,
                                               collate_fn=lambda x: x, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(loader(data_dir=args.data_dir, split='val',
                                                    min_size=args.min_size_val, max_size=args.max_size_val,
                                                    dataset_size=args.dataset_size_val),
                                             batch_size=1, shuffle=False,
                                             num_workers=args.workers,
                                             collate_fn=lambda x: x, pin_memory=True)

    exp_logger, lr = None, None
    model = get_model(args)
    criterion = losses.get_criterion(args)

    # optionally resume from a checkpoint
    if args.resume:
        model, exp_logger, args.start_epoch, best_score, best_epoch, lr = load_checkpoint(args, model)
        args.lr = lr
    else:
        # create all output folders
        utils.init_output_env(args)

    if exp_logger is None:
        exp_logger = init_logger(args, model)

    optimizer, scheduler = optimizers.get_optimizer(args, model)

    print(' + Number of params: {}'.format(utils.count_params(model)))

    model.to(args.device)
    criterion.to(args.device)

    if args.test:
        test_loader = torch.utils.data.DataLoader(loader(data_dir=args.data_dir, split='test',
                                                         min_size=args.min_size_val, max_size=args.max_size_val,
                                                         dataset_size=args.dataset_size_val),
                                                  batch_size=args.batch_size, shuffle=False,
                                                  num_workers=args.workers,
                                                  collate_fn=lambda x: x, pin_memory=True)
        trainer.test(args, test_loader, model, criterion, args.start_epoch,
                     eval_score=metrics.get_score(args.test_type),
                     output_dir=args.out_pred_dir, has_gt=True,
                     print_freq=args.print_freq_val)
        sys.exit()

    for epoch in range(args.start_epoch, args.epochs + 1):
        print('Current epoch:', epoch)
        trainer.train(args, train_loader, model, criterion, optimizer, exp_logger, epoch,
                      eval_score=metrics.get_score(args.train_type),
                      print_freq=args.print_freq_train, tb_writer=tb_writer)

        # evaluate on validation set
        mAP, val_loss = trainer.validate(args, val_loader, model, criterion, exp_logger, epoch,
                                         eval_score=metrics.get_score(args.val_type),
                                         print_freq=args.print_freq_val, tb_writer=tb_writer)

        # Update learning rate
        if scheduler is None:
            trainer.adjust_learning_rate(args, optimizer, epoch)
        else:
            prev_lr = optimizer.param_groups[0]['lr']
            if args.scheduler == 'ReduceLROnPlateau':
                # plateau scheduler needs the metric it watches
                scheduler.step(val_loss)
            else:
                scheduler.step()
            print(f"Updating learning rate from {prev_lr} to {optimizer.param_groups[0]['lr']}")

        # remember best score and save checkpoint
        is_best = mAP > best_score
        best_score = max(mAP, best_score)
        if is_best:  # was the anti-idiom `if True == is_best:`
            best_epoch = epoch
        save_checkpoint(args, {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_score': best_score,
            'best_epoch': best_epoch,
            'exp_logger': exp_logger,
        }, is_best)

    if args.tensorboard:
        tb_writer.close()

    print(" ***** Processes all done. *****")
me_input_resizer = KeypointsDownsampler(tgt_size=64, mode='bilinear') generator.eval() generator.to(device) encoder.eval() encoder.to(device) mask_estimator.eval() mask_estimator.to(device) position_proposer = PositionProposer(256) image_size = args.image_size dataset = SimpleLoader(data_file='./data/YBB/test_samples_100.th') ybb_loader = iter(DataLoader(dataset, batch_size=1, shuffle=True)) cityscapes_loader = get_loader(cs_dir, target_type='semantic') loader = iter(DataLoader(cityscapes_loader[1], batch_size=1, shuffle=True)) start = time.time() for sample_id, sample in enumerate(cityscapes_loader[0], 1): if START_SAMPLE_ID and sample_id < START_SAMPLE_ID: continue if END_SAMPLE_ID and sample_id == END_SAMPLE_ID: break image_path, image, segmentation = sample ext_len = len('_leftImg8bit.jpg') if '_leftImg8bit.png' != image_path[ -ext_len:] and '_leftImg8bit.jpg' != image_path[ext_len:]: print('Skip {}. Invalid extension {}'.format( image_path, image_path[-ext_len:]))