def train(config):
    """Train or test a segmentation model as selected by ``config.mode``.

    Args:
        config: parsed options object providing ``check_constraint()``,
            ``model_type``, ``model_path``, ``result_path`` and ``mode``
            ('train' or 'test').

    Raises:
        ValueError: if ``config.model_type`` is not a supported model.
    """
    config.check_constraint()
    # Let cuDNN auto-tune conv algorithms (fixed input sizes assumed).
    cudnn.benchmark = True
    # Explicit validation instead of `assert`, which is stripped under -O.
    if config.model_type not in ['U_Net']:
        raise ValueError('unsupported model_type: %s' % config.model_type)
    # Create output directories if they do not exist yet.
    # (The original checked result_path twice; the duplicate is removed,
    # and exist_ok avoids the race between the check and the mkdir.)
    os.makedirs(config.model_path, exist_ok=True)
    os.makedirs(config.result_path, exist_ok=True)
    logging.info(config)
    train_loader = get_loader(config, mode='train')
    valid_loader = get_loader(config, mode='valid')
    test_loader = get_loader(config, mode='test')
    solve = utils.solver.Solver(config, train_loader, valid_loader, test_loader)
    # Train and sample the images
    if config.mode == 'train':
        solve.run()
    elif config.mode == 'test':
        solve.test()
def __init_data_loader(self, data_json):
    """Return a shuffled data loader over *data_json*, built from the
    trainer's vocabulary, transform and configured batch size."""
    return get_loader(image_dir=self.args.image_dir,
                      caption_json=self.args.caption_json,
                      data_json=data_json,
                      vocabulary=self.vocab,
                      transform=self.transform,
                      batch_size=self.args.batch_size,
                      shuffle=True)
def __init__(self, args):
    # Multi-label classification training harness: builds train/val loaders,
    # model, optimizer + LR scheduler, loss, early stopping and the metric
    # meters used during evaluation.
    super(Trainer, self).__init__()
    self.args = args
    # Datasets/loaders come from project factories keyed by args.dataset.
    train_dataset = data_factory[args.dataset](self.args, 'train')
    self.train_loader = get_loader(train_dataset, args, 'train')
    self.num_classes = train_dataset.num_classes
    val_dataset = data_factory[args.dataset](self.args, 'val')
    self.val_loader = get_loader(val_dataset, args, 'val')
    self.model = model_factory[args.model](self.args, self.num_classes)
    self.model.cuda()
    # Only optimize parameters that require gradients (frozen layers skipped).
    trainable_parameters = filter(lambda param: param.requires_grad,
                                  self.model.parameters())
    if self.args.optimizer == 'Adam':
        self.optimizer = Adam(trainable_parameters, lr=self.args.lr)
    elif self.args.optimizer == 'SGD':
        self.optimizer = SGD(trainable_parameters, lr=self.args.lr)
    # NOTE(review): no else branch — an unrecognized optimizer name leaves
    # self.optimizer unset and fails below with AttributeError.
    self.lr_scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                       patience=2,
                                                       verbose=True)
    # Loss selection; TencentLoss/FocalLoss are project-local criteria.
    if self.args.loss == 'BCElogitloss':
        self.criterion = nn.BCEWithLogitsLoss()
    elif self.args.loss == 'tencentloss':
        self.criterion = TencentLoss(self.num_classes)
    elif self.args.loss == 'focalloss':
        self.criterion = FocalLoss()
    self.early_stopping = EarlyStopping(patience=5)
    # Evaluation meters: VOC-style mAP plus averaged loss / top-k / threshold
    # statistics (all project-local helpers).
    self.voc12_mAP = VOC12mAP(self.num_classes)
    self.average_loss = AverageLoss(self.args.batch_size)
    self.average_topk_meter = TopkAverageMeter(self.num_classes,
                                               topk=self.args.topk)
    self.average_threshold_meter = ThresholdAverageMeter(
        self.num_classes, threshold=self.args.threshold)
    self.global_step = 0
    self.writer = get_summary_writer(self.args)
def main(args):
    """Train the visual-attention caption Decoder.

    Loads the vocabulary, builds the data loader and Decoder from *args*,
    then runs ``train()`` once per epoch, checkpointing after each epoch.
    """
    import os  # local import: keeps the checkpoint-dir fix self-contained

    # dataset setting
    image_root = args.image_root
    ann_path = args.ann_path
    vocab_path = args.vocab_path
    batch_size = args.batch_size
    shuffle = args.shuffle
    num_workers = args.num_workers
    with open(vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    # Fix: honor args.shuffle and the local num_workers — the original read
    # both into locals but then hard-coded shuffle=True and re-read args.
    dataloader = get_loader(image_root, ann_path, vocab, batch_size,
                            shuffle=shuffle, num_workers=num_workers)

    # model setting
    vis_dim = args.vis_dim
    vis_num = args.vis_num
    embed_dim = args.embed_dim
    hidden_dim = args.hidden_dim
    vocab_size = args.vocab_size
    num_layers = args.num_layers
    dropout_ratio = args.dropout_ratio
    model = Decoder(vis_dim=vis_dim, vis_num=vis_num, embed_dim=embed_dim,
                    hidden_dim=hidden_dim, vocab_size=vocab_size,
                    num_layers=num_layers, dropout_ratio=dropout_ratio)

    # optimizer setting
    lr = args.lr
    num_epochs = args.num_epochs
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # criterion
    criterion = nn.CrossEntropyLoss()
    if cuda_check:
        model.cuda()
        criterion.cuda()
    model.train()
    print('Number of epochs:', num_epochs)

    # Robustness: make sure the checkpoint directory exists before saving.
    os.makedirs('./checkpoints', exist_ok=True)
    for epoch in range(num_epochs):
        train(dataloader=dataloader, model=model, optimizer=optimizer,
              criterion=criterion, epoch=epoch, total_epoch=num_epochs)
        # NOTE(review): this pickles the whole module; state_dict() would be
        # more portable across code changes.
        torch.save(model, './checkpoints/model_%d.pth' % (epoch))
def main(opt):
    # Evaluate a trained captioning model: restore the options saved at
    # training time from the infos pickle, rebuild encoder/decoder, run
    # eval_split, and dump the predicted captions to a results JSON.
    with open(opt.infos_path, 'rb') as f:
        infos = pickle.load(f)
    #override and collect parameters
    if len(opt.input_h5) == 0:
        opt.input_h5 = infos['opt'].input_h5
    if len(opt.input_json) == 0:
        opt.input_json = infos['opt'].input_json
    if opt.batch_size == 0:
        opt.batch_size = infos['opt'].batch_size
    if len(opt.id) == 0:
        opt.id = infos['opt'].id
    # Options allowed to differ from training time.
    # NOTE(review): 'strat_from' looks like a typo for 'start_from' — if the
    # parser defines start_from, a differing value will trip the consistency
    # assert below; confirm against the option parser before fixing.
    ignore = ['id', 'batch_size', 'beam_size', 'strat_from', 'language_eval']
    for key, value in vars(infos['opt']).items():
        if key not in ignore:
            if key in vars(opt):
                # Any option present in both must match what was trained with.
                assert vars(opt)[key] == vars(infos['opt'])[key],\
                    key+" option not consistent"
            else:
                vars(opt).update({key: value})
    vocab = infos['vocab']
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    encoder = Encoder()
    decoder = Decoder(opt)
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    # Only the decoder's weights are restored from opt.model; the encoder is
    # presumably a fixed pretrained feature extractor — confirm.
    decoder.load_state_dict(torch.load(opt.model, map_location=str(device)))
    encoder.eval()
    decoder.eval()
    criterion = utils.LanguageModelCriterion().to(device)
    if len(opt.image_folder) == 0:
        loader = get_loader(opt, 'test')
        loader.ix_to_word = vocab
    # NOTE(review): `loader` is built but never passed to eval_split — verify
    # eval_split constructs/obtains its own data loader.
    loss, split_predictions, lang_stats = \
        eval_utils.eval_split(encoder, decoder, criterion, opt, vars(opt))
    print('loss: ', loss)
    print(lang_stats)
    result_json_path = os.path.join(
        opt.checkpoint_path,
        "captions_"+opt.split+"2014_"+opt.id+"_results.json")
    with open(result_json_path, "w") as f:
        json.dump(split_predictions, f)
def __init__(self, args):
    """Assemble everything needed for a test pass: the test loader, the
    model (on GPU), the loss criterion and the evaluation meters."""
    super(Tester, self).__init__()
    self.args = args

    # Test split: dataset from the project factory, then its loader.
    dataset = data_factory[args.dataset](self.args, 'test')
    self.test_loader = get_loader(dataset, args, 'test')
    self.num_classes = dataset.num_classes

    # Model lives on the GPU for the whole test run.
    self.model = model_factory[args.model](self.args, self.num_classes)
    self.model.cuda()

    # Loss used for reporting the test loss.
    if self.args.loss == 'BCElogitloss':
        self.criterion = nn.BCEWithLogitsLoss()
    elif self.args.loss == 'tencentloss':
        self.criterion = TencentLoss(self.num_classes)

    # Metric meters: VOC-style mAP plus loss / top-k / threshold averages.
    self.voc12_mAP = VOC12mAP(self.num_classes)
    self.average_loss = AverageLoss(self.args.batch_size)
    self.average_topk_meter = TopkAverageMeter(self.num_classes,
                                               topk=self.args.topk)
    self.average_threshold_meter = ThresholdAverageMeter(
        self.num_classes, threshold=self.args.threshold)
def evaluate_NMD(NMD, param=0.5, mode='alpha'):
    """Estimate NMD's discrimination accuracy over 25 LR/HR pairs.

    For each pair, the real HR image should score above 0.5 and a degraded
    version of it (alpha-blend or sigma-noise, strength *param*) should
    score below 0.5; each pair contributes two decisions.

    Args:
        NMD: discriminator network; ``NMD(x)[:, 0]`` is the per-sample score.
        param: degradation strength passed to the data_gen_* helper.
        mode: 'alpha' or 'sigma' — which degradation to test against.

    Returns:
        Fraction of correct decisions in [0, 1].

    Raises:
        ValueError: if *mode* is neither 'alpha' nor 'sigma'.
    """
    # Validate up front instead of print + `assert False` inside the loop
    # (asserts are stripped under `python -O`, and the check is loop-invariant).
    if mode not in ('alpha', 'sigma'):
        raise ValueError(f'wrong mode: {mode}. It should be alpha or sigma')
    NMD.eval()
    data_loader = get_loader(train=False, batch_size=1)
    data_iter = iter(data_loader)
    right = 0
    count = 0
    for _ in range(25):
        try:
            lr, hr = next(data_iter)
        except StopIteration:
            # Loader exhausted before 25 samples — wrap around and continue.
            data_iter = iter(data_loader)
            lr, hr = next(data_iter)
        lr = lr.to(device)
        hr = hr.to(device)
        # The genuine HR image should be classified as "natural" (> 0.5).
        if NMD(hr)[:, 0] > 0.5:
            right += 1
        # The degraded image should be classified as "manipulated" (< 0.5).
        if mode == 'alpha':
            A = data_gen_alpha(lr, hr, param)
            if NMD(A)[:, 0] < 0.5:
                right += 1
        else:
            B = data_gen_sigma(lr, hr, param)
            if NMD(B)[:, 0] < 0.5:
                right += 1
        count += 2
    return right / count
def main():
    """Evaluate a trained CNN encoder + RNN decoder captioner with
    sentence-level BLEU, reading all paths/hyper-parameters from the
    [EVAL] section of config.ini."""
    config = configparser.ConfigParser()
    config.read('config.ini')
    params = config['EVAL']
    encoder_path = params['encoder_path']
    decoder_path = params['decoder_path']
    crop_size = int(params['crop_size'])
    vocab_path = params['vocab_path']
    image_dir = params['image_dir']
    caption_path = params['caption_path']
    embed_size = int(params['embed_size'])
    hidden_size = int(params['hidden_size'])
    num_layers = int(params['num_layers'])
    batch_size = int(params['batch_size'])
    num_workers = int(params['num_workers'])
    # Image preprocessing
    # NOTE(review): RandomCrop + RandomHorizontalFlip at evaluation time makes
    # the BLEU score non-deterministic; a deterministic CenterCrop is the
    # usual choice — confirm whether this is intentional.
    transform = transforms.Compose([
        transforms.Resize(229),
        transforms.RandomCrop(crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    # Load vocabulary wrapper
    with open(vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    # Build models
    encoder = EncoderCNN(
        embed_size).eval()  # eval mode (batchnorm uses moving mean/variance)
    decoder = DecoderRNN(embed_size, hidden_size, len(vocab),
                         num_layers).eval()
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    # Load the trained model parameters
    encoder.load_state_dict(torch.load(encoder_path))
    decoder.load_state_dict(torch.load(decoder_path))
    data_loader = get_loader(image_dir, caption_path, vocab, transform,
                             batch_size, True, num_workers)
    bleu_score = 0

    def id_to_word(si):
        # Map a sequence of vocabulary ids to words, stopping at '<end>'.
        s = []
        for word_id in si:
            word = vocab.idx2word[word_id]
            s.append(word)
            if word == '<end>':
                break
        #try :
        #s.remove('.')
        #s.remove('<start>')
        #s.remove('<end>')
        #except:
        #pass
        return (s)

    for i, (images, captions, lengths) in enumerate(data_loader):
        # Generate an caption from the image
        images = images.to(device)
        feature = encoder(images)
        sampled_ids = decoder.sample(feature)
        sampled_ids = sampled_ids[0].cpu().numpy(
        )  # (1, max_seq_length) -> (max_seq_length)
        captions = captions.detach().cpu().numpy()
        # All ground-truth captions in the batch serve as BLEU references.
        references = []
        for cap in captions:
            references.append(id_to_word(cap))
        gen_cap = id_to_word(sampled_ids)
        # NOTE(review): `c` is presumably a module-level
        # nltk SmoothingFunction() instance — confirm.
        bleu_score = bleu_score + nltk.translate.bleu_score.sentence_bleu(
            references, gen_cap, smoothing_function=c.method7)
        if i % 500 == 0:
            # Running average of BLEU so far.
            print(i + 1, ' bleu_score ', bleu_score / (i + 1))
type=int, default=1, help='the channel of out img, decide the num of class') parser.add_argument('--gpu_avaiable', type=str, default='0', help='the gpu used') parser.add_argument('--checkpoints', type=str, default='./weights/model_best.pth', help="weight's path") args = parser.parse_args() os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_avaiable device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Loading test data # test_loader = get_loader( # args.data_path, args.crop_size, args.resize, args.batch_size, mode='test') test_loader = get_loader(args.data_path, None, args.resize, args.batch_size, mode='test') # Load model net = HybridNet(input_size=args.resize[0], n_classes=args.n_class) net, _ = load_pretrained(net, args.checkpoints) eval(net, test_loader, device)
def test(encoder, decoder, vocab, num_samples, num_hints, debug=False,
         c_step=0.0, no_avg=True):
    # Evaluate a caption decoder's ability to use "hint" words (the first
    # num_hints ground-truth tokens). Three measurement modes via args.msm:
    #   "ps" — probability scores with/without hints,
    #   "ce" — cross-entropy losses with/without hints,
    #   "co" — write predictions to COCO-format JSON for pycoco evaluation.
    # Returns a (no-hint, hint) pair of averages or lists, or None for "co".
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    rt_image = './data/val_resized2014'
    annotations = args.caption or './data/annotations/captions_val2014.json'
    shuffle = False
    batch_size = 2 if args.adapt else 1  # If inputting random caption as gt, use batch of 2 and swap gts
    data_loader = get_loader(rt_image, annotations, vocab, transform,
                             batch_size, shuffle, 1)
    # Decoder's output layer must match the vocabulary size.
    assert len(vocab) == decoder.linear.out_features
    avg_gt_score, avg_gt_score_hint = torch.zeros(args.compare_steps,
                                                  1), torch.zeros(
                                                      args.compare_steps, 1)
    gt_scores, gt_scores_hint = [], []
    avg_crossEnloss, avg_crossEnloss_hint = torch.zeros(
        args.compare_steps, 1), torch.zeros(args.compare_steps, 1)
    crossEnlosses, crossEnlosses_hint = [], []
    num_sampled = 0
    data_points = []
    coco_json = CocoJson('data/captions_val2014.json',
                         'data/captions_val2014_results.json')
    coco_json_update = CocoJson('data/captions_val2014.json',
                                'data/captions_val2014_results_u.json')
    for i, (images, captions, lengths, img_ids,
            ann_ids) in enumerate(data_loader):
        # Stop once num_samples items have been seen (2 per batch in adapt mode).
        if i >= num_samples or args.adapt and i * 2 >= num_samples:
            break
        for k in range(batch_size):
            image, length, img_id, ann_id = images[k:k+1], lengths[k:k+1], \
                img_ids[k:k+1], ann_ids[k:k+1]
            caption = captions[k:k + 1]
            if args.adapt:
                # use the other image's caption for gt input
                gt_input = captions[(k + 1) % batch_size, :args.num_hints + 1]
            else:
                gt_input = captions[k, :args.num_hints + 1]
            image_tensor = to_var(image, volatile=True)
            feature = encoder(image_tensor)
            # Compute probability score
            if args.msm == "ps":
                gt_score, gt_score_hint, num_compare = probabilityScore(
                    caption, feature, vocab, num_hints, decoder, c_step,
                    args.compare_steps)
                if not no_avg:
                    # Accumulate the first num_compare steps into the averages.
                    avg_gt_score = avg_gt_score.index_add_(
                        0, torch.LongTensor(range(num_compare)), gt_score)
                    avg_gt_score_hint = avg_gt_score_hint.index_add_(
                        0, torch.LongTensor(range(num_compare)), gt_score_hint)
                else:
                    gt_scores.append(gt_score[:num_compare])
                    gt_scores_hint.append(gt_score_hint[:num_compare])
            # Compute cross entropy loss
            elif args.msm == 'ce':
                crossEnloss, crossEnloss_hint, num_compare = crsEntropyLoss(
                    caption, length, feature, vocab, num_hints, decoder,
                    c_step, args.compare_steps)
                # Helper signals "skip this sample" by returning None.
                if type(crossEnloss) == type(None):
                    continue
                if not no_avg:
                    avg_crossEnloss = avg_crossEnloss.index_add_(
                        0, torch.LongTensor(range(num_compare)), crossEnloss)
                    avg_crossEnloss_hint = avg_crossEnloss_hint.index_add_(
                        0, torch.LongTensor(range(num_compare)),
                        crossEnloss_hint)
                else:
                    crossEnlosses.append(crossEnloss)
                    crossEnlosses_hint.append(crossEnloss_hint)
            # Evaluate with pycoco tools
            elif args.msm == "co":
                no_update, pred_caption, _ = decode_beta(feature, gt_input, decoder, \
                    vocab, c_step, args.prop_steps)
                # Convert id tensors to word lists (drop <start>/<end>).
                caption = [vocab.idx2word[c] for c in caption[0, 1:-1]]
                gt_input = [vocab.idx2word[c] for c in gt_input[:-1]]
                # Splice the ground-truth hint words onto both decodings so
                # only the continuation after the hints is compared.
                no_update = ' '.join(gt_input) + ' ' + ' '.join(
                    no_update.split()[num_hints:])
                pred_caption = ' '.join(gt_input) + ' ' + ' '.join(
                    pred_caption.split()[num_hints:])
                caption = ' '.join(caption)
                if args.load_val:
                    caption = None
                coco_json_update.add_entry(img_id[0], ann_id[0], caption,
                                           pred_caption)
                coco_json.add_entry(img_id[0], ann_id[0], caption, no_update)
            if debug and not args.test_c_step:
                # NOTE(review): `hypothesis`/`hypothesis_hint` are not defined
                # anywhere in this function — this branch raises NameError if
                # debug=True; confirm intended variables.
                print(
                    "Ground Truth: {}\nNo hint: {}\nHint: {}\
\nGround Truth Score: {}\nGround Truth Score Improve {}\
".format(caption, hypothesis, hypothesis_hint, gt_score, gt_score_hint))
    if args.test_c_step:
        return data_points
    if args.msm == "ps":
        # NOTE(review): dividing by the loop index `i` — off-by-one relative
        # to the number of batches, and ZeroDivisionError if the loader is
        # empty or breaks on the first batch; confirm.
        avg_gt_score /= i
        avg_gt_score_hint /= i
        if not no_avg:
            return (avg_gt_score, avg_gt_score_hint)
        else:
            return (gt_scores, gt_scores_hint)
    elif args.msm == "ce":
        avg_crossEnloss /= i
        avg_crossEnloss_hint /= i
        if not no_avg:
            return (avg_crossEnloss, avg_crossEnloss_hint)
        else:
            return (crossEnlosses, crossEnlosses_hint)
    elif args.msm == "co":
        coco_json.create_json()
        coco_json_update.create_json()
        return None
from models.hmnet_heavy_x1_ab_fea_0310 import hmnet from utils.data_loader import get_loader import trainer_hmnet_Flickr2K as trainer torch.manual_seed(0) scale_factor = 4 batch_size = 1 epoch_start = 0 num_epochs = 40 model = hmnet(scale=scale_factor) #model.load_state_dict(torch.load('./weights/HMNET_x4_Heavy_REDS_JPEG.pth')) train_loader = get_loader(data='Flickr2K', mode='train', batch_size=batch_size, height=0, width=0, scale_factor=1, augment=True, force_size=True) test_loader = get_loader(data='Flickr2K', mode='test', batch_size=batch_size, height=0, width=0, scale_factor=1, augment=True, force_size=True) trainer.train(model, train_loader, test_loader, mode=f'HMNET_Flickr2K_ablation_fea',
def test(encoder, decoder, vocab, num_samples, num_hints, debug=False,
         c_step=0.0, no_avg=True):
    # Single-sample variant of the hint-captioning evaluation: only the
    # "co" (pycoco JSON) measurement mode is implemented here. Writes two
    # COCO-format result files (with / without the update step) and returns
    # None.
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    rt_image = 'data/val_resized2014'
    annotations = args.caption or 'data/annotations/captions_val2014.json'
    shuffle = False
    data_loader = get_loader(rt_image, annotations, vocab, transform, 1,
                             shuffle, 1)
    # Decoder's output layer must match the vocabulary size.
    assert len(vocab) == decoder.linear.out_features
    avg_gt_score, avg_gt_score_hint = torch.zeros(args.compare_steps,
                                                  1), torch.zeros(
                                                      args.compare_steps, 1)
    gt_scores, gt_scores_hint = [], []
    avg_crossEnloss, avg_crossEnloss_hint = torch.zeros(
        args.compare_steps, 1), torch.zeros(args.compare_steps, 1)
    crossEnlosses, crossEnlosses_hint = [], []
    num_sampled = 0
    data_points = []
    coco_json = CocoJson('data/captions_val2014.json',
                         'data/captions_val2014_results.json')
    coco_json_update = CocoJson('data/captions_val2014.json',
                                'data/captions_val2014_results_u.json')
    count = 0
    for i, (image, caption, length, img_id,
            ann_id) in enumerate(data_loader):
        if num_sampled > num_samples or i > num_samples:
            break
        image_tensor = to_var(image, volatile=True)
        feature = encoder(image_tensor)
        # Compute optimal c_step by (pred, ce)
        if args.msm == "co":
            # Decode with the first num_hints ground-truth tokens as hints.
            no_update, pred_caption, _ = decode_beta(feature, caption[0,:num_hints+1], decoder, \
                vocab, c_step, args.prop_steps)
            # print caption
            # no_hint, _, _ = decode_beta(feature,caption[0,:1], decoder, \
            #     vocab, c_step, args.prop_steps)
            # Convert id tensor to words (drop <start>/<end>).
            caption = [vocab.idx2word[c] for c in caption[0, 1:-1]]
            # Splice the hint words onto both decodings so only the
            # continuation after the hints differs.
            no_update = ' '.join(caption[:num_hints]) + ' ' + ' '.join(
                no_update.split()[num_hints:])
            pred_caption = ' '.join(caption[:num_hints]) + ' ' + ' '.join(
                pred_caption.split()[num_hints:])
            caption = ' '.join(caption)
            if args.load_val:
                caption = None
            coco_json_update.add_entry(img_id[0], ann_id[0], caption,
                                       pred_caption)
            coco_json.add_entry(img_id[0], ann_id[0], caption, no_update)
        if debug:
            # NOTE(review): `hypothesis`/`hypothesis_hint`/`gt_score` are not
            # defined in this function — debug=True raises NameError; confirm
            # intended variables.
            print("Ground Truth: {}\nNo hint: {}\nHint: {}\
\nGround Truth Score: {}\nGround Truth Score Improve {}\
".format(caption, hypothesis, hypothesis_hint, gt_score, gt_score_hint))
    if args.msm == "co":
        coco_json.create_json()
        coco_json_update.create_json()
    return None
# --- Run 1: HMNet feature-ablation variant on REDS, x4 super-resolution ---
from models.hmnet_heavy_ablation_fea import hmnet
from utils.data_loader import get_loader
import trainer_0426 as trainer

torch.manual_seed(0)  # reproducible weights/augmentation
scale_factor = 4
batch_size = 1
epoch_start = 0
num_epochs = 5
model = hmnet(scale=scale_factor)
today = datetime.datetime.now().strftime('%Y.%m.%d')  # tag for checkpoints/logs
size = 256
num_epochs = 5  # NOTE(review): redundant — num_epochs was already set to 5 above
train_loader = get_loader(data='REDS', mode='train', batch_size=batch_size,
                          height=size, width=size, scale_factor=4,
                          augment=True)
test_loader = get_loader(data='REDS', mode='test', height=256, width=256,
                         scale_factor=4)
trainer.train(model, train_loader, test_loader, mode=f'HMNET_REDS_ab_fea',
              epoch_start=0, num_epochs=num_epochs, save_model_every=1,
              test_model_every=1, today=today, refresh=False)

# --- Run 2: full heavy HMNet; the re-imports below intentionally shadow the
# ablation `hmnet` imported above ---
from models.hmnet_heavy import hmnet
from utils.data_loader import get_loader
import trainer_0426 as trainer

torch.manual_seed(0)
scale_factor = 4
batch_size = 1
epoch_start = 0
num_epochs = 5
model = hmnet(scale=scale_factor)
def main():
    # Generate captions for the validation set with a trained model
    # (greedy or beam search) and dump them as a COCO-style predictions JSON.
    logger = LogMaster.get_logger('eval')
    if not os.path.isfile(args.ckpt_path):
        print('checkpoint not found: ', args.ckpt_path)
        exit(-1)
    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    # NOTE(review): RandomCrop/RandomHorizontalFlip at evaluation time makes
    # the generated captions non-deterministic — confirm intended.
    transform = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    logger.info('building data loader...')
    # Build data loader
    data_loader, image_ids = get_loader(args.val_image_dir,
                                        args.val_caption_path,
                                        vocab,
                                        transform,
                                        args.batch_size,
                                        shuffle=False,
                                        num_workers=args.num_workers,
                                        is_eval=True)
    logger.info('building model...')
    # Build the models
    vocab_size = len(vocab)
    if args.model == 'ssa':
        net = SSA(embed_dim=args.embed_size,
                  lstm_dim=args.hidden_size,
                  vocab_size=vocab_size)
    elif args.model == 'nic':
        net = NIC(embed_dim=args.embed_size,
                  lstm_dim=args.hidden_size,
                  vocab_size=vocab_size)
    elif args.model == 'scacnn':
        net = SCACNN(embed_dim=args.embed_size,
                     lstm_dim=args.hidden_size,
                     vocab_size=vocab_size)
    else:
        net = None
        print('model name not found: ' + args.model)
        exit(-2)
    net.eval()
    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
        net.cuda()
    net.zero_grad()
    logger.info('restoring pretrained model...')
    checkpoint = torch.load(args.ckpt_path)
    # Two checkpoint formats are supported: a dict with args/net_state/epoch,
    # or a bare state_dict (fallback).
    try:
        args_dict = checkpoint['args']
        args.batch_size = args_dict['batch_size']
        args.learning_rate = args_dict['learning_rate']
        args.att_mode = args_dict['att_mode']
        args.model = args_dict['model']
        args.embed_size = args_dict['embed_size']
        args.hidden_size = args_dict['hidden_size']
        args.num_layers = args_dict['num_layers']
        net.load_state_dict(checkpoint['net_state'])
        epoch = checkpoint['epoch']
        print('using loaded args from checkpoint:')
        pprint(args)
    except:
        # NOTE(review): bare except also hides genuine load errors (e.g. a
        # mismatched state_dict) — narrowing to KeyError would be safer.
        net.load_state_dict(checkpoint)
        epoch = 0
    logger.info('start generating captions...')
    total_step = len(data_loader)
    start_token = vocab('<start>')
    end_token = vocab('<end>')
    syn_captions = []
    keys = {}
    for i, (images, inputs, targets, masks, lengths,
            img_ids) in tqdm(enumerate(data_loader),
                             total=total_step,
                             leave=False,
                             ncols=80,
                             unit='b'):
        images = to_var(images, requires_grad=False)
        if args.beam_width == 1:
            results = net.greedy_search(images, start_token).data.cpu().numpy()
        else:
            results = net.beam_search(
                images, start_token,
                beam_width=args.beam_width).data.cpu().numpy()
        results = list(results)  # each element is [seq_len, 1]
        # NOTE(review): this inner loop reuses `i`, shadowing the batch index
        # from the enumerate above — harmless here but confusing.
        for i in range(len(results)):
            sentence = ''
            res = list(results[i])
            img_id = img_ids[i]
            # Assemble words until <end>, skipping the <start> token.
            for w in res:
                if w == start_token:
                    continue
                elif w == end_token:
                    break
                word = vocab.idx2word[w]
                sentence += (' ' + word)
            # only keep one caption for each image
            try:
                _ = keys[img_id]
            except KeyError:
                keys[img_id] = 1
                item = {'image_id': img_id, 'caption': sentence}
                syn_captions.append(item)
    res_dir = Path(args.result_dir)
    if not res_dir.is_dir():
        res_dir.mkdir()
    result_path = res_dir / Path(args.model + '-' + str(epoch) +
                                 '-predictions.json')
    with open(str(result_path), 'w') as fout:
        json.dump(syn_captions, fout)
    logger.info(f'captions saved: {str(result_path)}')
def main(args):
    # Train DDxNet on a multi-class time-series classification task:
    # per-epoch training loop with gradient clipping, end-of-epoch validation,
    # best-accuracy checkpointing, metric logging/plotting, and LR scheduling
    # on the validation loss.
    args.model_path = os.path.join(args.model_path, str(datetime.date.today()))
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)
    loss_fn = nn.CrossEntropyLoss()
    softmax = nn.Softmax(dim=1)
    # Build DDxNet with 4 DDx blocks of convolutions
    model = DDxNet(args.num_channels, args.num_timesteps, DDx_block,
                   [2, 6, 8, 4], args.output_dim, causal=True,
                   use_dilation=True).to(device)
    # multi-gpu training if available
    if (torch.cuda.device_count() > 1):
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
        model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.9, 0.98))
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'min',
                                                           patience=2,
                                                           factor=0.1**0.75,
                                                           verbose=True)
    train_loader = get_loader(args.data_dir, 'train', args.batch_size,
                              args.shuffle)
    test_loader = get_loader(args.data_dir, 'test', args.batch_size,
                             shuffle=False)
    best_val_acc = 0.
    if not os.path.exists(os.path.join('./logs', str(datetime.date.today()))):
        os.makedirs(os.path.join('./logs', str(datetime.date.today())))
    results_file = os.path.join('./logs', str(datetime.date.today()),
                                args.results_file)
    results = ResultsLog(results_file)
    for epoch in range(args.num_epochs):
        avg_loss = 0.
        total_predlabs = []
        total_truelabs = []
        total_probs = []
        for itr, (X, y_true) in enumerate(train_loader):
            model.train()
            X = X.to(device).float()
            # Labels arrive one-hot; torch.max(..., 1)[1] recovers class ids.
            y_true = y_true.to(device).long()
            y_pred = model(X)
            loss = loss_fn(y_pred, torch.max(y_true, 1)[1])
            avg_loss += loss.item() / len(train_loader)
            optimizer.zero_grad()
            loss.backward()
            if args.clip_grad > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.clip_grad)
            optimizer.step()
            probs = softmax(y_pred)
            _, predlabs = torch.max(probs.data, 1)
            total_probs.extend(probs.data.cpu().numpy())
            total_predlabs.extend(predlabs.data.cpu().numpy())
            total_truelabs.extend(torch.max(y_true, 1)[1].data.cpu().numpy())
            batch_acc = accuracy_score(
                torch.max(y_true, 1)[1].data.cpu().numpy(),
                predlabs.data.cpu().numpy())
            if (itr + 1) % 50 == 0:
                print(('Epoch: {} Iter: {}/{} Loss: {} Acc: {}').format(
                    epoch, itr + 1, len(train_loader), loss.item(),
                    batch_acc))
            # Runs only on the last batch of the epoch: compute train metrics,
            # then validate, checkpoint, log and step the scheduler.
            if ((itr + 1) % len(train_loader)) == 0:
                total_truelabs = np.array(total_truelabs)
                total_predlabs = np.array(total_predlabs)
                total_probs = np.array(total_probs)
                total_train_acc = accuracy_score(total_truelabs,
                                                 total_predlabs)
                f1 = f1_score(total_truelabs, total_predlabs, average='macro')
                res = {
                    'epoch': epoch + (itr * 1.0 + 1.0) / len(train_loader),
                    'steps': epoch * len(train_loader) + itr + 1,
                    'train_loss': avg_loss,
                    'train_f1': f1,
                    'train_acc': total_train_acc
                }
                model.eval()
                with torch.no_grad():
                    # Full pass over the held-out test split.
                    total_predlabs = []
                    total_probs = []
                    total_truelabs = []
                    total_val_loss = 0.
                    for i, (dat, labs) in enumerate(test_loader):
                        dat = dat.to(device).float()
                        labs = labs.to(device).long()
                        y_pred = model(dat)
                        val_loss = loss_fn(y_pred, torch.max(labs, 1)[1])
                        probs = softmax(y_pred)
                        _, predlabs = torch.max(probs.data, 1)
                        total_probs.extend(probs.data.cpu().numpy())
                        total_predlabs.extend(predlabs.data.cpu().numpy())
                        total_truelabs.extend(
                            torch.max(labs, 1)[1].data.cpu().numpy())
                        total_val_loss += (val_loss.item() / len(test_loader))
                    total_truelabs = np.array(total_truelabs)
                    total_predlabs = np.array(total_predlabs)
                    total_probs = np.array(total_probs)
                    total_val_acc = accuracy_score(total_truelabs,
                                                   total_predlabs)
                    total_val_f1 = f1_score(total_truelabs,
                                            total_predlabs,
                                            average='macro')
                    print("At Epoch: {}, Iter: {}, val_loss: {}, val_acc: {}".
                          format(epoch, itr + 1, total_val_loss,
                                 total_val_acc))
                    print("Confusion Matrix: ")
                    print(confusion_matrix(total_truelabs, total_predlabs))
                    # Keep the checkpoint/outputs for the best accuracy so far.
                    if (total_val_acc > best_val_acc):
                        best_val_acc = total_val_acc
                        print("saving model")
                        torch.save(
                            model.state_dict(),
                            os.path.join(args.model_path,
                                         args.results_file + '_model.pth'))
                        np.savetxt(os.path.join(
                            args.model_path, args.results_file + '_prob.txt'),
                                   total_probs,
                                   delimiter=',')
                        np.savetxt(os.path.join(
                            args.model_path, args.results_file + '_pred.txt'),
                                   total_predlabs,
                                   delimiter=',')
                        np.savetxt(os.path.join(
                            args.model_path, args.results_file + '_true.txt'),
                                   total_truelabs,
                                   delimiter=',')
                    res['val_loss'] = total_val_loss
                    res['val_acc'] = total_val_acc
                    res['val_f1'] = total_val_f1
                # Persist and plot the accumulated metric curves.
                plot_loss = ['train_loss']
                plot_acc = ['train_acc']
                plot_f1 = ['train_f1']
                plot_loss += ['val_loss']
                plot_acc += ['val_acc']
                plot_f1 += ['val_f1']
                results.add(**res)
                results.plot(x='epoch',
                             y=plot_loss,
                             title='Multi-Class Loss',
                             ylabel='CE Loss')
                results.plot(x='epoch',
                             y=plot_acc,
                             title='Accuracy',
                             ylabel='Accuracy')
                results.plot(x='epoch',
                             y=plot_f1,
                             title='F1-Score (Macro)',
                             ylabel='F1-Score')
                results.save()
                # Scheduler watches the validation loss ('min' mode).
                scheduler.step(total_val_loss, epoch)
def main():
    # Train a captioning model (SSA / NIC / SCACNN) with optional restore
    # from an existing checkpoint; saves full training state every
    # args.save_step epochs.
    logger = LogMaster.get_logger('main')
    # Create checkpoint directory
    if not os.path.exists(args.ckpt_dir):
        os.makedirs(args.ckpt_dir)
    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    if args.restore_train:
        # Resume: pull hyper-parameters back out of the saved checkpoint so
        # the rebuilt model matches the saved weights.
        if not os.path.isfile(args.ckpt_path):
            print('checkpoint not found: ', args.ckpt_path)
            exit(-1)
        checkpoint = torch.load(args.ckpt_path)
        args_dict = checkpoint['args']
        args.batch_size = args_dict['batch_size']
        args.learning_rate = args_dict['learning_rate']
        args.att_mode = args_dict['att_mode']
        args.model = args_dict['model']
        args.embed_size = args_dict['embed_size']
        args.hidden_size = args_dict['hidden_size']
        args.num_layers = args_dict['num_layers']
        cur_epoch = checkpoint['epoch']
        print('restore training from existing checkpoint')
        pprint.pprint(args_dict)
    else:
        cur_epoch = 0
        checkpoint = None
    logger.info('building data loader...')
    # Build data loader
    data_loader = get_loader(args.train_image_dir,
                             args.train_caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)
    logger.info(f'building model {args.model}...')
    # Build the models
    vocab_size = len(vocab)
    if args.model == 'ssa':
        net = SSA(embed_dim=args.embed_size,
                  lstm_dim=args.hidden_size,
                  vocab_size=vocab_size,
                  dropout=args.dropout,
                  fine_tune=args.fine_tune)
    elif args.model == 'nic':
        net = NIC(embed_dim=args.embed_size,
                  lstm_dim=args.hidden_size,
                  vocab_size=vocab_size,
                  dropout=args.dropout,
                  fine_tune=args.fine_tune)
    elif args.model == 'scacnn':
        net = SCACNN(embed_dim=args.embed_size,
                     lstm_dim=args.hidden_size,
                     vocab_size=vocab_size,
                     dropout=args.dropout,
                     att_mode=args.att_mode,
                     fine_tune=args.fine_tune)
    else:
        net = None
        print('model name not found: ' + args.model)
        exit(-2)
    net.train()
    net.zero_grad()
    # Project convention: net.train_params is the trainable parameter subset.
    params = net.train_params
    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1 and args.model == 'scacnn':
            net = nn.DataParallel(net)
        net.cuda()
    # Loss and Optimizer
    # NOTE(review): per-token loss (reduce=False, deprecated spelling of
    # reduction='none') so it can be masked below before averaging.
    criterion = nn.CrossEntropyLoss(reduce=False)
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)
    if args.restore_train:
        print('restoring from checkpoint...')
        net.load_state_dict(checkpoint['net_state'])
        optimizer.load_state_dict(checkpoint['opt_state'])
    logger.info('start training...')
    # Train the Models
    total_step = len(data_loader)
    running_loss = 0
    for epoch in range(args.num_epochs):
        for i, (images, inputs, targets, masks, lengths,
                img_ids) in tqdm(enumerate(data_loader),
                                 total=total_step,
                                 leave=False,
                                 ncols=80,
                                 unit='b'):
            # Set mini-batch data
            if args.fine_tune:
                images = to_var(images, requires_grad=True)
            else:
                images = to_var(images, requires_grad=False)
            inputs = to_var(inputs, requires_grad=False)
            targets = to_var(targets, requires_grad=False)
            targets = targets.view(-1)
            masks = to_var(masks, requires_grad=False).view(-1)
            net.zero_grad()
            # Forward, Backward and Optimize
            outputs = net.forward(images, inputs, lengths)
            outputs = outputs.contiguous().view(-1, vocab_size)
            loss = criterion(outputs, targets)
            # Mask out padding positions, then average over all tokens.
            loss = torch.mean(loss * masks)
            loss.backward()
            optimizer.step()
            running_loss += loss.data.item()
            # Make sure python releases GPU memory
            del loss, outputs, images, inputs, targets, masks, lengths, img_ids
        running_loss /= total_step
        logger.info('Epoch [%d/%d], Loss: %.4f, Perplexity: %5.4f' %
                    (cur_epoch + epoch + 1, args.num_epochs, running_loss,
                     np.exp(running_loss)))
        running_loss = 0
        # Save the model
        if (epoch + 1) % args.save_step == 0:
            if args.model == 'scacnn':
                save_file = args.model + '-' + args.att_mode + '-model-' + str(
                    cur_epoch + epoch + 1) + '.ckpt'
            else:
                save_file = args.model + '-model-' + str(cur_epoch + epoch +
                                                         1) + '.ckpt'
            save_path = os.path.join(args.ckpt_dir, save_file)
            # Bundle weights, optimizer state, args and epoch for restore.
            args_dict = vars(args)
            opt_state = optimizer.state_dict()
            net_state = net.state_dict()
            epoch_id = epoch + 1
            save_data = {
                'net_state': net_state,
                'opt_state': opt_state,
                'args': args_dict,
                'epoch': epoch_id
            }
            torch.save(save_data, save_path)
            logger.info(f'model saved: {save_path}')
ssims.append(get_ssim(hr[b].unsqueeze(0), sr[b].unsqueeze(0)).item()) if h > 160 and w > 160: msssim = get_msssim(hr[b].unsqueeze(0), sr[b].unsqueeze(0)).item() else: msssim = 0 msssims.append(msssim) return np.array(psnrs).mean(), np.array(ssims).mean(), np.array( msssims).mean() quantize = lambda x: x.mul(255).clamp(0, 255).round().div(255) torch.manual_seed(0) train_loader = get_loader(mode='train', batch_size=16, scale_factor=4, augment=True) test_loader = get_loader(mode='test') device = 'cuda' if torch.cuda.is_available() else 'cpu' up = torch.nn.UpsamplingBilinear2d(scale_factor=4).to(device) def get_sr(lr, hr, alpha=0.1): return quantize(up(lr) * (1 - alpha) + hr * alpha) model = VGG(pretrained=True).to(device) weight_dir = f'./weights/Discriminator' os.makedirs(weight_dir, exist_ok=True)
test_loader = get_loader(mode='test', height=256, width=256, scale_factor=4) trainer.train(model, train_loader, test_loader, mode=f'ref_denoise_FFDNet', epoch_start=epoch_start, num_epochs=num_epochs, save_model_every=20, test_model_every=1, today=today) """ today = '2021.03.05' #model = DnCNN() #train_loader = get_loader(data='SIDD', mode='train', batch_size=batch_size, height=192, width=192, scale_factor=1, augment=True) #test_loader = get_loader(data='SIDD', mode='test', height=256, width=256, scale_factor=1) #trainer.train(model, train_loader, test_loader, mode=f'ref_denoise_sidd_DNCNN', epoch_start=epoch_start, num_epochs=num_epochs, save_model_every=20, test_model_every=1, today=today) #model = MemNet(in_channels=3, channels=64, num_memblock=6, num_resblock=6) #train_loader = get_loader(data='SIDD', mode='train', batch_size=batch_size, height=192, width=192, scale_factor=1, augment=True) #test_loader = get_loader(data='SIDD', mode='test', height=256, width=256, scale_factor=1) #trainer.train(model, train_loader, test_loader, mode=f'ref_denoise_sidd_MemNet', epoch_start=epoch_start, num_epochs=num_epochs, save_model_every=20, test_model_every=1, today=today) #model = DHDN() #train_loader = get_loader(data='SIDD', mode='train', batch_size=batch_size, height=192, width=192, scale_factor=1, augment=True) #test_loader = get_loader(data='SIDD', mode='test', height=256, width=256, scale_factor=1) #trainer.train(model, train_loader, test_loader, mode=f'ref_denoise_sidd_DHDN', epoch_start=epoch_start, num_epochs=num_epochs, save_model_every=20, test_model_every=1, today=today) model = FFDNet() train_loader = get_loader(data='SIDD', mode='train', batch_size=batch_size, height=192, width=192, scale_factor=1, augment=True) test_loader = get_loader(data='SIDD', mode='test', height=256, width=256, scale_factor=1) trainer.train(model, train_loader, test_loader, mode=f'ref_denoise_sidd_FFDNet', epoch_start=epoch_start, num_epochs=num_epochs, save_model_every=20, 
test_model_every=1, today=today)
model = NMDiscriminator().to(device) # torchsummary(model, input_size=(3, 448, 448)) model.load_state_dict(torch.load('./models/weights/NMD.pth')) # Hyper-parameters num_epochs = 1000 learning_rate = 1e-4 eps = 1e-7 criterion = nn.BCELoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) loader = get_loader(batch_size=24) total_iter = len(loader) current_lr = learning_rate alpha = 0.5 sigma = 0.1 stime = time.time() total_epoch_iter = total_iter * num_epochs iter_count = 0 for epoch in range(num_epochs): for i, (lr, hr) in enumerate(loader): iter_count += 1 lr = lr.to(device)
"""Entry point: train the DRN super-resolution baseline (x4 upscale)."""
import trainer_DRN as trainer
from models.DRN import DRN
from utils.data_loader import get_loader
import torch

# Pin the RNG so weight init and shuffling are reproducible across runs.
torch.manual_seed(0)

network = DRN()

# 196x196 augmented training patches; evaluation crops at the loader default.
loader_train = get_loader(mode='train', height=196, width=196, scale_factor=4,
                          batch_size=4, augment=True)
loader_eval = get_loader(mode='test', scale_factor=4)

trainer.train(network, loader_train, loader_eval, mode='DRN_Baseline')
def main(args):
    """Train an image-to-pose captioning model (CNN encoder + RNN decoder).

    Trains either the upper-body or lower-body decoder depending on
    ``args.upp`` / ``args.low``, logging loss/perplexity and periodically
    checkpointing both networks to ``args.model_path``.
    """
    # Create the checkpoint directory if it does not exist yet.
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)
    # Image preprocessing: resize, tensorize, normalize with ImageNet stats.
    transform = transforms.Compose([
        transforms.Resize(args.crop_size),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    # Load the pickled vocabulary wrapper and annotations.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    print("cluster sizes: ", vocab.get_shapes())
    with open(args.annotation_path, 'rb') as f:
        annotation = pickle.load(f)
    print("annotations size:", len(annotation))
    # Build the training data loader over images, homographies and poses.
    data_loader = get_loader(annotation,
                             args.image_dir,
                             args.h_dir,
                             args.openpose_dir,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers,
                             seq_length=args.seq_length)
    # Output vocabulary sizes for upper- and lower-body pose clusters.
    upp_size, low_size = vocab.get_shapes()
    encoder = EncoderCNN(args.embed_size).to(device)
    # Decoder output size is cluster count + 1 (presumably an extra
    # padding/end token — TODO confirm against DecoderRNN).
    if args.upp:
        decoder = DecoderRNN(args.embed_size, args.hidden_size, upp_size + 1,
                             args.num_layers).to(device)
    elif args.low:
        decoder = DecoderRNN(args.embed_size, args.hidden_size, low_size + 1,
                             args.num_layers).to(device)
    else:
        print('Please specify upper/lower body model to train')
        exit(0)
    # Loss and optimizer: only the decoder plus the encoder's final linear
    # and batch-norm layers are trained (the CNN backbone stays frozen).
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)
    # Training loop.
    total_step = len(data_loader)
    print("total iter", total_step)
    for epoch in range(args.num_epochs):
        for i, (images, poses, homography, poses2,
                lengths) in enumerate(data_loader):
            images = images.to(device)
            poses = poses.to(device)
            # Flatten padded pose sequences into a single target tensor.
            targets = pack_padded_sequence(poses, lengths,
                                           batch_first=True)[0]
            # Forward, backward, optimize.
            features = encoder(images)
            outputs = decoder(features, homography, poses2, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()
            # Periodic console logging (note: prints 0-based epoch/step,
            # while checkpoint filenames below are 1-based).
            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args.num_epochs, i, total_step,
                            loss.item(), np.exp(loss.item())))
            # Checkpoint every save_step iterations and at end of epoch.
            if ((i + 1) % args.save_step == 0) or (i == total_step - 1):
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
def main(args):
    """Train and validate an S2VT attention video-captioning model.

    Loads pre-extracted video features and pickled vocabulary, trains with
    scheduled teacher forcing, evaluates on the validation split every
    epoch, and checkpoints both periodic snapshots and the best model
    (lowest validation loss) under ``args.model_path``.
    """
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # No image preprocessing: the loaders serve pre-extracted features.
    transform = None

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    vocab_size = vocab.__len__()

    # Build train / validation data loaders.
    data_loader = get_loader(args.training_feat_dir,
                             args.training_captions,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)
    val_data_loader = get_loader(args.validation_feat_dir,
                                 args.validation_captions,
                                 vocab,
                                 transform,
                                 args.batch_size,
                                 shuffle=True,
                                 num_workers=args.num_workers)

    # Build the models
    encoder = EncoderRNN(args.video_size, args.embed_size,
                         args.input_dropout_p, args.rnn_dropout_p,
                         args.num_layers, args.bidirectional).to(device)
    decoder = DecoderRNN(
        vocab_size,
        args.max_seq_length,
        args.embed_size,
        args.word_size,
    ).to(device)
    model = S2VTAttentionModel(encoder, decoder)

    # Loss and optimizer; LR decays by 0.8 every 25 epochs.
    criterion = CustomLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=0)
    exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       step_size=25,
                                                       gamma=0.8)

    # Probability of feeding ground-truth tokens (teacher forcing).
    # Constant across the run, so hoisted out of the batch loop.
    teacher_forcing_ratio = 0.7

    total_step = len(data_loader)
    # BUGFIX: was the magic sentinel 999 — float('inf') is robust even if
    # early validation losses explode above 999.
    best_loss = float('inf')
    for epoch in range(args.num_epochs):
        model.train()
        for i, (features, captions, lengths) in enumerate(data_loader):
            # Coin flip: teacher forcing vs. free running for this batch.
            use_teacher_forcing = random.random() < teacher_forcing_ratio

            features = features.to(device)
            captions = captions.to(device)

            if use_teacher_forcing:
                # Teacher forcing: feed the target as the next input.
                seq_probs, _ = model(features, captions,
                                     mode='teacher_forcing')
            else:
                # Use the model's own predictions as the next input.
                seq_probs, _ = model(features, mode='no_teacher')
            loss = criterion(seq_probs, captions[:, 1:], lengths)  # eliminate <SOS>

            optimizer.zero_grad()
            loss.backward()
            # clip_gradient(optimizer, grad_clip=0.1)
            optimizer.step()

            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch + 1, args.num_epochs, i, total_step,
                            loss.item(), np.exp(loss.item())))

        # BUGFIX: step the LR scheduler *after* the epoch's optimizer
        # updates (required since PyTorch 1.1); it was previously called
        # at the top of the epoch, before any optimizer.step().
        exp_lr_scheduler.step()

        # Evaluation on the validation split.
        model.eval()
        val_loss = []
        for i, (features, captions, lengths) in enumerate(val_data_loader):
            features = features.to(device)
            captions = captions.to(device)
            with torch.no_grad():
                seq_probs, _ = model(features, mode='no_teacher')
                loss = criterion(seq_probs, captions[:, 1:],
                                 lengths)  # eliminate <SOS>
            val_loss.append(loss.item())

        val_loss = np.mean(val_loss)
        print('Epoch [{}/{}], VAL_Loss: {:.4f}, Perplexity: {:5.4f}'.format(
            epoch + 1, args.num_epochs, val_loss, np.exp(val_loss)))

        # Save periodic checkpoints and track the best validation loss.
        if (epoch + 1) % args.save_step == 0:
            torch.save(
                model.state_dict(),
                os.path.join(args.model_path,
                             'checkpoint-{}.ckpt'.format(epoch + 1)))
        if best_loss > val_loss:
            # BUGFIX: dropped a no-op .format(epoch + 1) that was applied
            # to the placeholder-free literal 'best.pth'.
            torch.save(model.state_dict(),
                       os.path.join(args.model_path, 'best.pth'))
            best_loss = val_loss
return loss1 + loss2 if __name__ == "__main__": netname = 'iternet' num_epochs = 1000 eps = 1e-6 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model = IterNet().to(device) # model.load_state_dict(torch.load('./models/weights/iternet_model_final.pth')) # from torchsummary import summary # summary(model, input_size=(3, 512, 512)) loader = get_loader(image_dir='./data/', batch_size=2, mode='train') total_iter = len(loader) lr = 1e-3 optimizer = torch.optim.Adam(model.parameters(), lr=lr) stime = time.time() total_epoch_iter = total_iter * num_epochs iter_count = 0 bce = nn.BCELoss() losses = [] summary = SummaryWriter() for epoch in range(num_epochs): for i, (ximg, yimg) in enumerate(loader):
default='0', help='the gpu used') parser.add_argument('--checkpoints', type=str, default='./weights/DRIVE1/model_best.pth', help="weight's path") parser.add_argument('--save_path', type=str, required=True, choices=['./results/prob', './results/binary'], help="weight's path") args = parser.parse_args() os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_avaiable device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Loading test data test_loader = get_loader(args.data_path, args.resize, args.batch_size, mode='test', dataset_name=args.dataset) # Load model net = MDMNet(input_size=args.resize, n_classes=args.n_class) # Load weights net, _ = load_pretrained(net, args.checkpoints) run_test(net, test_loader, device, args.save_path)
# Build RCAN data loaders for a chosen super-resolution scale factor.
# NOTE(review): `sys` and `os` must be imported earlier in this file —
# they are used below but not imported here; verify against the full file.
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
from utils.data_loader import get_loader
import torch
# Fix the RNG seed for reproducible runs.
torch.manual_seed(0)
from models.RCAN import RCAN
model = RCAN()
# Hard-coded experiment switch: x4 uses explicit patch sizes with
# augmentation; x2 falls back to the loader defaults.
scale_factor = 4
if scale_factor == 4:
    train_loader = get_loader(mode='train', batch_size=16, height=192,
                              width=192, scale_factor=4, augment=True)
    test_loader = get_loader(mode='test', height=256, width=256,
                             scale_factor=4)
elif scale_factor == 2:
    train_loader = get_loader(mode='train', batch_size=16, augment=True)
    test_loader = get_loader(mode='test')
# Alternative trainer invocations kept for reference:
# import trainer as trainer
# trainer.train(model, train_loader, test_loader, mode='RCAN_x2_Baseline')
# import trainer_v6_from_shallow as trainer
# from models.RCAN_train_from_shallow import RCAN
"""Entry point: train the RCAN baseline with the loader's default scale."""
import trainer
from models.RCAN import RCAN
from utils.data_loader import get_loader
import torch

# Seed the RNG so initialization and shuffling are deterministic.
torch.manual_seed(0)

rcan = RCAN()
training_data = get_loader(mode='train', batch_size=16, augment=True)
evaluation_data = get_loader(mode='test')

trainer.train(rcan, training_data, evaluation_data, mode='RCAN_Baseline')
def main(args):
    """Train a CNN-encoder / RNN-decoder image captioning model.

    Legacy PyTorch (<=0.3) style training loop (``to_var``, ``volatile``,
    ``loss.data[0]`` are kept intentionally for compatibility with the
    rest of this codebase). Checkpoints, a running loss pickle, and a
    plain-text training log are written under ``args.model_path``.
    """
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Image preprocessing with ImageNet normalization
    # (see https://github.com/pytorch/vision#models).
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    val_loader = get_loader('./data/val_resized2014/',
                            './data/annotations/captions_val2014.json',
                            vocab, transform, 1, False, 1)

    start_epoch = 0
    encoder_state = args.encoder
    decoder_state = args.decoder

    # Build the models.
    encoder = EncoderCNN(args.embed_size)
    if not args.train_encoder:
        encoder.eval()
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    # Resolve which checkpoint states to report; a decoder checkpoint name
    # of the form 'decoder-<epoch>-<step>' resumes the epoch counter.
    # NOTE(review): the states are reported but no weights are loaded in
    # this function — confirm loading happens elsewhere.
    if args.restart:
        encoder_state, decoder_state = 'new', 'new'
    if encoder_state == '':
        encoder_state = 'new'
    if decoder_state == '':
        decoder_state = 'new'
    if decoder_state != 'new':
        start_epoch = int(decoder_state.split('-')[1])

    print("Using encoder: {}".format(encoder_state))
    print("Using decoder: {}".format(decoder_state))

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Make logfile and log output.
    with open(args.model_path + args.logfile, 'a+') as f:
        f.write("Training on vanilla loss (using new model). Started {} .\n".
                format(str(datetime.now())))
        # BUGFIX: log the actual encoder/decoder states instead of the
        # hard-coded "new"/"new" (which contradicted the stdout prints).
        f.write("Using encoder: {}\nUsing decoder: {}\n\n".format(
            encoder_state, decoder_state))

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer: train the decoder plus the encoder's final
    # linear and batch-norm layers only.
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    batch_loss = []
    batch_acc = []

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(start_epoch, args.num_epochs):
        for i, (images, captions, lengths, _, _) in enumerate(data_loader):
            # Set mini-batch dataset
            images = to_var(images, volatile=True)
            captions = to_var(captions)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, Backward and Optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            out = decoder(features, captions, lengths)
            loss = criterion(out, targets)
            batch_loss.append(loss.data[0])

            loss.backward()
            optimizer.step()

            # Save the models and the loss history so far.
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                # BUGFIX: pickle requires binary mode — 'w+' (text) raises
                # TypeError under Python 3; 'wb' truncates and rewrites the
                # full history on each save, as before.
                with open(args.model_path + 'training_loss.pkl', 'wb') as f:
                    pickle.dump(batch_loss, f)
                with open(args.model_path + 'training_val.pkl', 'wb') as f:
                    pickle.dump(batch_acc, f)

    with open(args.model_path + args.logfile, 'a') as f:
        f.write("Training finished at {} .\n\n".format(str(datetime.now())))
# Parse CLI options and set up GPUs, data loaders and the MDMNet model.
# (train_loader / val_loader / net are consumed by the training code that
# follows this block.)
args = parser.parse_args()

# Select the GPU(s) by PCI bus order; note the arg is spelled
# 'gpu_avaiable' (sic) in the parser and must stay that way.
os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_avaiable

# cuDNN settings: favor throughput (benchmark) over determinism.
cudnn.benchmark = True
cudnn.deterministic = False
cudnn.enabled = True

# Recreate the checkpoint directory from scratch.
rm_mkdir(args.checkpoints)

# Data iteration generation: shuffled training loader and a deterministic
# validation loader that reuses the dataset's 'test' mode.
train_loader = get_loader(args.data_path,
                          args.resize,
                          args.batch_size,
                          shuffle=True,
                          dataset_name=args.dataset,
                          num_workers=4)
val_loader = get_loader(args.data_path,
                        args.resize,
                        args.batch_size,
                        shuffle=False,
                        dataset_name=args.dataset,
                        mode='test',
                        num_workers=4)

net = MDMNet(input_size=args.resize, n_classes=args.n_class)
# Optionally resume from pretrained weights (second return value ignored).
if args.resume:
    net, _ = load_pretrained(net, args.pretrained)
'%Y%m%d%H') + '.log' train_logger = setup_logger('train_logger', log_file) model = ResNetPD().cuda() print('Params: ', get_n_params(model)) train_logger.info(f'Params: {get_n_params(model)}') train_logger.info( f'optimizer: {optimizer_type}, lr: {lr}, batch_size: {batch_size}, image_size: {train_size}' ) params = model.parameters() if optimizer_type == 'Adam': optimizer = torch.optim.Adam(params, lr) else: optimizer = torch.optim.SGD(params, lr, weight_decay=1e-4, momentum=0.9) image_root = '{}/images/'.format(train_path) gt_root = '{}/masks/'.format(train_path) train_loader = get_loader(image_root, gt_root, batchsize=batch_size, trainsize=train_size, augmentation=augumentation) total_step = len(train_loader) print('#' * 20, 'Start Training', '#' * 20) train(train_loader, model, optimizer, epochs, batch_size, train_size, clip, test_kvasir_path)
def eval_split(encoder, decoder, crit, opt, eval_kwargs=None):
    """Evaluate a captioning encoder/decoder pair on one data split.

    Computes the average criterion loss over the split, samples captions
    for each image, and optionally runs language-metric evaluation.

    Args:
        encoder: image encoder producing features for the decoder.
        decoder: caption decoder; must provide ``eval``/``train``/``sample``.
        crit: loss criterion ``crit(seqs, labels[:, 1:], masks[:, 1:])``.
        opt: options object forwarded to ``get_loader``.
        eval_kwargs: optional dict of evaluation settings (verbose,
            num_images / val_images_use, split, language_eval, dataset, id).

    Returns:
        Tuple of (mean loss, list of {'image_id', 'caption'} predictions,
        language-eval stats or None).
    """
    # BUGFIX: `eval_kwargs={}` was a mutable default argument; use the
    # None sentinel (backward compatible — the dict is only read).
    if eval_kwargs is None:
        eval_kwargs = {}
    verbose = eval_kwargs.get('verbose', True)
    # -1 means "evaluate every image in the split".
    num_images = eval_kwargs.get('num_images',
                                 eval_kwargs.get('val_images_use', -1))
    split = eval_kwargs.get('split', 'val')
    lang_eval = eval_kwargs.get('language_eval', 0)
    dataset = eval_kwargs.get('dataset', 'coco')
    beam_size = eval_kwargs.get('beam_size', 1)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loader = get_loader(opt, split)
    decoder.eval()
    with torch.no_grad():
        loss = 0
        loss_sum = 0
        loss_evals = 1e-8  # avoids division by zero on an empty split
        predictions = []
        total_step = len(loader)
        start = time.time()
        # Loop-invariant ImageNet normalization, hoisted out of the batch
        # loop (it was rebuilt on every iteration).
        transform = transforms.Normalize((0.485, 0.456, 0.406),
                                         (0.229, 0.224, 0.225))
        for i, data in enumerate(loader, 0):
            # Normalize each image in the batch, then re-stack.
            imgs = []
            for k in range(data['imgs'].shape[0]):
                img = torch.tensor(data['imgs'][k], dtype=torch.float)
                img = transform(img)
                imgs.append(img)
            imgs = torch.stack(imgs, dim=0).to(device)
            labels = torch.tensor(data['labels'].astype(np.int32),
                                  dtype=torch.long).to(device)
            masks = torch.tensor(data['masks'], dtype=torch.float).to(device)

            # Teacher-forced pass for the loss (drop the leading token).
            features = encoder(imgs)
            seqs = decoder(features, labels)
            loss = crit(seqs, labels[:, 1:], masks[:, 1:])
            loss_sum += loss
            loss_evals += 1

            # Free-running sampling; keep one caption per image (stride by
            # seq_per_img to skip duplicate captions of the same image).
            seq, _ = decoder.sample(features)
            sents = utils.decode_sequence(
                loader.ix_to_word,
                seq[torch.arange(loader.batch_size, dtype=torch.long) *
                    loader.seq_per_img])
            print("batch [{} / {}] cost: {}".format(
                i, total_step, utils.get_duration(start)))
            for k, sent in enumerate(sents):
                entry = {"image_id": data['infos'][k]['id'], "caption": sent}
                predictions.append(entry)
                if verbose:
                    print("image: %s: %s" %
                          (entry['image_id'], entry['caption']))
            # Stop early once enough images have been evaluated.
            if num_images >= 0 and (i + 1) * loader.batch_size >= num_images:
                break
    lang_stats = None
    if lang_eval == 1:
        lang_stats = language_eval(dataset, predictions, eval_kwargs['id'],
                                   split)
    # Restore training mode before returning.
    decoder.train()
    return loss_sum / loss_evals, predictions, lang_stats