def main(config):
    cudnn.benchmark = True  # let cuDNN autotune kernels for fixed-size inputs

    # Create directories if they do not exist
    if not os.path.exists(config.model_path):
        os.makedirs(config.model_path)
    if not os.path.exists(config.result_path):
        os.makedirs(config.result_path)
    config.result_path = os.path.join(config.result_path, config.model_type)
    if not os.path.exists(config.result_path):
        os.makedirs(config.result_path)

    # lr = random.random()*0.0005 + 0.0000005
    # augmentation_prob = random.random()*0.7
    # epoch = random.choice([100, 150, 200, 250])
    # decay_ratio = random.random()*0.8
    # decay_epoch = int(epoch*decay_ratio)
    #
    # config.augmentation_prob = augmentation_prob
    # config.num_epochs = epoch
    # config.lr = lr
    # config.num_epochs_decay = decay_epoch

    print(config)

    # Note the difference between the two loaders: the train loader samples
    # indices 0-99999 with augmentation, while the valid loader samples the
    # remaining indices 100000-103942 with augmentation disabled.
    train_loader = get_loader(config=config,
                              image_path=config.train_path,
                              crop_size=config.crop_size,
                              batch_size=config.batch_size,
                              sampler=sampler.SubsetRandomSampler(range(0, 100000)),
                              num_workers=config.num_workers,
                              mode='train',
                              augmentation_prob=config.augmentation_prob)
    valid_loader = get_loader(config=config,
                              image_path=config.valid_path,
                              crop_size=config.crop_size,
                              batch_size=config.batch_size,
                              sampler=sampler.SubsetRandomSampler(range(100000, 103943)),
                              num_workers=config.num_workers,
                              mode='valid',
                              augmentation_prob=0.)

    solver = Solver(config, train_loader, valid_loader)

    # Train, validate, or run detection depending on config.mode
    if config.mode == 'train':
        solver.train()
    elif config.mode == 'val':
        solver.val()
    else:
        solver.detect()
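# --- Hedged sketch (not in the original): a minimal, self-contained example of
# the index-range split technique used above. SubsetRandomSampler shuffles only
# within its own range, so the two loaders never see each other's samples.
# The toy TensorDataset stands in for the repo's real image dataset.
import torch
from torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler

images = torch.randn(200, 3, 32, 32)  # toy data
labels = torch.randint(0, 2, (200,))
dataset = TensorDataset(images, labels)

train_loader_demo = DataLoader(dataset, batch_size=16,
                               sampler=SubsetRandomSampler(range(0, 150)))
valid_loader_demo = DataLoader(dataset, batch_size=16,
                               sampler=SubsetRandomSampler(range(150, 200)))
# Note: DataLoader forbids shuffle=True together with a sampler; the sampler
# alone provides the shuffling.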
if not os.path.exists(config.model_path):
    os.makedirs(config.model_path)

# Build the model
print("construct model...")
net = model().to(device)
utilize.load_pre_model(net, "models/best.ckpt")
# checkpoints = torch.load("models/cx_98.pth")
# net.load_state_dict(checkpoints)

# Load the test set
test_loader = get_loader(train="test")
test_step = len(test_loader)

# Run the test loop
print("Start test ...")
net.eval()
result, names = [], []
save_to_json = []
for i, (vf, label, name) in enumerate(test_loader):
    # af = af.to(device)
    vf = vf.to(device)
    # out = net(vf)
    out, _ = net(vf)
    # print(out.data.cpu().numpy())
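# --- Hedged sketch: `utilize.load_pre_model` is project code not shown here.
# A common implementation loads only the checkpoint entries whose names and
# shapes match the current model, which tolerates architecture changes such as
# the extra output returned by the forward pass above. This is an assumption,
# not the repo's actual code.
import torch

def load_pre_model_sketch(net, ckpt_path):
    state = torch.load(ckpt_path, map_location="cpu")
    model_state = net.state_dict()
    # Keep only parameters present in the model with matching shapes.
    filtered = {k: v for k, v in state.items()
                if k in model_state and v.shape == model_state[k].shape}
    model_state.update(filtered)
    net.load_state_dict(model_state)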
def main(args):
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    if not os.path.exists(args.model_dir):
        os.makedirs(args.model_dir)

    transform = transforms.Compose([
        transforms.Resize(args.crop_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))
    ])

    img_list = prepare_entry(args.train_dir, args.train_cap)
    sentences = [c for img in img_list for c in img['cap']]
    vocab = build_dictionary(sentences, threshold=args.threshold,
                             dict_path=args.dict_path, override=False)

    train_set = ImageCaptionSet(img_list, vocab, transform, shuffle=True)
    train_loader = get_loader(train_set, batch_size=args.batch_size,
                              shuffle=True, num_workers=2, drop_last=True)

    num_words = vocab.ukn_id + 1
    print('num_words:', num_words)

    model = CapGenerator(args.emb_dim, num_words, args.hidden_dim)
    if args.pretrained:
        model.load_state_dict(torch.load(args.pretrained))
        start_epoch = int(args.pretrained.split('/')[-1].split('_')[1]) + 1
    else:
        start_epoch = 1

    # Fine-tune only the CNN blocks from `fine_tune_start_layer` onwards.
    cnn_blocks = list(model.encoder.resnet_conv.children())[args.fine_tune_start_layer:]
    cnn_params = [list(sub_module.parameters()) for sub_module in cnn_blocks]
    cnn_params = [item for sublist in cnn_params for item in sublist]
    cnn_optimizer = torch.optim.Adam(cnn_params, lr=args.lr_cnn,
                                     betas=(args.alpha, args.beta))

    other_params = (list(model.encoder.ai2v.parameters()) +
                    list(model.encoder.ag2v.parameters()) +
                    list(model.decoder.parameters()))
    lr = args.lr

    criterion = nn.CrossEntropyLoss().cuda()
    model.cuda()

    iter_size = len(train_loader)
    # val_iter = len(val_loader)
    cider_scores = []
    best_cider = 0.0
    best_epoch = 0
    print('ITER size: {}'.format(iter_size))

    for epoch in range(start_epoch, args.num_epochs + 1):
        if train_set.shuffle:
            np.random.shuffle(train_set.entries)
            print('shuffle train dataset')

        # Exponential decay: multiply lr by 0.5^((epoch - start) / ratio)
        # each epoch past the decay start.
        if epoch > args.lr_decay_start:
            frac = float(epoch - args.lr_decay_start) / args.lr_decay_ratio
            decay_fac = np.power(0.5, frac)
            lr = lr * decay_fac
        print('learning rate for Epoch {}: {:.3e}'.format(epoch, lr))

        optimizer = torch.optim.Adam(other_params, lr=lr,
                                     betas=(args.alpha, args.beta))
        model.train()

        for i, data in enumerate(train_loader):
            inputs, _, caps, last_pos = data
            inputs, caps = Variable(inputs).cuda(), Variable(caps).cuda()
            lstm_steps = max(last_pos)
            # targets = pack_padded_sequence(caps, last_pos, batch_first=True)

            model.zero_grad()
            packed_scores = model(inputs, caps, last_pos)
            # Scores predict caps[:, 1:], the caption shifted by one token.
            targets = pack_padded_sequence(caps[:, 1:], last_pos, batch_first=True)
            # print(caps.shape, caps[:, 1:].shape, last_pos)
            loss = criterion(packed_scores[0], targets[0])
            loss.backward()
            # Clamp the decoder LSTM weights element-wise (note: this clamps
            # the weights themselves, not the gradients).
            for p in model.decoder.LSTM.parameters():
                p.data.clamp_(-args.clip, args.clip)
            optimizer.step()

            cnn_lr = args.lr_cnn
            if epoch > args.cnn_epoch:
                # cnn_lr = cnn_lr * decay_fac
                cnn_optimizer = torch.optim.Adam(cnn_params, lr=cnn_lr,
                                                 betas=(args.alpha, args.beta))
                cnn_optimizer.step()

            scores = pad_packed_sequence(packed_scores, batch_first=True)[0]
            last = scores[-1]
            last_ind = list(last.max(1)[1].data)
            last_truth = list(caps[-1, 1:].data)
            # Legacy PyTorch style: loss.data[0] (loss.item() in modern releases).
            print('TRAIN ITER: {} / {}, lstm_steps: {}, loss: {:.4f}, Perplexity: {}\r'
                  .format(i, iter_size, lstm_steps, loss.data[0],
                          np.exp(loss.data[0])), end="")
        print("\n", end="")

        if epoch % args.save_freq == args.save_freq - 1:
            name = os.path.join(args.model_dir, 'epoch_{}'.format(epoch))
            torch.save(model.state_dict(), name)

        # Decode the last sample of the final batch as a quick sanity check.
        scores = pad_packed_sequence(packed_scores, batch_first=True)[0]
        last = scores[-1]
        last_ind = list(last.max(1)[1].data)
        last_truth = list(caps[-1, 1:].data)
        print(last_truth, last_pos[-1])
        print('pred: ', end="")
        for ix in last_ind:
            print(vocab.ix2word(ix), end="")
            if ix == 0:
                print("")
                break
            print(' ', end="")
        if ix != 0:
            print("\b.")
        print('truth: ', end="")
        for ix in last_truth:
            print(vocab.ix2word(ix), end="")
            if ix == 0:
                print("")
                break
            print(' ', end="")
        if ix != 0:
            print("\b.")

        # Evaluate CIDEr and early-stop when the best score has not improved
        # within the last six epochs.
        cider = coco_eval(model, args, epoch)
        cider_scores.append(cider)
        if cider > best_cider:
            best_cider = cider
            best_epoch = epoch
        if len(cider_scores) > 5:
            last_6 = np.array(cider_scores[-6:])
            if max(last_6) < best_cider:
                print('No improvement with CIDEr in the last 6 epochs... '
                      'Early stopping triggered.')
                print('Model of best epoch #: %d with CIDEr score %.2f'
                      % (best_epoch, best_cider))
                break

    torch.save(model.state_dict(), os.path.join(args.model_dir, 'trained_model'))
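# --- Hedged illustration (not in the original): how the packed targets above
# line up with the packed scores. pack_padded_sequence flattens the shifted
# captions caps[:, 1:] with the same per-caption lengths as the model's packed
# output, so the criterion compares them element-by-element. Toy ids only.
import torch
from torch.nn.utils.rnn import pack_padded_sequence

caps = torch.tensor([[1, 4, 5, 2, 0],    # <s> w  w  </s> <pad>
                     [1, 6, 2, 0, 0]])   # <s> w  </s> <pad> <pad>
lengths = [4, 3]                         # predicted tokens per caption
targets = pack_padded_sequence(caps[:, 1:], lengths, batch_first=True)
print(targets.data)  # tensor([4, 6, 5, 2, 2, 0, 0])
# Aside: if the clamp loop above was meant as gradient clipping, the idiomatic
# equivalent is torch.nn.utils.clip_grad_value_(params, args.clip), called
# between loss.backward() and optimizer.step().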
    ###### Ignore these; they are not used ########
    parser.add_argument('--patch_n', type=int, default=8)
    parser.add_argument('--patch_size', type=tuple, default=(16, 128, 128))
    parser.add_argument('--drop_background', type=float, default=0.1)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--lr', type=float, default=1e-4)
    ###### Ignore these; they are not used ########

    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = TestArgs()
    test_data_loader = get_loader(args, 'test')
    model = Solver(args)
    print('iter_:' + str(args.test_iters))

    with torch.no_grad():
        MAE = []
        model.load_model(args.test_iters)
        for idx, data in enumerate(test_data_loader):
            model.set_input(data)
            MAE_, image_tuple = model.evalute()
            MAE.append(MAE_)
            visualizer.plot_images(image_tuple, args.test_iters, idx,
                                   args.save_path)
        print('epoch{}-MAE:{:.2f}+-{:.2f}'.format(args.test_iters,
                                                  np.mean(MAE), np.std(MAE)))
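# --- Hedged sketch: `Solver.evalute` is project code not shown here. From the
# loop above it presumably returns the per-batch mean absolute error between
# the model output and the target, something like the helper below (an
# assumption, including the hypothetical name `mae_sketch`).
import torch

def mae_sketch(pred, target):
    # Mean absolute error over every element in the batch.
    return (pred - target).abs().mean().item()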
print("SGD") else: optimizer = torch.optim.AdamW(net.parameters(), lr=config.lr, weight_decay=0.05) #根据式子进行计算 # scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10,eta_min=5e-6,) # 设置记录文件 log_txt = config.log_path + "epoch_loss_lr_" + str( config.lr) + "_batchsize_" + str(config.batchsize) + ".txt" utilize.log_txt(filename=log_txt, mark=True) # 加载数据 train_loader = get_loader(train="train") total_step = len(train_loader) eval_loader = get_loader(train="val") eval_step = len(eval_loader) log_info = [] print("train start...") best_score = 0 for epoch in range(config.num_epochs): loss_epoch = 0 net.train() t1 = time.time() # for i,(af,vf, label,name) in enumerate(train_loader): for i, (vf, label, name) in enumerate(train_loader):
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--save_iters', type=int, default=20)
    parser.add_argument('--patch_n', type=int, default=24)
    parser.add_argument('--patch_size', type=tuple, default=(128, 128))
    parser.add_argument('--drop_background', type=float, default=0.1)

    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = TrainArgs()
    # get_loader(data_path, data_type, batch_size, patch_n, patch_size, drop_background)
    train_data_loader = get_loader(args, 'train')
    model = Solver(args)
    start_time = time.time()
    iter_sum = len(train_data_loader)

    for epoch in range(args.epochs):
        # Train for one epoch
        for iter_, data in enumerate(train_data_loader):
            model.set_input(data)
            loss = model.train(epoch_end=(iter_ + 1 == iter_sum))
            visualizer.print_current_state(epoch, args.epochs, iter_, iter_sum,
                                           start_time, loss)
        losses_train = model.loss_data()
        visualizer.plot_current_loss(losses_train, args.save_path)
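# --- Hedged sketch (assumption, not the repo's loader): the patch_n /
# patch_size / drop_background arguments above suggest that get_loader crops
# `patch_n` random patches per image and discards mostly-background ones. A
# minimal version of that cropping step, with a retry cap so an all-background
# image cannot loop forever (the name `random_patches_sketch` is hypothetical):
import numpy as np

def random_patches_sketch(image, patch_n=24, patch_size=(128, 128),
                          drop_background=0.1, max_tries=1000,
                          rng=np.random):
    h, w = image.shape[-2:]
    ph, pw = patch_size
    patches = []
    for _ in range(max_tries):
        if len(patches) == patch_n:
            break
        y = rng.randint(0, h - ph + 1)
        x = rng.randint(0, w - pw + 1)
        patch = image[..., y:y + ph, x:x + pw]
        # Skip patches whose mean intensity falls below the background threshold.
        if patch.mean() < drop_background:
            continue
        patches.append(patch)
    return np.stack(patches)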