def main(config):

    cudnn.benchmark = True  # let cuDNN auto-tune conv algorithms for fixed-size inputs

    # Create output directories if they do not exist
    os.makedirs(config.model_path, exist_ok=True)
    config.result_path = os.path.join(config.result_path, config.model_type)
    os.makedirs(config.result_path, exist_ok=True)
    
    # Optional random hyperparameter search, kept disabled:
    # lr = random.random()*0.0005 + 0.0000005
    # augmentation_prob = random.random()*0.7
    # epoch = random.choice([100, 150, 200, 250])
    # decay_ratio = random.random()*0.8
    # decay_epoch = int(epoch*decay_ratio)
    #
    # config.augmentation_prob = augmentation_prob
    # config.num_epochs = epoch
    # config.lr = lr
    # config.num_epochs_decay = decay_epoch

    print(config)

    # Note: the two loaders draw from disjoint index ranges, and the valid
    # loader disables augmentation.
    train_loader = get_loader(config=config,
                              image_path=config.train_path,
                              crop_size=config.crop_size,
                              batch_size=config.batch_size,
                              sampler=sampler.SubsetRandomSampler(range(0, 100000)),
                              num_workers=config.num_workers,
                              mode='train',
                              augmentation_prob=config.augmentation_prob)
    valid_loader = get_loader(config=config,
                              image_path=config.valid_path,
                              crop_size=config.crop_size,
                              batch_size=config.batch_size,
                              sampler=sampler.SubsetRandomSampler(range(100000, 103943)),
                              num_workers=config.num_workers,
                              mode='valid',
                              augmentation_prob=0.)

    solver = Solver(config, train_loader, valid_loader)
    
    # Train and sample the images
    if config.mode == 'train':
        solver.train()
    elif config.mode == 'val':
        solver.val()
    else:
        solver.detect()
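# Note: the hard-coded sampler ranges above assume a dataset of exactly
# 103,943 samples. A minimal sketch of deriving the split from the dataset
# size instead (n_total and the 0.96 split fraction are hypothetical):
#
#     n_total = 103943                     # or len(dataset) once it is built
#     n_train = int(n_total * 0.96)
#     train_sampler = sampler.SubsetRandomSampler(range(0, n_train))
#     valid_sampler = sampler.SubsetRandomSampler(range(n_train, n_total))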
Example #2
if not os.path.exists(config.model_path):
    os.makedirs(config.model_path)

# Build the model
print("construct model...")
net = model().to(device)

# Load pretrained weights
utilize.load_pre_model(net, "models/best.ckpt")

# Alternative: restore a full state_dict checkpoint
# checkpoints = torch.load("models/cx_98.pth")
# net.load_state_dict(checkpoints)

# Load the test set
test_loader = get_loader(train="test")
test_step = len(test_loader)

# Run inference
print("Start test ...")
net.eval()

result, names = [], []
save_to_json = []
for i, (vf, label, name) in enumerate(test_loader):
    # af = af.to(device)  # audio-feature variant, disabled
    vf = vf.to(device)
    out, _ = net(vf)  # the second return value is unused here

    # print(out.data.cpu().numpy())
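# Note: net.eval() only switches layers such as dropout and batchnorm into
# eval mode; it does not stop autograd from recording. A sketch of the usual
# pairing for inference:
#
#     net.eval()
#     with torch.no_grad():
#         for i, (vf, label, name) in enumerate(test_loader):
#             out, _ = net(vf.to(device))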
Example #3
def main(args):
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    if not os.path.exists(args.model_dir):
        os.makedirs(args.model_dir)

    transform = transforms.Compose([
        transforms.Resize(args.crop_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    img_list = prepare_entry(args.train_dir, args.train_cap)
    sentences = [c for img in img_list for c in img['cap']]
    vocab = build_dictionary(sentences,
                             threshold=args.threshold,
                             dict_path=args.dict_path,
                             override=False)
    train_set = ImageCaptionSet(img_list, vocab, transform, shuffle=True)
    train_loader = get_loader(train_set,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=2,
                              drop_last=True)

    num_words = vocab.ukn_id + 1
    print('num_words:', num_words)
    model = CapGenerator(args.emb_dim, num_words, args.hidden_dim)

    if args.pretrained:
        model.load_state_dict(torch.load(args.pretrained))
        start_epoch = int(args.pretrained.split('/')[-1].split('_')[1]) + 1
    else:
        start_epoch = 1

    cnn_blocks = list(
        model.encoder.resnet_conv.children())[args.fine_tune_start_layer:]
    cnn_params = [list(sub_module.parameters()) for sub_module in cnn_blocks]
    cnn_params = [item for sublist in cnn_params for item in sublist]
    cnn_optimizer = torch.optim.Adam(cnn_params,
                                     lr=args.lr_cnn,
                                     betas=(args.alpha, args.beta))

    other_params = (list(model.encoder.ai2v.parameters()) +
                    list(model.encoder.ag2v.parameters()) +
                    list(model.decoder.parameters()))

    lr = args.lr
    criterion = nn.CrossEntropyLoss().cuda()
    model.cuda()
    iter_size = len(train_loader)
    #val_iter = len(val_loader)
    cider_scores = []
    best_cider = 0.0
    best_epoch = 0
    print('ITER size: {}'.format(iter_size))
    for epoch in range(start_epoch, args.num_epochs + 1):
        if train_set.shuffle:
            np.random.shuffle(train_set.entries)
            print('shuffle train dataset')
        if epoch > args.lr_decay_start:
            frac = float(epoch - args.lr_decay_start) / args.lr_decay_ratio
            decay_fac = np.power(0.5, frac)
            lr = args.lr * decay_fac  # decay from the base LR, not cumulatively

        print('learning rate for Epoch {}: {:.3e}'.format(epoch, lr))
        optimizer = torch.optim.Adam(other_params,
                                     lr=lr,
                                     betas=(args.alpha, args.beta))
        model.train()
        for i, data in enumerate(train_loader):
            inputs, _, caps, last_pos = data
            inputs, caps = Variable(inputs).cuda(), Variable(caps).cuda()
            lstm_steps = max(last_pos)
            #targets = pack_padded_sequence(caps, last_pos, batch_first=True)
            model.zero_grad()
            packed_scores = model(inputs, caps, last_pos)
            targets = pack_padded_sequence(caps[:, 1:],
                                           last_pos,
                                           batch_first=True)
            #print(caps.shape, caps[:, 1:].shape, last_pos)
            loss = criterion(packed_scores[0], targets[0])
            loss.backward()

            # Hard-clamp the LSTM weights into [-clip, clip] after backprop
            for p in model.decoder.LSTM.parameters():
                p.data.clamp_(-args.clip, args.clip)
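            # Note: clamping weight values differs from the more common
            # gradient clipping; the standard PyTorch utility for the latter
            # would be:
            #     torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)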

            optimizer.step()

            cnn_lr = args.lr_cnn
            if epoch > args.cnn_epoch:
                #cnn_lr = cnn_lr * decay_fac
                cnn_optimizer = torch.optim.Adam(cnn_params,
                                                 lr=cnn_lr,
                                                 betas=(args.alpha, args.beta))
                cnn_optimizer.step()
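            # Note: constructing a fresh Adam optimizer each iteration discards
            # its running moment estimates; typically cnn_optimizer would be
            # created once, outside the loop, and only stepped here.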

            print(
                'TRAIN ITER: {} / {}, lstm_steps: {}, loss: {:.4f}, perplexity: {:.2f}\r'
                .format(i, iter_size, lstm_steps, loss.data[0],
                        np.exp(loss.data[0])),
                end="")
        print("\n", end="")
        if epoch % args.save_freq == args.save_freq - 1:
            name = os.path.join(args.model_dir, 'epoch_{}'.format(epoch))
            torch.save(model.state_dict(), name)

        scores = pad_packed_sequence(packed_scores, batch_first=True)[0]
        last = scores[-1]
        last_ind = list(last.max(1)[1].data)
        last_truth = list(caps[-1, 1:].data)
        print(last_truth, last_pos[-1])

        print('pred: ', end="")
        for ix in last_ind:
            print(vocab.ix2word(ix), end="")
            if ix == 0:
                print("")
                break
            print(' ', end="")
        if ix != 0:
            print("\b.")
        print('truth: ', end="")
        for ix in last_truth:
            print(vocab.ix2word(ix), end="")
            if ix == 0:
                print("")
                break
            print(' ', end="")
        if ix != 0:
            print("\b.")

        # Evaluate CIDEr on the validation set
        cider = coco_eval(model, args, epoch)
        cider_scores.append(cider)

        if cider > best_cider:
            best_cider = cider
            best_epoch = epoch

        if len(cider_scores) > 5:
            last_6 = np.array(cider_scores[-6:])
            if max(last_6) < best_cider:
                print(
                    'No improvement in CIDEr over the last 6 epochs; stopping early.'
                )
                print('Model of best epoch #: %d with CIDEr score %.2f' %
                      (best_epoch, best_cider))
                break

    torch.save(model.state_dict(), os.path.join(args.model_dir,
                                                'trained_model'))
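# Note: resuming depends on the 'epoch_{N}' filename convention used by
# torch.save above; the start_epoch parsing near the top of main() inverts it:
#
#     pretrained = 'models/epoch_12'                        # hypothetical path
#     start_epoch = int(pretrained.split('/')[-1].split('_')[1]) + 1   # -> 13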
Example #4
    ###### Not used here; can be ignored ########
    parser.add_argument('--patch_n', type=int, default=8)
    parser.add_argument('--patch_size', type=tuple, default=(16, 128, 128))
    parser.add_argument('--drop_background', type=float, default=0.1)

    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--lr', type=float, default=1e-4)
    ###### Not used here; can be ignored ########

    args = parser.parse_args()
    return args
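# Note: argparse's type=tuple applies tuple() to the raw CLI string, which
# would split it into individual characters; only the defaults above are safe.
# A sketch of a CLI-friendly alternative (same flag, ints parsed per dimension):
#
#     parser.add_argument('--patch_size', type=int, nargs=3,
#                         default=(16, 128, 128))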


if __name__ == '__main__':
    args = TestArgs()
    test_data_loader = get_loader(args, 'test')
    model = Solver(args)

    print('test_iters: ' + str(args.test_iters))
    with torch.no_grad():
        MAE = []
        model.load_model(args.test_iters)
        for idx, data in enumerate(test_data_loader):
            model.set_input(data)
            MAE_, image_tuple = model.evalute()
            MAE.append(MAE_)
            visualizer.plot_images(image_tuple, args.test_iters, idx,
                                   args.save_path)

        print('epoch {}: MAE {:.2f} +- {:.2f}'.format(args.test_iters,
                                                      np.mean(MAE),
                                                      np.std(MAE)))
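# Note: np.mean over per-batch MAE values weights every batch equally; if the
# final batch is smaller, this differs slightly from a per-sample average.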
Example #5
    print("SGD")
else:
    optimizer = torch.optim.AdamW(net.parameters(),
                                  lr=config.lr,
                                  weight_decay=0.05)

# Alternative: cosine-annealing LR schedule computed from the formula (disabled)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=5e-6)

# Set up the log file
log_txt = config.log_path + "epoch_loss_lr_" + str(
    config.lr) + "_batchsize_" + str(config.batchsize) + ".txt"
utilize.log_txt(filename=log_txt, mark=True)

# Load the data
train_loader = get_loader(train="train")
total_step = len(train_loader)
eval_loader = get_loader(train="val")
eval_step = len(eval_loader)

log_info = []

print("train start...")
best_score = 0
for epoch in range(config.num_epochs):
    loss_epoch = 0

    net.train()
    t1 = time.time()
    # for i, (af, vf, label, name) in enumerate(train_loader):  # audio+video variant
    for i, (vf, label, name) in enumerate(train_loader):
Example #6
    parser.add_argument('--lr', type=float, default=1e-4)

    parser.add_argument('--save_iters', type=int, default=20)

    parser.add_argument('--patch_n', type=int, default=24)
    parser.add_argument('--patch_size', type=tuple, default=(128, 128))  # see the type=tuple note in Example #4
    parser.add_argument('--drop_background', type=float, default=0.1)

    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = TrainArgs()
    # get_loader signature for reference: (data_path, data_type, batch_size, patch_n, patch_size, drop_background)
    train_data_loader = get_loader(args, 'train')
    model = Solver(args)

    start_time = time.time()
    iter_sum = len(train_data_loader)
    for epoch in range(args.epochs):
        # Training
        for iter_, data in enumerate(train_data_loader):
            model.set_input(data)
            loss = model.train(epoch_end=(iter_ + 1 == iter_sum))
            visualizer.print_current_state(epoch, args.epochs, iter_, iter_sum,
                                           start_time, loss)

        losses_train = model.loss_data()
        visualizer.plot_current_loss(losses_train, args.save_path)
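        # Note: --save_iters (defined above) is unused in this excerpt; a
        # plausible, hypothetical hook, assuming Solver exposes a save_model
        # counterpart to the load_model used in the test script, would be:
        #
        #     if (epoch + 1) % args.save_iters == 0:
        #         model.save_model(epoch + 1)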