Пример #1
0
    def train(self, train_data, val_data=None):
        """Train ``self.model`` on ``train_data`` and plot progress to visdom.

        Every epoch iterates a shuffled ``DataLoader`` over ``train_data``.
        After each batch the running mean loss and the running training
        accuracy (derived from a confusion matrix) are plotted. When
        ``val_data`` is truthy, ``self.test`` is run after the epoch and its
        accuracy and confusion matrix are plotted as well. If the epoch's mean
        loss is higher than the previous epoch's, the learning rate is
        multiplied by ``self.opt.lr_decay``.

        Args:
            train_data: dataset yielding ``(data, label)`` pairs. ``argmax``
                over dim 1 is taken on the labels, so they are presumably
                one-hot / per-class score vectors -- TODO confirm.
            val_data: optional dataset forwarded to
                ``self.test(val_data, val=True)``.
        """
        print('Now we begin training')
        train_dataloader = DataLoader(train_data,
                                      batch_size=self.opt.batch_size,
                                      shuffle=True)

        vis = Visualizer(env=self.opt.env)

        if self.opt.use_gpu:
            self.model.cuda()

        num_classes = 10
        previous_loss = 1e10
        loss_meter = meter.AverageValueMeter()
        Confusion_matrix = meter.ConfusionMeter(num_classes)

        for epoch in range(self.opt.max_epoch):
            loss_meter.reset()
            Confusion_matrix.reset()
            for i, (data, label) in enumerate(train_dataloader, 0):
                if self.opt.use_gpu:
                    data = data.cuda()
                    label = label.cuda()
                self.optimizer.zero_grad()
                score = self.model(data)
                out_classes = T.argmax(score, 1)
                target_digit = T.argmax(label, 1)
                loss = self.criterion(score, label)
                loss.backward()
                self.optimizer.step()

                # Update the running metrics.
                loss_meter.add(loss.data.cpu())
                Confusion_matrix.add(out_classes, target_digit)
                # Fetch the matrix once; correct predictions lie on the
                # diagonal.
                cm = Confusion_matrix.value()
                accuracy = 100 * cm.trace() / cm.sum()
                if i % self.opt.print_freq == self.opt.print_freq - 1:
                    print('EPOCH:{0},i:{1},loss:%.6f'.format(epoch, i) %
                          loss.data.cpu())
                vis.plot('loss', loss_meter.value()[0])
                # NOTE: the window is called 'test_accuracy' but the value is
                # the running accuracy on the training batches.
                vis.plot('test_accuracy', accuracy)
            if val_data:
                val_cm, val_ac = self.test(val_data, val=True)
                vis.plot('Val_accuracy', val_ac)
                vis.img('Val Confusion_matrix', T.Tensor(val_cm.value()))

            # Lower the learning rate when the loss stops decreasing.
            # AverageValueMeter.value() returns (mean, std): index 0 is the
            # mean loss, whereas index -1 would compare standard deviations.
            epoch_loss = loss_meter.value()[0]
            if epoch_loss > previous_loss:
                self.opt.lr = self.opt.lr * self.opt.lr_decay
                print('learning rate:{}'.format(self.opt.lr))
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = self.opt.lr

            previous_loss = epoch_loss
Пример #2
0
def train(opt):
    """Train StainNet with an L1 loss and keep the best-PSNR checkpoint.

    Builds cropped train/test datasets and loaders from ``opt``, wraps the
    network in ``nn.DataParallel`` on CUDA and optimises it with SGD under a
    cosine-annealing schedule. Every ``opt.display_freq`` steps the loss and
    one sample triple (target / source / output) are sent to visdom. Every
    fifth epoch ``test`` is run, and the underlying module's weights are
    saved whenever the reported ``"psnr"`` improves.

    Args:
        opt: options object; the attributes read are visible in the body
            (roots, ``fineSize``, ``batchSize``, ``nThreads``, ``lr``,
            ``epoch``, ``display_freq``, ``checkpoints_dir``, ``name``, ...).
    """
    crop = iaa.Sequential([
        iaa.CropToFixedSize(opt.fineSize, opt.fineSize),
    ])
    train_set = ImageDataset(opt.source_root_train,
                             opt.gt_root_train,
                             transform=crop)
    test_set = ImageDataset(opt.source_root_test,
                            opt.gt_root_test,
                            transform=crop)
    train_loader = DataLoader(train_set,
                              batch_size=opt.batchSize,
                              shuffle=True,
                              num_workers=opt.nThreads)
    test_loader = DataLoader(test_set,
                             batch_size=opt.batchSize,
                             shuffle=False,
                             num_workers=opt.nThreads)

    net = StainNet(opt.input_nc, opt.output_nc, opt.n_layer, opt.channels)
    net = nn.DataParallel(net).cuda()
    optimizer = SGD(net.parameters(), lr=opt.lr)
    l1_loss = torch.nn.L1Loss()
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, opt.epoch)
    vis = Visualizer(env=opt.name)

    best_psnr = 0
    for epoch_no in range(1, opt.epoch + 1):
        # Steps are counted from 1 so the display check is a plain modulo.
        for step, (source_image,
                   target_image) in tqdm(enumerate(train_loader, 1)):
            target_image = target_image.cuda()
            source_image = source_image.cuda()

            output = net(source_image)
            loss = l1_loss(output, target_image)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % opt.display_freq != 0:
                continue
            vis.plot("loss", float(loss))
            # Images are shifted by x * 0.5 + 0.5 before display.
            vis.img("target image", target_image[0] * 0.5 + 0.5)
            vis.img("source image", source_image[0] * 0.5 + 0.5)
            vis.img("output", (output[0] * 0.5 + 0.5).clamp(0, 1))

        if epoch_no % 5 == 0:
            test_result = test(net, test_loader)
            vis.plot_many(test_result)
            if test_result["psnr"] > best_psnr:
                best_psnr = test_result["psnr"]
                save_path = "{}/{}_best_psnr_layer{}_ch{}.pth".format(
                    opt.checkpoints_dir, opt.name, opt.n_layer, opt.channels)
                # Save the wrapped module, not the DataParallel container.
                torch.save(net.module.state_dict(), save_path)
                print(save_path, test_result)

        scheduler.step()
        print("lrschedulr=", scheduler.get_last_lr())
Пример #3
0
def train(**kwargs):
    """Train the image-caption model and stream progress to visdom.

    Keyword arguments are copied onto a fresh ``Config`` instance. Each batch
    is moved to CUDA, the captions minus their last time step are fed to the
    model, and the cross-entropy against the packed full captions is
    minimised. Every ``opt.plot_every`` batches the running loss and
    perplexity, one raw training image, its reference caption and the
    generated captions are sent to visdom. The model is saved every 100
    epochs.

    Args:
        **kwargs: attribute overrides applied to ``Config()`` via ``setattr``.
    """
    opt = Config()
    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)
    dataloader = get_dataloader(opt)
    _data = dataloader.dataset._data
    word2ix, ix2word = _data['word2ix'], _data['ix2word']

    model = CaptionModel(opt, None, word2ix, ix2word)
    if opt.model_ckpt:
        model.load(opt.model_ckpt)

    optimizer = model.get_optimizer(opt.lr1)
    criterion = t.nn.CrossEntropyLoss()

    model.cuda()
    criterion.cuda()

    loss_meter = meter.AverageValueMeter()
    perplexity = meter.AverageValueMeter()

    for epoch in range(opt.epoch):

        loss_meter.reset()
        perplexity.reset()
        for ii, (imgs, (captions, lengths),
                 indexes) in tqdm.tqdm(enumerate(dataloader)):
            optimizer.zero_grad()
            imgs = imgs.cuda()
            captions = captions.cuda()

            imgs = Variable(imgs)
            captions = Variable(captions)
            # Input drops the last time step; the target is the packed full
            # caption.
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]

            score, _ = model(imgs, input_captions, lengths)
            loss = criterion(score, target_captions)
            loss.backward()
            # NOTE: gradient clipping is currently not applied.
            optimizer.step()
            # NOTE(review): ``loss.data[0]`` only works on PyTorch < 0.4;
            # newer releases need ``loss.item()`` -- confirm target version.
            loss_meter.add(loss.data[0])
            perplexity.add(t.exp(loss.data)[0])

            # Visualisation
            if (ii + 1) % opt.plot_every == 0:
                # Creating the debug file drops into the debugger.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])
                vis.plot('perplexity', perplexity.value()[0])

                # Show the original image of the first sample in the batch.
                raw_img = _data['train']['ix2id'][indexes[0]]
                img_path = '/data/image/ai_cha/caption/ai_challenger_caption_train_20170902/caption_train_images_20170902/' + raw_img
                raw_img = Image.open(img_path).convert('RGB')
                raw_img = tv.transforms.ToTensor()(raw_img)
                vis.img('raw', raw_img)

                # Show the human-written caption of that sample. The
                # comprehension variable is deliberately not ``ii`` so the
                # batch index can never be clobbered.
                raw_caption = captions.data[:, 0]
                raw_caption = ''.join(
                    [_data['ix2word'][word_ix] for word_ix in raw_caption])
                vis.text(raw_caption, u'raw_caption')

                # Show the captions generated by the network.
                results = model.generate(imgs.data[0])
                vis.text('</br>'.join(results), u'caption')
        if (epoch + 1) % 100 == 0:
            model.save()
Пример #4
0
def train(**kwargs):
    """Train the logo-localisation network ``Net`` on the ``FLogo`` dataset.

    Options are parsed from ``kwargs`` into the module-level ``opt``. The
    network is either restored from ``opt.load_model_path`` (non-strict) or
    Xavier-initialised, then optimised with SGD against a
    ``BCEWithLogitsLoss`` on the mask divided by 255. Every
    ``opt.plot_every`` batches the inputs, ground truth, prediction and a
    0.5-thresholded prediction are sent to visdom; every
    ``opt.save_model_epoch`` epochs the visdom env and the weights are saved.

    Args:
        **kwargs: option overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')

    # Data loading
    train_data = FLogo(opt.data_root, train=True)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)

    # A disabled matplotlib snippet that previewed one batch from the
    # dataloader (to sanity-check the dataset and to produce figures) used to
    # live here as a no-op string literal.

    # Network
    net = Net()
    net.train()

    # Load pretrained weights, or initialise from scratch.
    if opt.load_model_path:
        net.load_state_dict(
            t.load(opt.load_model_path,
                   map_location=lambda storage, loc: storage), False)
        print('已加载完。。')
    else:
        for m in net.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_normal_(m.weight)
        # Report once, after every layer has been initialised (previously
        # this was printed once per initialised layer).
        print('模型参数完成初始化。。')
    net.to(device)

    # Loss function and optimiser
    criterion = nn.BCEWithLogitsLoss(pos_weight=opt.pos_weight.to(device))
    optimizer = t.optim.SGD(net.parameters(), lr=opt.lr,
                            momentum=opt.momentum,
                            weight_decay=opt.weight_decay)

    # Running average of the loss
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoches):
        loss_meter.reset()
        for ii, (target_img, query_logo, mask) in tqdm.tqdm(
                enumerate(train_dataloader)):
            print(target_img.shape)
            # Training step
            target_img = target_img.to(device)
            query_logo = query_logo.to(device)
            mask = mask.to(device)

            optimizer.zero_grad()

            output = net(query_logo, target_img)
            # NOTE(review): squeeze() without a dim also drops the batch
            # dimension when a batch holds a single sample -- confirm the
            # output shape and consider squeeze(1).
            output = output.squeeze()
            # The criterion consumes logits; the sigmoid is for display only.
            predict = t.sigmoid(output)
            true_mask = mask / 255

            loss = criterion(output, true_mask)

            loss.backward()
            optimizer.step()

            # Meter update and visualisation
            loss_meter.add(loss.item())
            if (ii + 1) % opt.plot_every == 0:

                vis.img('target_img', ((target_img + 1) / 2).data[0])
                vis.img('query_logo', ((query_logo + 1) / 2).data[0])
                vis.img('truth groud', (true_mask.data[0]))
                vis.img('predict', predict.data[0])
                # Binarise at 0.5 into a new tensor instead of overwriting
                # ``predict`` in place. (A higher threshold such as 0.7 was
                # considered by the original author.)
                first_pred = predict.data[0]
                pre_judgement = (first_pred > 0.5).to(first_pred.dtype)
                vis.img('pre_judge(>0.5)', pre_judgement)

        print('finish epoch:', epoch)
        vis.plot('loss', loss_meter.value()[0])

        if (epoch + 1) % opt.save_model_epoch == 0:
            vis.save([opt.env])
            t.save(net.state_dict(), 'checkpoints/%s_localize_v6.pth' % epoch)
Пример #5
0
def train(**kwargs):
    """Train the image-caption model and stream progress to visdom.

    Keyword arguments are copied onto a fresh ``Config``. The captions minus
    their last time step are fed to the model and the cross-entropy against
    the packed full captions is minimised. Every ``opt.plot_every`` batches
    the running loss, the first sample's image with its reference caption,
    and the generated captions are sent to visdom. The model is saved after
    every epoch.

    Args:
        **kwargs: attribute overrides applied to ``Config()`` via ``setattr``.
    """
    opt = Config()
    for k, v in kwargs.items():
        setattr(opt, k, v)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')

    # NOTE(review): these two assignments run after the kwargs loop and so
    # override any caller-supplied value -- confirm this is intended.
    opt.caption_data_path = 'caption.pth'  # raw caption data
    opt.test_img = ''  # input image

    # Data
    vis = Visualizer(env=opt.env)
    dataloader = get_dataloader(opt)
    _data = dataloader.dataset._data
    word2ix, ix2word = _data['word2ix'], _data['ix2word']

    # Model
    model = CaptionModel(opt, word2ix, ix2word)
    if opt.model_ckpt:
        model.load(opt.model_ckpt)
    optimizer = model.get_optimizer(opt.lr)
    criterion = t.nn.CrossEntropyLoss()

    model.to(device)

    # Statistics
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, (imgs, (captions, lengths), indexes) in tqdm.tqdm(enumerate(dataloader)):
            # Training step
            optimizer.zero_grad()
            imgs = imgs.to(device)
            captions = captions.to(device)
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]
            score, _ = model(imgs, input_captions, lengths)
            loss = criterion(score, target_captions)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())

            # Visualisation
            if (ii + 1) % opt.plot_every == 0:
                # Creating the debug file drops into the debugger.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])

                # Original image plus the human-written caption of the first
                # sample in the batch.
                raw_img = _data['ix2id'][indexes[0]]
                img_path = opt.img_path + raw_img
                raw_img = Image.open(img_path).convert('RGB')
                raw_img = tv.transforms.ToTensor()(raw_img)

                # Iterating a tensor yields 0-dim tensors, which fail as keys
                # if ix2word is a dict; convert to plain ints first.
                word_ids = captions.data[:, 0].tolist()
                raw_caption = ''.join(
                    [_data['ix2word'][word_ix] for word_ix in word_ids])
                vis.text(raw_caption, u'raw_caption')
                vis.img('raw', raw_img, caption=raw_caption)

                # Captions generated by the network
                results = model.generate(imgs.data[0])
                vis.text('</br>'.join(results), u'caption')
        model.save()
Пример #6
0
def train(**kwargs):
    """Train ``TransformerNet`` for style transfer with a VGG16 loss network.

    Content images come from an ``ImageFolder`` at ``opt.data_root``; a
    single style image is loaded from ``opt.style_path``. Only the
    transformer network is optimised (Adam); ``Vgg16`` runs in eval mode with
    frozen parameters and is used solely to compute the content loss (MSE on
    ``relu2_2`` features) and the style loss (MSE between gram matrices).
    Running losses and sample images are plotted every ``opt.print_freq``
    batches when ``opt.vis`` is set, and the transformer weights are saved
    every ``opt.save_every`` epochs.

    Args:
        **kwargs: option overrides forwarded to ``opt.parse``.
    """
    # step 1: configuration
    opt.parse(**kwargs)
    vis = Visualizer(opt.env)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')

    # step 2: data
    # ToTensor output is multiplied by 255 here instead of being normalised;
    # normalize_batch() is applied later, right before the VGG forward pass.
    preprocess = T.Compose([
        T.Resize(opt.image_size),
        T.CenterCrop(opt.image_size),
        T.ToTensor(),
        T.Lambda(lambda x: x*255)
    ])
    dataset = tv.datasets.ImageFolder(opt.data_root, transform=preprocess)
    dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True,
                            num_workers=opt.num_workers, drop_last=True)

    style_img = get_style_data(opt.style_path)  # 1*c*H*W per original note
    style_img = style_img.to(device)
    vis.img('style_image', (style_img.data[0]*0.225+0.45).clamp(min=0, max=1))

    # step 3: models -- the transformer being trained and the loss network.
    transformer_net = TransformerNet()

    if opt.model_path:
        # map_location keeps the loaded tensors on the storage they were
        # deserialised to before the explicit .to(device) below.
        transformer_net.load_state_dict(
            t.load(opt.model_path, map_location=lambda _s, _: _s))
    transformer_net.to(device)

    # step 4: criterion and optimiser
    optimizer = t.optim.Adam(transformer_net.parameters(), opt.lr)
    vgg16 = Vgg16().eval()
    vgg16.to(device)
    # VGG parameters need no gradients themselves, but gradients still flow
    # through the network back to the transformer output.
    for param in vgg16.parameters():
        param.requires_grad = False
    # The default reduction is the mean, equivalent to the deprecated
    # reduce=True / size_average=True arguments.
    criterion = t.nn.MSELoss()

    # step 5: loss meters
    style_meter = meter.AverageValueMeter()
    content_meter = meter.AverageValueMeter()
    total_meter = meter.AverageValueMeter()

    # Gram matrices of the style image, computed once. One entry per feature
    # map returned by vgg16 (relu1_2, relu2_2, relu3_3, relu4_3 per the
    # original note), each presumably of shape b*c*c -- TODO confirm.
    with t.no_grad():
        features = vgg16(style_img)
        gram_style = [gram_matrix(feature) for feature in features]

    # step 6: training
    for epoch in range(opt.epoches):
        # Reset all three meters so every plotted value is a per-epoch mean
        # (total_meter used to accumulate across epochs).
        style_meter.reset()
        content_meter.reset()
        total_meter.reset()

        for ii, (data, _) in tqdm(enumerate(dataloader)):
            optimizer.zero_grad()
            data = data.to(device)
            y = transformer_net(data)
            # VGG expects normalised input.
            data = normalize_batch(data)
            y = normalize_batch(y)

            feature_data = vgg16(data)
            feature_y = vgg16(y)

            # Content loss on relu2_2 features (the accompanying book text
            # mentions relu3_3; the code uses relu2_2).
            content_loss = opt.content_weight * criterion(
                feature_y.relu2_2, feature_data.relu2_2)

            # Style loss: compare the gram matrix of every feature map of the
            # generated batch with the style image's gram matrix, which is
            # expanded over the batch dimension.
            style_loss = 0
            for ft_y, gm_s in zip(feature_y, gram_style):
                gram_y = gram_matrix(ft_y)
                style_loss += criterion(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            content_meter.add(content_loss.item())
            style_meter.add(style_loss.item())
            total_meter.add(total_loss.item())

            # step 6.1: visualisation
            if (ii+1) % opt.print_freq == 0 and opt.vis:
                # Creating the debug file drops into the debugger.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                vis.plot('total_loss', total_meter.value()[0])
                # data and y were normalised above (roughly -2..2 per the
                # original note); map them back to 0..1 for display.
                vis.img('input', (data.data*0.225+0.45)[0].clamp(min=0, max=1))
                vis.img('output', (y.data*0.225+0.45)[0].clamp(min=0, max=1))

        # step 6.2: save weights and the visdom environment
        if (epoch+1) % opt.save_every == 0:
            t.save(transformer_net.state_dict(), 'checkpoints/%s_style.pth' % epoch)
            vis.save([opt.env])
Пример #7
0
def train():
    """Train ``IMAGE_AI_MODEL`` as a step-by-step caption decoder.

    For every sample in a batch the decoder is unrolled one token at a time
    with teacher forcing (the next input is always the ground-truth token),
    accumulating an NLL loss over all steps of all samples. One optimiser
    step is taken per batch. Every 10 batches the running loss, the last
    sample's image, its decoded words and its reference words are sent to
    visdom. Weights are saved after each of the 10 epochs under a
    time-stamped name.
    """
    plot_every = 10
    img_path_q = 'ai_challenger_caption_train_20170902/caption_train_images_20170902/'

    model = IMAGE_AI_MODEL()
    model.train()
    model.cuda()
    criterion = t.nn.NLLLoss()
    optimizer = t.optim.Adam(model.parameters(), lr=1e-3)
    dataloader = get_dataloader()
    data_set = dataloader.dataset
    print(len(data_set))
    ix2word = dataloader.dataset.ix2word
    _data = dataloader.dataset._data
    vis = Visualizer(env='word_embedding_caption')
    loss_meter = meter.AverageValueMeter()
    for epoch in range(10):
        loss_meter.reset()
        for ii, (img_lows, img_highs, cap_tensor, lengths,
                 indexs) in tqdm.tqdm(enumerate(dataloader)):
            optimizer.zero_grad()
            loss = 0
            batch_target_length = 0
            # Iterate over the samples actually present in this batch instead
            # of a hard-coded 8, so a shorter final batch does not raise
            # IndexError.
            for i in range(len(lengths)):
                decoder_hidden = img_lows[[i]].unsqueeze(0)
                cell_hidden = decoder_hidden.clone()
                encoder_outputs = img_highs[i]
                target_tensor = cap_tensor[i]
                target_length = lengths[i]
                batch_target_length += target_length
                # Every sequence starts from token index 0.
                decoder_input = t.tensor([0])
                decoder_hidden = decoder_hidden.cuda()
                cell_hidden = cell_hidden.cuda()
                encoder_outputs = encoder_outputs.cuda()
                target_tensor = target_tensor.cuda()
                decoder_input = decoder_input.cuda()
                raw_img = _data['ix2id'][indexs[i]]
                img_path = img_path_q + raw_img
                true_words = []
                for w in range(target_length):
                    true_words.append(ix2word[target_tensor[w].item()])
                    true_words.append('|')
                decoded_words = []
                for di in range(target_length):
                    decoder_output, decoder_hidden, cell_hidden, decoder_attention = model(
                        decoder_input, decoder_hidden, cell_hidden,
                        encoder_outputs)
                    loss += criterion(decoder_output, target_tensor[[di]])
                    # Teacher forcing: feed the ground-truth token next.
                    decoder_input = target_tensor[[di]]
                    topv, topi = decoder_output.data.topk(1)
                    # NOTE(review): index 2 is treated as <EOS>; stopping on
                    # a *predicted* EOS skips the loss for the remaining
                    # target tokens -- confirm this is intended.
                    if topi.item() == 2:
                        decoded_words.append('<EOS>')
                        break
                    else:
                        decoded_words.append(ix2word[topi.item()])
            loss.backward()
            # Loss per target token, for reporting only.
            loss_batch = loss.item() / batch_target_length
            loss_meter.add(loss_batch)
            optimizer.step()
            if (ii + 1) % plot_every == 0:
                vis.plot('loss', loss_meter.value()[0])
                # img_path / decoded_words / true_words refer to the last
                # sample processed in this batch.
                raw_img = Image.open(img_path).convert('RGB')
                raw_img = tv.transforms.ToTensor()(raw_img)
                vis.img('raw', raw_img)
                raw_caption = ''.join(decoded_words)
                vis.text(raw_caption, win='raw_caption')
                true_caption = ''.join(true_words)
                vis.text(true_caption, win='ture_caption')
        # Save a time-stamped checkpoint after every epoch.
        prefix = 'IMAGE_AI_MODEL'
        path = '{prefix}_{time}'.format(prefix=prefix,
                                        time=time.strftime('%m%d_%H%M'))
        t.save(model.state_dict(), path)
Пример #8
0
def train(**kwargs):
    """Train the image-caption model and stream progress to visdom.

    Keyword arguments are copied onto a fresh ``Config``. The captions minus
    their last time step are fed to the model and the cross-entropy against
    the packed full captions is minimised. Every ``opt.plot_every`` batches
    the running loss, the first sample's image with its reference caption,
    and the generated captions are sent to visdom. The model is saved after
    every epoch.

    Args:
        **kwargs: attribute overrides applied to ``Config()`` via ``setattr``.
    """
    opt = Config()
    for k, v in kwargs.items():
        setattr(opt, k, v)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')

    # NOTE(review): these two assignments run after the kwargs loop and so
    # override any caller-supplied value -- confirm this is intended.
    opt.caption_data_path = 'caption.pth'  # raw caption data
    opt.test_img = ''  # input image

    # Data
    vis = Visualizer(env=opt.env)
    dataloader = get_dataloader(opt)
    _data = dataloader.dataset._data
    word2ix, ix2word = _data['word2ix'], _data['ix2word']

    # Model
    model = CaptionModel(opt, word2ix, ix2word)
    if opt.model_ckpt:
        model.load(opt.model_ckpt)
    optimizer = model.get_optimizer(opt.lr)
    criterion = t.nn.CrossEntropyLoss()

    model.to(device)

    # Statistics
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, (imgs, (captions, lengths),
                 indexes) in tqdm.tqdm(enumerate(dataloader)):
            # Training step
            optimizer.zero_grad()
            imgs = imgs.to(device)
            captions = captions.to(device)
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]
            score, _ = model(imgs, input_captions, lengths)
            loss = criterion(score, target_captions)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())

            # Visualisation
            if (ii + 1) % opt.plot_every == 0:
                # Creating the debug file drops into the debugger.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])

                # Original image plus the human-written caption of the first
                # sample in the batch.
                raw_img = _data['ix2id'][indexes[0]]
                img_path = opt.img_path + raw_img
                raw_img = Image.open(img_path).convert('RGB')
                raw_img = tv.transforms.ToTensor()(raw_img)

                # Iterating a tensor yields 0-dim tensors, which fail as keys
                # if ix2word is a dict; convert to plain ints first.
                word_ids = captions.data[:, 0].tolist()
                raw_caption = ''.join(
                    [_data['ix2word'][word_ix] for word_ix in word_ids])
                vis.text(raw_caption, u'raw_caption')
                vis.img('raw', raw_img, caption=raw_caption)

                # Captions generated by the network
                results = model.generate(imgs.data[0])
                vis.text('</br>'.join(results), u'caption')
        model.save()
Пример #9
0
def train(**kwargs):
    """Train ``models.KeypointModel`` and visualise predictions in visdom.

    Options are parsed from ``kwargs`` into the module-level ``opt``. The
    model is optionally restored from ``opt.model_path``, moved to CUDA and
    saved once before training starts. Each batch is optimised against
    ``l2_loss(outputs, target, weight)``. Every ``opt.plot_every`` batches
    (except the first) the running loss and one random sample are plotted:
    channel 14 of the ground truth / last output overlaid on the image, and
    channels 15+ rendered through ``tool.vis_paf``. After every epoch the
    model is saved with the epoch's mean loss and the visdom env is saved.

    Args:
        **kwargs: option overrides forwarded to ``opt.parse``.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    model = models.KeypointModel(opt)
    if opt.model_path is not None:
        model.load(opt.model_path)

    model.cuda()
    dataset = Dataset(opt)
    dataloader = t.utils.data.DataLoader(dataset,
                                         opt.batch_size,
                                         num_workers=opt.num_workers,
                                         shuffle=True,
                                         drop_last=True)

    lr1, lr2 = opt.lr1, opt.lr2
    optimizer = model.get_optimizer(lr1, lr2)
    loss_meter = tnt.meter.AverageValueMeter()
    model.save()
    for epoch in range(opt.max_epoch):

        loss_meter.reset()

        for ii, (img, gt, weight) in tqdm(enumerate(dataloader)):
            optimizer.zero_grad()
            img = t.autograd.Variable(img).cuda()
            target = t.autograd.Variable(gt).cuda()
            weight = t.autograd.Variable(weight).cuda()
            outputs = model(img)
            # l2_loss also returns a second value that is not needed here.
            loss, _ = l2_loss(outputs, target, weight)
            loss.backward()
            # NOTE(review): ``loss.data[0]`` only works on PyTorch < 0.4;
            # newer releases need ``loss.item()`` -- confirm target version.
            loss_meter.add(loss.data[0])
            optimizer.step()

            # Visualise / log
            if ii % opt.plot_every == 0 and ii > 0:
                # Creating the debug file drops into the debugger.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis_plots = {'loss': loss_meter.value()[0], 'ii': ii}
                vis.plot_many(vis_plots)

                # Show one randomly chosen image from the batch.
                k = t.randperm(img.size(0))[0]
                show = img.data[k].cpu()
                raw = (show * 0.225 + 0.45).clamp(min=0, max=1)

                train_masked_img = mask_img(raw, outputs[-1].data[k][14])
                origin_masked_img = mask_img(raw, gt[k][14])

                vis.img('target', origin_masked_img)
                vis.img('train', train_masked_img)
                vis.img('label', gt[k][14])
                vis.img('predict', outputs[-1].data[k][14].clamp(max=1, min=0))
                paf_img = tool.vis_paf(raw, gt[k][15:])
                train_paf_img = tool.vis_paf(
                    raw, outputs[-1][k].data[15:].clamp(min=-1, max=1))
                vis.img('paf_train', train_paf_img)
                vis.img('paf', paf_img)
        model.save(loss_meter.value()[0])
        vis.save([opt.env])