def train(opt):
    """Train StainNet with an L1 reconstruction loss.

    Builds train/test datasets with a fixed-size random crop augmentation,
    trains for ``opt.epoch`` epochs with SGD + cosine-annealed LR, and keeps
    the checkpoint with the best test PSNR.

    Args:
        opt: config namespace providing fineSize, data roots, batchSize,
            nThreads, network hyperparameters, lr, epoch, display_freq,
            checkpoints_dir and name.
    """
    # Random crop to a fixed size; the same augmenter is (perhaps surprisingly)
    # also applied to the test set so train/test see identical input sizes.
    seq = iaa.Sequential([
        iaa.CropToFixedSize(opt.fineSize, opt.fineSize),
    ])
    dataset_train = ImageDataset(opt.source_root_train, opt.gt_root_train, transform=seq)
    dataset_test = ImageDataset(opt.source_root_test, opt.gt_root_test, transform=seq)
    dataloader_train = DataLoader(dataset_train, batch_size=opt.batchSize,
                                  shuffle=True, num_workers=opt.nThreads)
    dataloader_test = DataLoader(dataset_test, batch_size=opt.batchSize,
                                 shuffle=False, num_workers=opt.nThreads)
    model = StainNet(opt.input_nc, opt.output_nc, opt.n_layer, opt.channels)
    # DataParallel wrapper: the raw module is reached below via model.module.
    model = nn.DataParallel(model).cuda()
    optimizer = SGD(model.parameters(), lr=opt.lr)
    loss_function = torch.nn.L1Loss()
    # Cosine annealing over the full training run (T_max = opt.epoch).
    lrschedulr = lr_scheduler.CosineAnnealingLR(optimizer, opt.epoch)
    vis = Visualizer(env=opt.name)
    best_psnr = 0
    for i in range(opt.epoch):
        for j, (source_image, target_image) in tqdm(enumerate(dataloader_train)):
            target_image = target_image.cuda()
            source_image = source_image.cuda()
            output = model(source_image)
            loss = loss_function(output, target_image)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (j + 1) % opt.display_freq == 0:
                vis.plot("loss", float(loss))
                # Images are normalized to [-1, 1]; *0.5+0.5 maps back to [0, 1]
                # for display — presumably matching the dataset's normalization
                # (TODO confirm against ImageDataset).
                vis.img("target image", target_image[0] * 0.5 + 0.5)
                vis.img("source image", source_image[0] * 0.5 + 0.5)
                vis.img("output", (output[0] * 0.5 + 0.5).clamp(0, 1))
        # Evaluate every 5 epochs and checkpoint only on a new best PSNR.
        if (i + 1) % 5 == 0:
            test_result = test(model, dataloader_test)
            vis.plot_many(test_result)
            if best_psnr < test_result["psnr"]:
                save_path = "{}/{}_best_psnr_layer{}_ch{}.pth".format(
                    opt.checkpoints_dir, opt.name, opt.n_layer, opt.channels)
                best_psnr = test_result["psnr"]
                # Save the unwrapped module so the checkpoint loads without
                # DataParallel key prefixes.
                torch.save(model.module.state_dict(), save_path)
                print(save_path, test_result)
        lrschedulr.step()
        print("lrschedulr=", lrschedulr.get_last_lr())
def train(self, train_data, val_data=None):
    """Train ``self.model`` on ``train_data`` for ``self.opt.max_epoch`` epochs.

    Tracks a running loss and a 10-class confusion matrix, plots progress via
    visdom, optionally evaluates on ``val_data`` once per epoch, and decays
    the learning rate whenever the epoch's mean loss stops decreasing.

    Args:
        train_data: dataset yielding (data, one-hot label) pairs.
        val_data: optional validation dataset passed to ``self.test``.
    """
    print('Now we begin training')
    train_dataloader = DataLoader(train_data, batch_size=self.opt.batch_size, shuffle=True)
    vis = Visualizer(env=self.opt.env)
    if self.opt.use_gpu:
        self.model.cuda()
    previous_loss = 1e10
    loss_meter = meter.AverageValueMeter()
    Confusion_matrix = meter.ConfusionMeter(10)
    for epoch in range(self.opt.max_epoch):
        loss_meter.reset()
        Confusion_matrix.reset()
        for i, (data, label) in enumerate(train_dataloader, 0):
            if self.opt.use_gpu:
                data = data.cuda()
                label = label.cuda()
            self.optimizer.zero_grad()
            score = self.model(data)
            # Labels appear to be one-hot: argmax recovers the class index
            # for the confusion matrix (the criterion consumes the raw label).
            out_classes = T.argmax(score, 1)
            target_digit = T.argmax(label, 1)
            loss = self.criterion(score, label)
            loss.backward()
            self.optimizer.step()
            # Metric updates.
            loss_meter.add(loss.data.cpu())
            Confusion_matrix.add(out_classes, target_digit)
            # Running train accuracy from the confusion-matrix diagonal.
            accuracy = 100 * sum(
                Confusion_matrix.value()[i, i]
                for i in range(10)) / Confusion_matrix.value().sum()
            if i % self.opt.print_freq == self.opt.print_freq - 1:
                print('EPOCH:{0},i:{1},loss:%.6f'.format(epoch, i) % loss.data.cpu())
                vis.plot('loss', loss_meter.value()[0])
                vis.plot('test_accuracy', accuracy)
        if val_data:
            val_cm, val_ac = self.test(val_data, val=True)
            vis.plot('Val_accuracy', val_ac)
            vis.img('Val Confusion_matrix', T.Tensor(val_cm.value()))
        # Decay the learning rate when the loss stops decreasing.
        # BUGFIX: AverageValueMeter.value() returns (mean, std); the original
        # compared index [-1] (the std) against previous_loss, so the decay
        # trigger was driven by loss variance, not the loss itself.
        if loss_meter.value()[0] > previous_loss:
            self.opt.lr = self.opt.lr * self.opt.lr_decay
            print('learning rate:{}'.format(self.opt.lr))
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = self.opt.lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train the character-level poetry LSTM.

    Loads the poem corpus, trains ``PoetryModel`` with teacher forcing
    (input = sequence[:-1], target = sequence[1:]), periodically visualizes
    the running loss plus sample original/generated poems, and checkpoints
    after every epoch.

    Args:
        **kwargs: config overrides forwarded to ``opt._parse``.
    """
    # step: configure
    opt._parse(**kwargs)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    if opt.env:
        vis = Visualizer(env=opt.env)

    # step: data — `data` is a 2-D numpy array of token ids plus the two
    # vocabulary dicts. t.from_numpy shares memory with the numpy array.
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = DataLoader(data, batch_size=opt.batch_size, shuffle=True,
                            num_workers=opt.num_workers)

    # step: model && criterion && meter && optimizer
    model = PoetryModel(len(word2ix), opt.embedding_dim, opt.hidden_dim,
                        opt.num_layers)
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    model.to(device)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    loss_meter = meter.AverageValueMeter()

    # step: train
    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, x in tqdm(enumerate(dataloader)):
            # The embedding layer needs LongTensor input; the LSTM expects
            # (seq_len, batch_size), hence the transpose. Transposing breaks
            # contiguity, so .contiguous() is required before slicing.
            x = x.long().transpose(1, 0).contiguous()
            x = x.to(device)
            optimizer.zero_grad()
            # Teacher forcing: predict token k+1 from tokens up to k.
            input, target = x[:-1, :], x[1:, :]
            output, _ = model(input)  # (seq_len*batch_size, vocab_size)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            # .item() detaches from the graph so the meter does not keep
            # every iteration's computation graph alive.
            loss_meter.add(loss.item())

            # step: visualize and validate
            if (ii + 1) % opt.print_freq == 0 and opt.env:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                # Decode the current batch back to text, one poem per column.
                # BUGFIX: the original logged `origin_poetries`, a name that
                # only existed in commented-out code (NameError at runtime)
                # while the freshly built `poetries` list went unused.
                # .tolist() converts tensor scalars to plain ints for the
                # ix2word lookup.
                poetries = [[ix2word[word_] for word_ in x[:, j_].tolist()]
                            for j_ in range(x.shape[1])]
                vis.log('<br/>'.join(
                    [''.join(origin_poe) for origin_poe in poetries]),
                    win=u'origin_poem')
                # Sample generations: one poem per seed character.
                gen_poetris = []
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = generate(model, word, ix2word, word2ix)
                    gen_poetris.append(gen_poetry)
                vis.log('<br/>'.join(
                    [''.join(gen_poe) for gen_poe in gen_poetris]),
                    win=u'gen_poem')
        t.save(model.state_dict(),
               '{0}_{1}.pth'.format(opt.model_prefix, epoch))
def train(**kwargs):
    """Train the image-caption model (legacy PyTorch ``Variable`` API).

    Config keys passed as kwargs override ``Config`` attributes. Trains with
    teacher forcing on packed caption sequences, tracks loss and perplexity,
    visualizes a sample image with its ground-truth and generated captions
    every ``opt.plot_every`` iterations, and saves every 100 epochs.
    """
    opt = Config()
    for k, v in kwargs.items():
        setattr(opt, k, v)
    vis = Visualizer(env=opt.env)
    dataloader = get_dataloader(opt)
    _data = dataloader.dataset._data
    word2ix, ix2word = _data['word2ix'], _data['ix2word']
    model = CaptionModel(opt, None, word2ix, ix2word)
    if opt.model_ckpt:
        model.load(opt.model_ckpt)
    optimizer = model.get_optimizer(opt.lr1)
    criterion = t.nn.CrossEntropyLoss()
    model.cuda()
    criterion.cuda()
    loss_meter = meter.AverageValueMeter()
    perplexity = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        perplexity.reset()
        for ii, (imgs, (captions, lengths), indexes) in tqdm.tqdm(enumerate(dataloader)):
            optimizer.zero_grad()
            # NOTE(review): this first assignment is dead — it is recomputed
            # below from the CUDA Variable; kept byte-identical here.
            input_captions = captions[:-1]
            imgs = imgs.cuda()
            captions = captions.cuda()
            imgs = Variable(imgs)
            captions = Variable(captions)
            # Teacher forcing: feed all caption tokens except the last.
            input_captions = captions[:-1]
            # Flatten padded targets to the packed layout the scores use.
            target_captions = pack_padded_sequence(captions, lengths)[0]
            score, _ = model(imgs, input_captions, lengths)
            loss = criterion(score, target_captions)
            loss.backward()
            # clip_grad_norm(model.rnn.parameters(),opt.grad_clip)
            optimizer.step()
            # Legacy 0-dim tensor indexing (pre-0.4 PyTorch style).
            loss_meter.add(loss.data[0])
            perplexity.add(t.exp(loss.data)[0])

            # Visualization.
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                vis.plot('perplexity', perplexity.value()[0])
                # Show the raw image for the first item in the batch.
                raw_img = _data['train']['ix2id'][indexes[0]]
                img_path = '/data/image/ai_cha/caption/ai_challenger_caption_train_20170902/caption_train_images_20170902/' + raw_img
                raw_img = Image.open(img_path).convert('RGB')
                raw_img = tv.transforms.ToTensor()(raw_img)
                vis.img('raw', raw_img)
                # Show the human-written caption (column 0 = first sample).
                raw_caption = captions.data[:, 0]
                raw_caption = ''.join(
                    [_data['ix2word'][ii] for ii in raw_caption])
                vis.text(raw_caption, u'raw_caption')
                # Show the model-generated caption for the same image.
                results = model.generate(imgs.data[0])
                vis.text('</br>'.join(results), u'caption')
        # Checkpoint only every 100th epoch.
        if (epoch + 1) % 100 == 0:
            model.save()
def train(**kwargs):
    """Train a MURA (musculoskeletal radiograph) binary classifier.

    Uses a class-weighted cross-entropy loss (weights from the fixed
    abnormal/normal counts A/N below), Adam, per-epoch checkpointing and
    validation, and an LR decay triggered when the mean training loss stops
    decreasing. Written against the legacy ``Variable`` / ``.data[0]`` API.
    """
    opt.parse(kwargs)
    if opt.use_visdom:
        vis = Visualizer(opt.env)

    # step 1: configure model — model class is selected by name from `models`.
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        print('CUDA MODEL!')
        model.cuda()
    model.train()

    # step 2: data
    train_data = MURA_Dataset(opt.data_root, opt.train_image_paths, train=True, test=False)
    val_data = MURA_Dataset(opt.data_root, opt.test_image_paths, train=False, test=False)
    print('Training images:', len(train_data), 'Validation images:', len(val_data))
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=opt.batch_size, shuffle=False,
                                num_workers=opt.num_workers)

    # step 3: criterion and optimizer.
    # A/N are hard-coded dataset counts (presumably abnormal/normal — TODO
    # confirm); the weights counteract the class imbalance.
    A = 21935
    N = 14873
    weight = t.Tensor([A / (A + N), N / (A + N)])
    if opt.use_gpu:
        weight = weight.cuda()
    criterion = t.nn.CrossEntropyLoss(weight=weight)
    # criterion = FocalLoss(alpha=weight, class_num=2)
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr, weight_decay=opt.weight_decay)

    # step 4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # step 5: train — checkpoints go to checkpoints/<model>/<MMDD>/.
    if not os.path.exists(os.path.join('checkpoints', model.model_name)):
        os.mkdir(os.path.join('checkpoints', model.model_name))
    prefix = time.strftime('%m%d')
    if not os.path.exists(os.path.join('checkpoints', model.model_name, prefix)):
        os.mkdir(os.path.join('checkpoints', model.model_name, prefix))
    s = t.nn.Softmax()
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label, _, body_part) in tqdm(enumerate(train_dataloader)):
            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            # Multi-branch models additionally take the body-part indicator.
            if opt.model.startswith('MultiBranch'):
                score = model(input, body_part)
            else:
                score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize (legacy .data[0] scalar access).
            loss_meter.add(loss.data[0])
            # Softmax the detached scores before feeding the confusion meter.
            confusion_matrix.add(s(Variable(score.data)).data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                if opt.use_visdom:
                    vis.plot('loss', loss_meter.value()[0])
                # debug hook: drop into ipdb when the flag file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # Checkpoint once per epoch.
        ck_name = f'epoch_{epoch}_{str(opt)}.pth'
        model.save(os.path.join('checkpoints', model.model_name, prefix, ck_name))

        # validate and visualize
        val_cm, val_accuracy, val_loss = val(model, val_dataloader)
        cm = confusion_matrix.value()
        if opt.use_visdom:
            vis.plot('val_accuracy', val_accuracy)
            vis.log(
                "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm},train_acc:{train_acc}, "
                "val_acc:{val_acc}".format(
                    epoch=epoch, loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()), lr=lr,
                    train_acc=str(100. * (cm[0][0] + cm[1][1]) / (cm.sum())),
                    val_acc=str(100. * (val_cm.value()[0][0] + val_cm.value()[1][1])
                                / (val_cm.value().sum()))))
        print('val_accuracy: ', val_accuracy)
        print(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm},train_acc:{train_acc}, "
            "val_acc:{val_acc}".format(
                epoch=epoch, loss=loss_meter.value()[0],
                val_cm=str(val_cm.value()),
                train_cm=str(confusion_matrix.value()), lr=lr,
                train_acc=100. * (cm[0][0] + cm[1][1]) / (cm.sum()),
                val_acc=100. * (val_cm.value()[0][0] + val_cm.value()[1][1])
                        / (val_cm.value().sum())))

        # update learning rate: decay in place (keeps optimizer moments)
        # when the mean training loss did not improve.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a relation classifier on sentence data with pretrained word vectors.

    Loads word2vec vectors from ``vector.txt`` next to the training data,
    builds the model named by ``opt.model``, trains with cross-entropy +
    Adam, plots train/val loss and accuracy per epoch, and decays the
    learning rate whenever the mean epoch loss stops decreasing.
    """
    print(kwargs)
    start = time.time()
    # Update config from the command line.
    vis = Visualizer(opt.env)
    opt.parse(kwargs)

    # Load word vectors.
    print("Loading word vectors...Please wait.")
    vector = KeyedVectors.load_word2vec_format(
        os.path.join(os.path.dirname(os.path.realpath(opt.train_data_root)),
                     'vector.txt')
    )
    print("Successfully loaded word vectors.")

    # step1: model — input is vector_size + 2 (presumably two extra position
    # features per token; TODO confirm against Sentence dataset).
    model = getattr(models, opt.model)(input_size=vector.vector_size + 2,
                                       output_size=opt.class_num)
    if opt.load_model_path:
        model.load(opt.load_model_path)  # warm start from a checkpoint
    if opt.use_gpu and t.cuda.is_available():
        model = model.cuda()
    print(f"Structure of {model.model_name}:\n{model}\n")

    # step2: data
    train_data = Sentence(root=opt.train_data_root, relations=opt.relations,
                          max_length=opt.max_length, vector=vector, train=True)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True)
    val_data = Sentence(opt.train_data_root, opt.relations, opt.max_length,
                        vector, train=False)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=True)

    # step3: loss and optimizer
    loss_fn = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters — smoothed loss and a confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(opt.class_num)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in enumerate(train_dataloader):
            input = data
            target = label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            prediction = model(input)
            loss = loss_fn(prediction, target)
            loss.backward()
            optimizer.step()
            # Update meters (item() detaches the scalar from the graph).
            loss_meter.add(loss.item())
            confusion_matrix.add(prediction.data, target.data)

        # Train accuracy from the confusion-matrix diagonal.
        cm_value = confusion_matrix.value()
        correct = 0
        for i in range(cm_value.shape[0]):
            correct += cm_value[i][i]
        accuracy = 100. * correct / (cm_value.sum())
        vis.plot('train loss', loss_meter.value()[0])
        vis.plot('train accuracy', accuracy)
        if epoch % opt.save_epoch == opt.save_epoch - 1:
            model.save()

        # Validation metrics and visualization.
        val_lm, val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val loss', val_lm.value()[0])
        vis.plot('val accuracy', val_accuracy)
        print("epoch:{epoch}, lr:{lr}, loss:{loss}\ntrain_cm:\n{train_cm}\nval_cm:\n{val_cm}"
              .format(epoch=epoch, loss=loss_meter.value()[0],
                      val_cm=str(val_cm.value()),
                      train_cm=str(confusion_matrix.value()), lr=lr))

        # Decay the learning rate when the loss stops improving.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

    cost = int(time.time()) - int(start)
    print(f"Cost {int(cost/60)}min{cost%60}s.")
def train(**kwargs):
    """Train the image-caption model with teacher forcing.

    Loads the preprocessed caption data (vocab dicts, image-id maps, EOS
    marker), trains for ``opt.max_epoch`` epochs on packed caption sequences,
    periodically logs the ground-truth and generated captions for the first
    sample of the batch, and checkpoints after every epoch.

    Args:
        **kwargs: config overrides forwarded to ``opt._parse``.
    """
    # step: configure
    opt._parse(**kwargs)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    vis = Visualizer(env=opt.env)

    # step: data — vocab dicts plus the id<->index maps used to recover the
    # image filename for visualization, and the EOS token for generation.
    dataloader = get_dataloader(opt)
    _data = dataloader.dataset.data
    word2ix, ix2word = _data['word2ix'], _data['ix2word']
    ix2id, id2ix, end = _data['ix2id'], _data['id2ix'], _data['end']
    eos_id = word2ix[end]

    # step: model — opt is passed through so it can be stored alongside the
    # checkpoint for later generation.
    model = CaptionModel(opt, len(ix2word))
    if opt.model_ckpt:
        model.load(opt.model_ckpt)
    model.to(device)

    # step: meter, criterion, optimizer
    loss_meter = meter.AverageValueMeter()
    criterion = t.nn.CrossEntropyLoss()
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    model.save()

    # step: train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        for ii, (imgs, (captions, lengths), indexes) in tqdm.tqdm(
                enumerate(dataloader), total=len(dataloader)):
            optimizer.zero_grad()
            imgs = imgs.to(device)
            captions = captions.to(device)
            # Teacher forcing: inputs are all tokens but the last; targets are
            # the packed (flattened, padding-free) full sequences.
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths).data  # len
            score, _ = model(imgs, input_captions, lengths)  # len*vocab
            loss = criterion(score, target_captions)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.data.item())

            # step: visualize
            if (ii + 1) % opt.print_freq == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                # indexes[0] identifies the first image of the batch so its
                # file can be located via ix2id for display.
                img_path = os.path.join(opt.img_path, ix2id[indexes[0]])
                raw_img = Image.open(img_path).convert('RGB')
                raw_img = tv.transforms.ToTensor()(raw_img)
                # Ground-truth caption for sample 0. BUGFIX: the original
                # passed the raw list of token ids straight into
                # '<br>'.join(...), which raises TypeError (join needs
                # strings); decode through ix2word first.
                raw_caption = captions.data[:, 0].tolist()
                raw_caption = ''.join([ix2word[i] for i in raw_caption])
                info = '<br>'.join([ix2id[indexes[0]], raw_caption])
                vis.log(u'raw_caption', info, False)
                # Generated captions for the same image.
                results, scores = model.generate(img=imgs.data[0], eos_id=eos_id)
                cap_sentences = [
                    ''.join([ix2word[ix.item()] for ix in sentence])
                    for sentence in results]
                info = '<br>'.join(cap_sentences)
                info = '<br>'.join([ix2id[indexes[0]], info])
                vis.log(u'val', info, False)
        model.save()
def train(**kwargs): """ 训练 """ # 根据命令行参数更新配置 opt._parse(kwargs) vis = Visualizer(opt.env, port=opt.vis_port) # step1: configure model 模型 model = getattr(models, opt.model)() # 最后的()不要忘 if opt.load_model_path: model.load(opt.load_model_path) model.to(opt.device) # 这一行和书中相比,改过 # step2: data 数据 train_dataset = BatteryCap(opt.train_data_root, train=True) # 训练集 train_dataloader = DataLoader(train_dataset, opt.batch_size, shuffle=True, num_workers=opt.num_workers) val_dataset = BatteryCap(opt.train_data_root, train=False) # 交叉验证集 val_dataloader = DataLoader(val_dataset, opt.batch_size, shuffle=False, num_workers=opt.num_workers) # step3: criterion and optimizer 目标函数和优化器 criterion = t.nn.CrossEntropyLoss() lr = opt.lr optimizer = model.get_optimizer(lr, opt.weight_decay) # step4: meters 统计指标:平滑处理之后的损失,还有混淆矩阵 loss_meter = meter.AverageValueMeter() confusion_matrix = meter.ConfusionMeter(2) previous_loss = 1e10 # train 训练 for epoch in range(opt.max_epoch): loss_meter.reset() confusion_matrix.reset() for ii, (data, label) in tqdm(enumerate(train_dataloader)): # train model 训练模型参数 input_batch = data.to(opt.device) label_batch = label.to(opt.device) optimizer.zero_grad() # 梯度清零 score = model(input_batch) print("网络输出的:", score) print("-------------------------------") print("label", label) print("-------------------------------") print("softmax后:", t.nn.functional.softmax(score.detach(), dim=1).detach().tolist()) print("-------------------------------") loss = criterion(score, label_batch) loss.backward() # 反向传播 optimizer.step() # 优化 # meters update and visualize 更新统计指标及可视化 loss_meter.add(loss.item()) # detach 一下更安全保险 confusion_matrix.add(score.detach(), label_batch.detach()) if (ii + 1) % opt.print_freq == 0: vis.plot('loss', loss_meter.value()[0]) # 先不可视化了!!! 
print(' loss: ', loss_meter.value()[0]) # 如果需要的话,进入debug模式 if os.path.exists(opt.debug_file): import ipdb; ipdb.set_trace() model.save() # validate and visualize 计算验证集上的指标及可视化 val_cm, val_accuracy = val(model, val_dataloader) vis.plot('val_accuracy', val_accuracy) vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format( epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), train_cm=str(confusion_matrix.value()), lr=lr)) # update learning rate 如果损失不再下降,则降低学习率 if loss_meter.value()[0] > previous_loss: lr = lr * opt.lr_decay # 第二种降低学习率的方法:不会有moment等信息的丢失 for param_group in optimizer.param_groups: param_group['lr'] = lr previous_loss = loss_meter.value()[0] print('第', str(epoch), '个迭代已结束') print("验证集准确率为: ", str(val_accuracy)) print('---' * 50)
def train(**kwargs):
    """Train a BiLSTM-CRF sequence tagger on precomputed feature files.

    Trains on four augmented feature variants (normal/flipped), validates on
    the unaugmented features, tracks a hand-rolled 3x3 confusion matrix, and
    checkpoints whenever validation accuracy improves. Samples that raise
    NameError inside the CRF loss are counted and skipped.
    """
    config.parse(kwargs)
    vis = Visualizer(port=2333, env=config.env)
    # Training uses the four augmentation variants; validation the originals.
    train_roots = [
        os.path.join(config.data_root, 'Features_Normal'),
        os.path.join(config.data_root, 'Features_Horizontal'),
        os.path.join(config.data_root, 'Features_Vertical'),
        os.path.join(config.data_root, 'Features_Horizontal_Vertical')
    ]
    val_roots = [os.path.join(config.data_root, 'Features')]
    train_data = Feature_Dataset(train_roots, config.train_paths,
                                 phase='train', balance=config.data_balance)
    val_data = Feature_Dataset(val_roots, config.test_paths,
                               phase='val', balance=config.data_balance)
    print('Training Feature Lists:', train_data.__len__(),
          'Validation Feature Lists:', val_data.__len__())
    # batch_size=1: each item is one variable-length feature sequence.
    train_dataloader = DataLoader(train_data, batch_size=1, shuffle=True,
                                  num_workers=config.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=1, shuffle=False,
                                num_workers=config.num_workers)

    # prepare model
    model = BiLSTM_CRF(tag_to_ix=tag_to_ix, embedding_dim=EMBEDDING_DIM,
                       hidden_dim=HIDDEN_DIM, num_layers=NUM_LAYERS)
    if config.load_model_path:
        model.load(config.load_model_path)
    if config.use_gpu:
        model.cuda()
    model.train()

    # criterion and optimizer — the CRF negative log-likelihood is computed
    # by the model itself, so only an optimizer is needed here.
    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)

    # metric
    loss_meter = meter.AverageValueMeter()
    previous_loss = 100000
    previous_acc = 0

    # train
    if not os.path.exists(os.path.join('checkpoints', model.model_name)):
        os.mkdir(os.path.join('checkpoints', model.model_name))
    for epoch in range(config.max_epoch):
        loss_meter.reset()
        # Hand-rolled 3x3 confusion matrix: rows = target, cols = prediction.
        train_cm = [[0] * 3, [0] * 3, [0] * 3]
        count = 0  # number of samples skipped due to NameError
        for i, (features, labels, feature_paths) in tqdm(enumerate(train_dataloader)):
            # prepare input — labels arrive as length-1 tuples per step
            # (presumably a DataLoader collation artifact; TODO confirm).
            target = torch.LongTensor([tag_to_ix[t[0]] for t in labels])
            feat = Variable(features.squeeze())
            if config.use_gpu:
                feat = feat.cuda()
            model.zero_grad()
            # NOTE(review): the NameError guard looks like a workaround for a
            # data-dependent failure inside the CRF loss — confirm upstream.
            try:
                neg_log_likelihood = model.neg_log_likelihood(feat, target)
            except NameError:
                count += 1
                continue
            neg_log_likelihood.backward()
            optimizer.step()
            loss_meter.add(neg_log_likelihood.data[0])  # legacy scalar access
            # Viterbi-decode the same sample and update the confusion matrix.
            result = model(feat)
            for t, r in zip(target, result[1]):
                train_cm[t][r] += 1
            if i % config.print_freq == config.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                print('loss', loss_meter.value()[0])

        train_accuracy = 100. * sum(
            [train_cm[c][c] for c in range(config.num_classes)]) / np.sum(train_cm)
        val_cm, val_accuracy, val_loss = val(model, val_dataloader)
        # Checkpoint only on a new best validation accuracy.
        if val_accuracy > previous_acc:
            if config.save_model_name:
                model.save(os.path.join('checkpoints', model.model_name,
                                        config.save_model_name))
            else:
                model.save(os.path.join('checkpoints', model.model_name,
                                        model.model_name + '_best_model.pth'))
            previous_acc = val_accuracy
        vis.plot_many({
            'train_accuracy': train_accuracy,
            'val_accuracy': val_accuracy
        })
        vis.log(
            "epoch: [{epoch}/{total_epoch}], lr: {lr}, loss: {loss}".format(
                epoch=epoch + 1, total_epoch=config.max_epoch, lr=lr,
                loss=loss_meter.value()[0]))
        vis.log('train_cm:')
        vis.log(train_cm)
        vis.log('val_cm')
        vis.log(val_cm)
        print('train_accuracy:', train_accuracy, 'val_accuracy:', val_accuracy)
        print("epoch: [{epoch}/{total_epoch}], lr: {lr}, loss: {loss}".format(
            epoch=epoch + 1, total_epoch=config.max_epoch, lr=lr,
            loss=loss_meter.value()[0]))
        print('train_cm:')
        print(train_cm)
        print('val_cm:')
        print(val_cm)
        print('Num of NameError:', count)

        # update learning rate when the mean loss stops decreasing.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * config.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train the image-caption model.

    Config keys passed as kwargs override ``Config`` attributes. Trains with
    teacher forcing on packed caption sequences; every ``opt.plot_every``
    iterations shows the raw image, its human caption, and the model's
    generated captions; checkpoints after every epoch.
    """
    opt = Config()
    for k, v in kwargs.items():
        setattr(opt, k, v)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    opt.caption_data_path = 'caption.pth'  # preprocessed caption data
    opt.test_img = ''  # input image (unused during training)
    # opt.model_ckpt='caption_0914_1947'  # pretrained checkpoint

    # Data.
    vis = Visualizer(env=opt.env)
    dataloader = get_dataloader(opt)
    _data = dataloader.dataset._data
    word2ix, ix2word = _data['word2ix'], _data['ix2word']

    # Model.
    model = CaptionModel(opt, word2ix, ix2word)
    if opt.model_ckpt:
        model.load(opt.model_ckpt)
    optimizer = model.get_optimizer(opt.lr)
    criterion = t.nn.CrossEntropyLoss()
    model.to(device)

    # Metrics.
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, (imgs, (captions, lengths), indexes) in tqdm.tqdm(enumerate(dataloader)):
            # Training step.
            optimizer.zero_grad()
            imgs = imgs.to(device)
            captions = captions.to(device)
            # Teacher forcing: inputs are all tokens but the last; targets are
            # the packed (padding-free) full sequences.
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]
            score, _ = model(imgs, input_captions, lengths)
            loss = criterion(score, target_captions)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())

            # Visualization.
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])
                # Raw image + human-written caption for batch sample 0
                # (indexes[0] maps back to the image filename via ix2id).
                raw_img = _data['ix2id'][indexes[0]]
                img_path = opt.img_path + raw_img
                raw_img = Image.open(img_path).convert('RGB')
                raw_img = tv.transforms.ToTensor()(raw_img)
                raw_caption = captions.data[:, 0]
                raw_caption = ''.join([_data['ix2word'][ii] for ii in raw_caption])
                vis.text(raw_caption, u'raw_caption')
                vis.img('raw', raw_img, caption=raw_caption)
                # Model-generated caption for the same image.
                results = model.generate(imgs.data[0])
                vis.text('</br>'.join(results), u'caption')
        model.save()
def train(**kwargs):
    """Train the word-vector language model with an in-loop test evaluation.

    Loads the parsed sequence data, splits it 80/20 into train/test, copies
    pretrained word vectors into the frozen embedding table, and after every
    epoch evaluates average test loss plus next-token accuracy on the middle
    quarter of each test batch, then checkpoints.

    Args:
        **kwargs: config overrides applied onto the module-level ``opt``
            (string values have surrounding quotes stripped).
    """
    # Apply command-line overrides onto opt.
    for k, v in kwargs.items():
        setattr(opt, k, v.strip("'"))

    # Select the device.
    opt.device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    device = opt.device
    vis = Visualizer(env=opt.env)

    # Load the token sequences from sequence.npz.
    data, word2ix_train, ix2word_train, word2ix_fix, ix2word_fix = load_data(
        opt.parsed_data_path)
    random.shuffle(data)

    # 80/20 train/test split, then wrap in dataloaders.
    devision = int(len(data) * 8 / 10)
    train_data = data[:devision]
    test_data = data[devision + 1:]
    train_data = t.from_numpy(train_data)
    test_data = t.from_numpy(test_data)
    dataloader = t.utils.data.DataLoader(train_data, batch_size=opt.batch_size,
                                         shuffle=True, num_workers=1)
    dataloader_fortest = t.utils.data.DataLoader(test_data, batch_size=opt.batch_size,
                                                 shuffle=True, num_workers=1)

    # Model: only parameters with requires_grad are optimized, so the
    # pretrained embedding table below stays frozen.
    model = TrainingModel_Vec(len(word2ix_train), len(word2ix_fix), 200, 400)
    optimizer = t.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    loss_meter = meter.AverageValueMeter()

    # Copy the pretrained word vectors (ordered by index) into the embedding.
    pretrained_weight = form_matrix(ix2word_fix, opt.pathforvec)
    pretrained_weight = np.array(pretrained_weight)
    model.embeddingsfix.weight.data.copy_(t.from_numpy(pretrained_weight))

    i = 0
    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            # LSTM expects (seq_len, batch); transpose then restore contiguity.
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(device)
            optimizer.zero_grad()
            # Teacher forcing: drop the last row for input, first for target.
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())

            # Plot both the smoothed and the raw loss to the same window.
            if (1 + ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('lossintrain', loss_meter.value()[0])
                vis.plot('lossintrain', loss.item())

        # Per-epoch test evaluation.
        loss_meter.reset()
        model.eval()
        test_loss = 0   # accumulated mean batch loss
        correct = 0     # correctly predicted tokens (middle quarter only)
        total = 0       # evaluated tokens
        for iii, datatest in enumerate(dataloader_fortest):
            datatest = datatest.long().transpose(1, 0).contiguous()
            datatest = datatest.to(device)
            input_test, target_test = datatest[:-1, :], datatest[1:, :]
            output_test, _ = model(input_test)
            # BUGFIX: accumulate a detached Python float; the original summed
            # live loss tensors, retaining every batch's computation graph.
            test_loss += criterion(output_test, target_test.view(-1)).item()
            # Index of the max log-probability per position.
            pred = output_test.data.max(1, keepdim=True)[1]
            # Score only the middle quarter of each sequence (positions
            # [2/4, 3/4) — presumably to skip warm-up/tail context; confirm).
            target_test = target_test.data.view_as(
                pred)[int(pred.size()[0] / 4 * 2):int(pred.size()[0] / 4 * 3)]
            pred = pred[int(pred.size()[0] / 4 * 2):int(pred.size()[0] / 4 * 3)]
            correct += pred.eq(target_test).cpu().sum()
            total += target_test.size()[0]
        # BUGFIX: average over the number of batches (iii + 1); the original
        # divided by the last index iii — off by one, and a ZeroDivisionError
        # when the test loader yields a single batch.
        test_loss /= (iii + 1)
        print(epoch)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
              format(test_loss, correct, total, 100. * correct / total))
        model.train()
        t.save(model.state_dict(), '%s_%s.pth' % ("testtestingfix", epoch))
def train(**kwargs):
    """Train one text-classification model (Python 2).

    Selects char/word/char-word/word-char features per the config, optionally
    runs in "boost" mode where per-class loss weights and accumulated
    validation logits from the previous boosting layer are loaded, and saves
    the next layer's artifacts once ``epoch + base_epoch >= 5``.
    """
    opt = DefaultConfig()
    opt.update(**kwargs)
    vis = Visualizer(opt['model'])
    logger = Logger()
    # '_2' suffix selects the double-length preprocessed arrays
    prefix = ''
    if opt['use_double_length']: prefix += '_2'
    print prefix
    # Load title/desc/label arrays for exactly one feature configuration.
    if opt['use_char']:
        logger.info('Load char data starting...')
        opt['embed_num'] = opt['char_embed_num']
        embed_mat = np.load(opt['char_embed'])
        train_title = np.load(opt['train_title_char' + prefix])
        train_desc = np.load(opt['train_desc_char' + prefix])
        train_label = np.load(opt['train_label'])
        val_title = np.load(opt['val_title_char' + prefix])
        val_desc = np.load(opt['val_desc_char' + prefix])
        val_label = np.load(opt['val_label'])
        logger.info('Load char data finished!')
    elif opt['use_word']:
        logger.info('Load word data starting...')
        opt['embed_num'] = opt['word_embed_num']
        embed_mat = np.load(opt['word_embed'])
        train_title = np.load(opt['train_title_word' + prefix])
        train_desc = np.load(opt['train_desc_word' + prefix])
        train_label = np.load(opt['train_label'])
        val_title = np.load(opt['val_title_word' + prefix])
        val_desc = np.load(opt['val_desc_word' + prefix])
        val_label = np.load(opt['val_label'])
        logger.info('Load word data finished!')
    elif opt['use_char_word']:
        # char titles + word descriptions; embeddings stacked char-over-word
        logger.info('Load char-word data starting...')
        embed_mat_char = np.load(opt['char_embed'])
        embed_mat_word = np.load(opt['word_embed'])
        embed_mat = np.vstack((embed_mat_char, embed_mat_word))
        train_title = np.load(opt['train_title_char' + prefix])
        train_desc = np.load(opt['train_desc_word' + prefix])
        train_label = np.load(opt['train_label'])
        val_title = np.load(opt['val_title_char' + prefix])
        val_desc = np.load(opt['val_desc_word' + prefix])
        val_label = np.load(opt['val_label'])
        logger.info('Load char-word data finished!')
    elif opt['use_word_char']:
        # word titles + char descriptions
        logger.info('Load word-char data starting...')
        embed_mat_char = np.load(opt['char_embed'])
        embed_mat_word = np.load(opt['word_embed'])
        embed_mat = np.vstack((embed_mat_char, embed_mat_word))
        train_title = np.load(opt['train_title_word' + prefix])
        train_desc = np.load(opt['train_desc_char' + prefix])
        train_label = np.load(opt['train_label'])
        val_title = np.load(opt['val_title_word' + prefix])
        val_desc = np.load(opt['val_desc_char' + prefix])
        val_label = np.load(opt['val_label'])
        logger.info('Load word-char data finished!')
    train_dataset = Dataset(title=train_title, desc=train_desc,
                            label=train_label, class_num=opt['class_num'])
    train_loader = data.DataLoader(train_dataset, shuffle=True,
                                   batch_size=opt['batch_size'])
    val_dataset = Dataset(title=val_title, desc=val_desc, label=val_label,
                          class_num=opt['class_num'])
    val_loader = data.DataLoader(val_dataset, shuffle=False,
                                 batch_size=opt['batch_size'])
    logger.info('Using model {}'.format(opt['model']))
    # model class is looked up by name in the project's ``models`` package
    Model = getattr(models, opt['model'])
    model = Model(embed_mat, opt)
    print model
    loss_weight = torch.ones(opt['class_num'])
    if opt['boost']:
        # Boosting: resume accumulated validation logits (cal_res) and the
        # per-class loss weights produced by the previous layer.
        if opt['base_layer'] != 0:
            cal_res = torch.load('{}/{}/layer_{}_cal_res_3.pt'.format(
                opt['model_dir'], opt['model'], opt['base_layer']),
                map_location=lambda storage, loc: storage)
            logger.info('Load cal_res successful!')
            loss_weight = torch.load('{}/{}/layer_{}_loss_weight_3.pt'.format(
                opt['model_dir'], opt['model'], opt['base_layer'] + 1),
                map_location=lambda storage, loc: storage)
        else:
            cal_res = torch.zeros(opt['val_num'], opt['class_num'])
        print 'cur_layer:', opt['base_layer'] + 1, \
            'loss_weight:', loss_weight.mean(), loss_weight.max(), loss_weight.min(), loss_weight.std()
    # loss may come from the project (custom) or straight from torch.nn
    if opt['use_self_loss']:
        Loss = getattr(models, opt['loss_function'])
    else:
        Loss = getattr(nn, opt['loss_function'])
    if opt['load']:
        if opt.get('load_name', None) is None:
            model = load_model(model, model_dir=opt['model_dir'],
                               model_name=opt['model'])
        else:
            model = load_model(model, model_dir=opt['model_dir'],
                               model_name=opt['model'], name=opt['load_name'])
    if opt['cuda'] and opt['device'] != None:
        torch.cuda.set_device(opt['device'])
    if opt['cuda']:
        model.cuda()
        loss_weight = loss_weight.cuda()
    # import sys
    # precision, recall, score = eval(val_loader, model, opt, save_res=True)
    # print precision, recall, score
    # sys.exit()
    # recenter the weights around 1 so the mean loss scale is preserved
    loss_function = Loss(weight=loss_weight + 1 - loss_weight.mean())
    optimizer = torch.optim.Adam(model.parameters(), lr=opt['lr'])
    logger.info('Start running...')
    steps = 0
    model.train()
    base_epoch = opt['base_epoch']
    for epoch in range(1, opt['epochs'] + 1):
        for i, batch in enumerate(train_loader, 0):
            title, desc, label = batch
            title, desc, label = Variable(title), Variable(desc), Variable(
                label).float()
            if opt['cuda']:
                title, desc, label = title.cuda(), desc.cuda(), label.cuda()
            optimizer.zero_grad()
            logit = model(title, desc)
            loss = loss_function(logit, label)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % opt['log_interval'] == 0:
                # multi-label accuracy: element-wise threshold match
                corrects = ((logit.data > opt['threshold']) ==
                            (label.data).byte()).sum()
                accuracy = 100.0 * corrects / (opt['batch_size'] *
                                               opt['class_num'])
                log_info = 'Steps[{:>8}] (epoch[{:>2}] / batch[{:>5}]) - loss: {:.6f}, acc: {:.4f} % ({} / {})'.format( \
                    steps, epoch + base_epoch, (i+1), loss.data[0], accuracy, \
                    corrects, opt['batch_size'] * opt['class_num'])
                logger.info(log_info)
                vis.plot('loss', loss.data[0])
                precision, recall, score = eval(batch, model, opt, isBatch=True)
                vis.plot('score', score)
        logger.info('Training epoch {} finished!'.format(epoch + base_epoch))
        precision, recall, score = eval(val_loader, model, opt)
        log_info = 'Epoch[{}] - score: {:.6f} (precision: {:.4f}, recall: {:.4f})'.format( \
            epoch + base_epoch, score, precision, recall)
        vis.log(log_info)
        save_model(model, model_dir=opt['model_dir'], model_name=opt['model'], \
                   epoch=epoch+base_epoch, score=score)
        # epoch-schedule: unfreeze embeddings at 2, decay LR at 4, boost at >=5
        if epoch + base_epoch == 2:
            model.opt['static'] = False
        elif epoch + base_epoch == 4:
            for param_group in optimizer.param_groups:
                param_group['lr'] = opt['lr'] * opt['lr_decay']
        elif epoch + base_epoch >= 5:
            if opt['boost']:
                # fold this layer's validation logits into the running sum and
                # derive the next layer's loss weights
                res, truth = eval(val_loader, model, opt, return_res=True)
                ori_score = get_score(cal_res, truth)
                cal_res += res
                cur_score = get_score(cal_res, truth)
                logger.info('Layer {}: {}, Layer {}: {}'.format(
                    opt['base_layer'], ori_score, opt['base_layer'] + 1,
                    cur_score))
                loss_weight = get_loss_weight(cal_res, truth)
                torch.save(
                    cal_res, '{}/{}/layer_{}_cal_res_3.pt'.format(
                        opt['model_dir'], opt['model'], opt['base_layer'] + 1))
                logger.info('Save cal_res successful!')
                torch.save(
                    loss_weight, '{}/{}/layer_{}_loss_weight_3.pt'.format(
                        opt['model_dir'], opt['model'], opt['base_layer'] + 2))
            # NOTE(review): indentation reconstructed from mangled source —
            # this break appears to end training once the boost stage runs.
            break
def train_stack(**kwargs):
    """Train a stacking (ensemble) model over saved base-model predictions
    (Python 2).

    Each entry of ``resmat`` is ``(path_to_saved_logits, num_snapshots)``;
    the Stack_Dataset serves those matrices as inputs and ``label.pt`` as the
    target.  A checkpoint is written after every epoch.
    """
    opt = DefaultConfig()
    opt.update(**kwargs)
    vis = Visualizer(opt['model'])
    logger = Logger()
    # hard-coded locations of the base models' saved prediction matrices
    result_dir = '/home/dyj/'
    resmat = [(result_dir + 'RNN10_cal_res.pt', 10),\
              (result_dir + 'TextCNN10_char.pt', 10),\
              (result_dir + 'TextCNN10_top1.pt', 10),\
              (result_dir + 'TextCNN10_top1_char.pt', 10),\
              (result_dir + 'FastText10_res.pt', 10),\
              ('/mnt/result/results/TextCNN5_12h.pt', 5),\
              ('/mnt/result/results/RNN1_char.pt', 1)
              ]
    label = result_dir + 'label.pt'
    opt['stack_num'] = len(resmat)
    train_dataset = Stack_Dataset(resmat=resmat, label=label)
    train_loader = data.DataLoader(train_dataset, shuffle=True,
                                   batch_size=opt['batch_size'])
    logger.info('Using model {}'.format(opt['model']))
    Model = getattr(models, opt['model'])
    model = Model(opt)
    print model
    if opt['use_self_loss']:
        Loss = getattr(models, opt['loss_function'])
    else:
        Loss = getattr(nn, opt['loss_function'])
    if opt['load']:
        if opt.get('load_name', None) is None:
            model = load_model(model, model_dir=opt['model_dir'],
                               model_name=opt['model'])
        else:
            model = load_model(model, model_dir=opt['model_dir'],
                               model_name=opt['model'], \
                               name=opt['load_name'])
    if opt['device'] != None:
        torch.cuda.set_device(opt['device'])
    if opt['cuda']:
        model.cuda()
    loss_function = Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt['lr'])
    logger.info('Start running...')
    steps = 0
    model.train()
    for epoch in range(opt['base_epoch'] + 1, opt['epochs'] + 1):
        for i, batch in enumerate(train_loader, 1):
            # batch = [*prediction matrices, label]
            resmat, label = batch[0:-1], batch[-1]
            resmat, label = [Variable(ii) for ii in resmat], Variable(label)
            if opt['cuda']:
                resmat, label = [ii.cuda() for ii in resmat], label.cuda()
            optimizer.zero_grad()
            logit = model(resmat)
            loss = loss_function(logit, label)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % opt['log_interval'] == 0:
                # multi-label accuracy via element-wise threshold match
                corrects = ((logit.data > opt['threshold']) ==
                            (label.data).byte()).sum()
                accuracy = 100.0 * corrects / (opt['batch_size'] *
                                               opt['class_num'])
                log_info = 'Steps[{:>8}] (epoch[{:>2}] / batch[{:>5}]) - loss: {:.6f}, acc: {:.4f} % ({} / {})'.format( \
                    steps, epoch, i, loss.data[0], accuracy, \
                    corrects, opt['batch_size'] * opt['class_num'])
                logger.info(log_info)
                vis.plot('loss', loss.data[0])
                precision, recall, score = get_score(logit.data.cpu(),
                                                     label.data.cpu())
                logger.info('Precision {}, Recall {}, Score {}'.format(
                    precision, recall, score))
                vis.plot('score', score)
        logger.info('Training epoch {} finished!'.format(epoch))
        #save_model(model, model_dir=opt['model_dir'], model_name=opt['model'], epoch=epoch)
        if epoch == 3:
            # one-shot LR decay at epoch 3
            for param_group in optimizer.param_groups:
                param_group['lr'] = opt['lr'] * opt['lr_decay']
        save_model(model, model_dir=opt['model_dir'], model_name=opt['model'],
                   epoch=epoch)
def train(**kwargs):
    """Train the char-level LSTM language model.

    Keyword args override attributes on the global ``opt`` config.  Trains for
    ``opt.epoch`` epochs, checkpoints the best model (lowest validation loss)
    to ``<model_prefix><model>.pth``, and finally reports the test-set loss of
    that best checkpoint.
    """
    import copy  # local: only needed to snapshot the best weights
    from contextlib import suppress

    for k, v in kwargs.items():
        setattr(opt, k, v)  # copy CLI-style overrides onto the global config
    vis = Visualizer(env=opt.env)  # visdom environment
    # torchtext iterators plus the field carrying the vocabulary
    train_iter, valid_iter, test_iter, field = load_data()
    word2ix = field.vocab.stoi
    ix2word = field.vocab.itos
    # model: vocab-size input, 300-dim embedding, 150-dim hidden (per lstm())
    model = lstm(len(word2ix), 300, 150)
    # FIX: the original kept ``best_model = model`` — an alias of the live
    # model, so the final test always evaluated the LAST epoch's weights.
    # Snapshot the state dict instead and restore it before testing.
    best_state = copy.deepcopy(model.state_dict())
    best_valid_loss = float("inf")
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=1e-6)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    loss_meter = meter.AverageValueMeter()
    count = 0
    for epoch in range(opt.epoch):
        model.train()
        loss_meter.reset()
        logging.info("这是第{0}次epoch".format(count + 1))
        cnt = 0
        for batch in tqdm.tqdm(train_iter):
            data = batch.text
            if opt.use_gpu:
                data = data.cuda()
            optimizer.zero_grad()
            # CharRNN style: input and target are the sequence offset by one
            input_, target = Variable(data[:-1, :]), Variable(data[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())
            if (1 + cnt) % opt.plot_every == 0:
                vis.plot('loss', loss_meter.value()[0])
            cnt += 1
        count += 1
        valid_loss = evaluate(model, valid_iter, criterion)
        logging.info("第%d次验证集的loss为: %f" % (count, valid_loss))
        if valid_loss < best_valid_loss:
            # FIX: was ``os.system('rm ' + path)`` — spawned a shell, unsafe
            # and non-portable.  Remove the stale checkpoint directly, and
            # tolerate its absence (first epoch).
            with suppress(FileNotFoundError):
                os.remove(opt.model_prefix + opt.model + '.pth')
            best_valid_loss = valid_loss
            best_state = copy.deepcopy(model.state_dict())
            t.save(best_state, '%s%s.pth' % (opt.model_prefix, opt.model))
    # restore the best weights before the final test evaluation
    model.load_state_dict(best_state)
    test_loss = evaluate(model, test_iter, criterion)
    logging.info("测试集的loss为: %f" % test_loss)
def train_attention(**kwargs):
    """Train the attention-augmented LSTM language model.

    Unrolls the sequence one timestep at a time, feeding the attention hidden
    state and the growing history of previous hidden states back into the
    model.  Validation loss drives a ReduceLROnPlateau scheduler; each new
    best model is checkpointed and immediately scored on the test set.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)  # copy keyword overrides onto the global config
    vis = Visualizer(env=opt.env)  # visdom environment
    logging.info("============attention的训练过程================")
    # data: torchtext iterators plus the vocabulary field
    train_iter, valid_iter, test_iter, field = load_data()
    word2ix = field.vocab.stoi
    ix2word = field.vocab.itos
    # model: 300-dim embedding, 150-dim hidden
    model = lstm_att(len(word2ix), 300, 150)
    best_model = model
    best_valid_loss = float("inf")
    # lambda1 = lambda epoch: epoch // 5
    # lambda2 = lambda epoch: 0.95 ** epoch
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=1e-6)
    scheduler = t.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    criterion = nn.NLLLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    loss_meter = meter.AverageValueMeter()
    count = 0
    for epoch in range(opt.epoch):
        model.train()
        loss_meter.reset()
        logging.info("这是第{0}次epoch".format(count + 1))
        cnt = 0
        # NOTE(review): sampled once per EPOCH, and never read below —
        # teacher forcing appears to be vestigial here.
        use_teacher_forcing = True if random.random(
        ) < teacher_forcing_ratio else False
        for batch in tqdm.tqdm(train_iter):
            loss = 0
            data = batch.text  # (seq_len, batch)
            batch_size = data.shape[1]
            # attention state and history of previous hidden states
            att_hidden = Variable(
                t.zeros(batch_size, 150),
                requires_grad=False)  # (batch_size, hidden_dim)
            pre_hiddens = Variable(t.zeros(batch_size, 1, 150),
                                   requires_grad=False)
            if opt.use_gpu:
                data = data.cuda()
                att_hidden = att_hidden.cuda()
                pre_hiddens = pre_hiddens.cuda()
            optimizer.zero_grad()
            # CharRNN style: input and target are the sequence offset by one
            input_, target_ = Variable(data[:-1, :]), Variable(data[1:, :])
            max_len = input_.size(0)
            model.batch_size = batch_size
            hidden = model.init_hidden()
            # step-by-step unroll so attention can see all previous hiddens
            for ii in range(max_len):
                input = input_[ii]  # (batch_size,)
                target = target_[ii]
                output, att_hidden, pre_hidden, hidden, alpha = model(
                    input, att_hidden, pre_hiddens, hidden)
                # detach so the history does not extend the autograd graph
                pre_hidden = pre_hidden.detach()
                pre_hiddens = t.cat((pre_hiddens, pre_hidden), 1)
                loss += criterion(output, target)
            loss.backward()
            # gradient clipping (legacy pre-1.0 API; clip_grad_norm_ today)
            t.nn.utils.clip_grad_norm(model.parameters(), 5.)
            optimizer.step()
            loss_meter.add(loss.item() / max_len)  # mean per-timestep loss
            if (1 + cnt) % opt.plot_every == 0:
                vis.plot('loss', loss_meter.value()[0])
            cnt += 1
        count += 1
        valid_loss = evaluate_att(model, valid_iter, criterion)
        scheduler.step(valid_loss)
        logging.info("======第%d次验证集的loss为: %f=====" % (count, valid_loss))
        # NOTE(review): block structure reconstructed from mangled source —
        # the test evaluation appears to run on every new best checkpoint.
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            best_model = model
            t.save(best_model.state_dict(),
                   '%s%s_%d.pth' % (opt.model_prefix, opt.model, count))
            test_loss = evaluate_att(best_model, test_iter, criterion)
            logging.info("------测试集的loss为: %f" % test_loss)
        # halve the learning rate at fixed epochs
        if epoch in [5, 10, 15]:
            for param_group in optimizer.param_groups:
                lr = param_group['lr']
                lr *= 0.5
                param_group['lr'] = lr
def train_twin(**kwargs):
    """Train the twin (forward/backward) LSTM language model.

    Runs the sequence forward and reversed through two directions of the
    model; in addition to the two cross-entropy terms, a "twin" MSE term pulls
    the forward hidden states toward the (detached) time-aligned backward
    hidden states.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)  # copy keyword overrides onto the global config
    vis = Visualizer(env=opt.env)  # visdom environment
    train_iter, valid_iter, test_iter, field = load_data()
    word2ix = field.vocab.stoi
    ix2word = field.vocab.itos
    model = lstm_twin(len(word2ix), 300, 150)
    best_model = model
    best_valid_loss = float("inf")
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    scheduler = t.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       'min',
                                                       min_lr=1e-5)
    # CrossEntropyLoss averages over characters, so the value stays ~O(10);
    # with size_average=False it would scale with the token count instead.
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    count = 0
    for epoch in range(opt.epoch):
        model.train()
        logging.info("这是第{0}次epoch".format(count + 1))
        cnt = 0
        # running sums, flushed to visdom every plot_every batches
        b_fwd_loss, b_bwd_loss, b_twin_loss, b_all_loss = 0., 0., 0., 0.
        for batch in tqdm.tqdm(train_iter):
            data = batch.text  # (seq_len, batch)
            seq_len = data.size(0)
            # build a reversed index because tensors don't support negative
            # strides
            idx = np.arange(seq_len)[::-1].tolist()
            idx = t.LongTensor(idx)
            # NOTE(review): unconditional .cuda() here breaks CPU-only runs
            # even though ``data`` is moved only when opt.use_gpu is set.
            idx = Variable(idx).cuda()
            model.batch_size = data.size(1)
            hidden1 = model.init_hidden()
            hidden2 = model.init_hidden()
            if opt.use_gpu:
                data = data.cuda()
            optimizer.zero_grad()
            # CharRNN style: input and target are the sequence offset by one
            f_input, f_target = Variable(data[:-1, :]), Variable(data[1:, :])
            bx = data.index_select(0, idx)  # reversed copy of the batch
            b_input, b_target = Variable(bx[:-1, :]), Variable(bx[1:, :])
            f_out, b_out, f_h, b_h = model(f_input, b_input, hidden1, hidden2)
            f_loss = criterion(f_out, f_target.view(-1))
            b_loss = criterion(b_out, b_target.view(-1))
            # realign the backward hiddens with forward time order
            b_h_inv = b_h.index_select(0, idx[1:])
            b_h_inv = b_h_inv[1:]  # drop <sos>
            # detach: the twin loss should not backprop into the backward pass
            b_h_inv = b_h_inv.detach()
            f_h = f_h[:-1]  # drop <eos>
            twin_loss = ((f_h - b_h_inv)**2).mean()
            twin_loss *= 1.5  # fixed weighting of the twin term
            all_loss = f_loss + b_loss + twin_loss
            all_loss.backward()
            # gradient clipping (legacy pre-1.0 API; clip_grad_norm_ today)
            t.nn.utils.clip_grad_norm(model.parameters(), 5.)
            optimizer.step()
            # accumulate for plotting
            b_all_loss += all_loss.item()
            b_fwd_loss += f_loss.item()
            b_bwd_loss += b_loss.item()
            b_twin_loss += twin_loss.item()
            if (1 + cnt) % opt.plot_every == 0:
                vis.plot('all_loss', b_all_loss / opt.plot_every)
                vis.plot('twin_loss', b_twin_loss / opt.plot_every)
                vis.plot('loss', b_fwd_loss / opt.plot_every)
                b_fwd_loss, b_bwd_loss, b_twin_loss, b_all_loss = 0., 0., 0., 0.
            cnt += 1
        count += 1
        valid_loss = evaluate_twin(model, valid_iter, criterion)
        scheduler.step(valid_loss)
        logging.info("第%d次验证集的loss为: %f" % (count, valid_loss))
        # NOTE(review): block structure reconstructed from mangled source —
        # the test evaluation appears to run on every new best checkpoint.
        if valid_loss < best_valid_loss:
            # os.system('rm ' + opt.model_prefix + opt.model + '.pth')
            best_valid_loss = valid_loss
            best_model = model
            t.save(best_model.state_dict(),
                   '%s%s_%d.pth' % (opt.model_prefix, opt.model, count))
            test_loss = evaluate_twin(best_model, test_iter, criterion)
            logging.info("测试集的loss为: %f" % test_loss)
        # halve the learning rate at fixed epochs
        if epoch in [5, 10, 15]:
            for param_group in optimizer.param_groups:
                lr = param_group['lr']
                lr *= 0.5
                param_group['lr'] = lr
def train(**kwargs):
    """Train ShallowVgg as a 2-class classifier on VB_Dataset.

    Tracks per-class sensitivity (SE), AUC, and loss; saves the checkpoint
    with the best validation mean sensitivity and dumps all per-epoch curves
    to ``process_record.json`` next to it.
    """
    config.parse(kwargs)
    vis = Visualizer(port=2333, env=config.env)
    vis.log('Use config:')
    # echo every non-dunder config attribute into the visdom log
    for k, v in config.__class__.__dict__.items():
        if not k.startswith('__'):
            vis.log(f"{k}: {getattr(config, k)}")
    # prepare data
    train_data = VB_Dataset(config.train_paths,
                            phase='train',
                            useRGB=config.useRGB,
                            usetrans=config.usetrans,
                            padding=config.padding,
                            balance=config.data_balance)
    val_data = VB_Dataset(config.test_paths,
                          phase='val',
                          useRGB=config.useRGB,
                          usetrans=config.usetrans,
                          padding=config.padding,
                          balance=False)
    print('Training Images:', train_data.__len__(), 'Validation Images:',
          val_data.__len__())
    dist = train_data.dist()
    print('Train Data Distribution:', dist, 'Val Data Distribution:',
          val_data.dist())
    train_dataloader = DataLoader(train_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers)
    val_dataloader = DataLoader(val_data,
                                batch_size=config.batch_size,
                                shuffle=False,
                                num_workers=config.num_workers)
    # prepare model (other backbones kept for reference)
    # model = ResNet18(num_classes=config.num_classes)
    # model = Vgg16(num_classes=config.num_classes)
    # model = densenet_collapse(num_classes=config.num_classes)
    model = ShallowVgg(num_classes=config.num_classes)
    print(model)
    if config.load_model_path:
        model.load(config.load_model_path)
    if config.use_gpu:
        model.cuda()
    if config.parallel:
        model = torch.nn.DataParallel(
            model, device_ids=[x for x in range(config.num_of_gpu)])
    # criterion and optimizer (class-weighted CE / focal-loss variants kept
    # commented for reference)
    # criterion = torch.nn.CrossEntropyLoss()
    criterion = LabelSmoothing(size=config.num_classes, smoothing=0.1)
    # criterion = FocalLoss(gamma=4, alpha=None)
    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=config.weight_decay)
    # metric
    softmax = functional.softmax
    log_softmax = functional.log_softmax
    loss_meter = meter.AverageValueMeter()  # per-print-window loss
    epoch_loss = meter.AverageValueMeter()  # per-epoch loss
    train_cm = meter.ConfusionMeter(config.num_classes)
    train_AUC = meter.AUCMeter()
    previous_avgse = 0  # best validation mean sensitivity so far
    # previous_AUC = 0
    # DataParallel wraps the model, so its attrs live under .module
    if config.parallel:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.module.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.module.model_name + '_best_model.pth'
    else:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.model_name + '_best_model.pth'
    save_epoch = 1  # epoch of the best model on the validation set
    # per-epoch curves, later dumped to process_record.json for plotting
    process_record = {
        'epoch_loss': [],
        'train_avgse': [],
        'train_se0': [],
        'train_se1': [],
        'val_avgse': [],
        'val_se0': [],
        'val_se1': [],
        'train_AUC': [],
        'val_AUC': []
    }
    # train
    for epoch in range(config.max_epoch):
        print(
            f"epoch: [{epoch+1}/{config.max_epoch}] {config.save_model_name[:-4]} =================================="
        )
        epoch_loss.reset()
        train_cm.reset()
        train_AUC.reset()
        # train
        model.train()
        for i, (image, label, image_path) in tqdm(enumerate(train_dataloader)):
            loss_meter.reset()
            # prepare input
            if config.use_gpu:
                image = image.cuda()
                label = label.cuda()
            # go through the model
            score = model(image)
            # backpropagate
            optimizer.zero_grad()
            # LabelSmoothing expects log-probabilities, hence log_softmax
            # loss = criterion(score, label)
            loss = criterion(log_softmax(score, dim=1), label)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())
            epoch_loss.add(loss.item())
            train_cm.add(softmax(score, dim=1).data, label.data)
            # probability of the positive class (index 1) for the AUC meter
            positive_score = np.array([
                item[1]
                for item in softmax(score, dim=1).data.cpu().numpy().tolist()
            ])
            train_AUC.add(positive_score, label.data)
            if (i + 1) % config.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])
        # per-class sensitivity from the confusion matrix rows
        train_se = [
            100. * train_cm.value()[0][0] /
            (train_cm.value()[0][0] + train_cm.value()[0][1]),
            100. * train_cm.value()[1][1] /
            (train_cm.value()[1][0] + train_cm.value()[1][1])
        ]
        # validate
        model.eval()
        if (epoch + 1) % 1 == 0:  # validate every epoch
            val_cm, val_se, val_accuracy, val_AUC = val_2class(
                model, val_dataloader)
            # save when the validation mean sensitivity improves
            # (AUC-based criterion kept commented for reference)
            if np.average(val_se) > previous_avgse:
                if config.parallel:
                    if not os.path.exists(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name.split('.')[0])):
                        os.makedirs(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name.split('.')[0]))
                    model.module.save(
                        os.path.join('checkpoints', save_model_dir,
                                     save_model_name.split('.')[0],
                                     save_model_name))
                else:
                    if not os.path.exists(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name.split('.')[0])):
                        os.makedirs(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name.split('.')[0]))
                    model.save(
                        os.path.join('checkpoints', save_model_dir,
                                     save_model_name.split('.')[0],
                                     save_model_name))
                previous_avgse = np.average(val_se)
                # previous_AUC = val_AUC.value()[0]
                save_epoch = epoch + 1
            # record this epoch's curves
            process_record['epoch_loss'].append(epoch_loss.value()[0])
            process_record['train_avgse'].append(np.average(train_se))
            process_record['train_se0'].append(train_se[0])
            process_record['train_se1'].append(train_se[1])
            process_record['train_AUC'].append(train_AUC.value()[0])
            process_record['val_avgse'].append(np.average(val_se))
            process_record['val_se0'].append(val_se[0])
            process_record['val_se1'].append(val_se[1])
            process_record['val_AUC'].append(val_AUC.value()[0])
            # report to visdom and stdout
            vis.plot_many({
                'epoch_loss': epoch_loss.value()[0],
                'train_avgse': np.average(train_se),
                'train_se0': train_se[0],
                'train_se1': train_se[1],
                'val_avgse': np.average(val_se),
                'val_se0': val_se[0],
                'val_se1': val_se[1],
                'train_AUC': train_AUC.value()[0],
                'val_AUC': val_AUC.value()[0]
            })
            vis.log(
                f"epoch: [{epoch + 1}/{config.max_epoch}] ========================================="
            )
            vis.log(
                f"lr: {optimizer.param_groups[0]['lr']}, loss: {round(loss_meter.value()[0], 5)}"
            )
            vis.log(
                f"train_avgse: {round(np.average(train_se), 4)}, train_se0: {round(train_se[0], 4)}, train_se1: {round(train_se[1], 4)}"
            )
            vis.log(
                f"val_avgse: {round(np.average(val_se), 4)}, val_se0: {round(val_se[0], 4)}, val_se1: {round(val_se[1], 4)}"
            )
            vis.log(f'train_AUC: {train_AUC.value()[0]}')
            vis.log(f'val_AUC: {val_AUC.value()[0]}')
            vis.log(f'train_cm: {train_cm.value()}')
            vis.log(f'val_cm: {val_cm.value()}')
            print("lr:", optimizer.param_groups[0]['lr'], "loss:",
                  round(epoch_loss.value()[0], 5))
            print('train_avgse:', round(np.average(train_se), 4), 'train_se0:',
                  round(train_se[0], 4), 'train_se1:', round(train_se[1], 4))
            print('val_avgse:', round(np.average(val_se), 4), 'val_se0:',
                  round(val_se[0], 4), 'val_se1:', round(val_se[1], 4))
            print('train_AUC:', train_AUC.value()[0], 'val_AUC:',
                  val_AUC.value()[0])
            print('train_cm:')
            print(train_cm.value())
            print('val_cm:')
            print(val_cm.value())
            # persist the curves next to the checkpoint (only once it exists)
            if os.path.exists(
                    os.path.join('checkpoints', save_model_dir,
                                 save_model_name.split('.')[0])):
                write_json(file=os.path.join('checkpoints', save_model_dir,
                                             save_model_name.split('.')[0],
                                             'process_record.json'),
                           content=process_record)
        # if (epoch+1) % 5 == 0:
        #     lr = lr * config.lr_decay
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr
    vis.log(f"Best Epoch: {save_epoch}")
    print("Best Epoch:", save_epoch)
def train_pair(**kwargs):
    """Train SiameseNet on image pairs from PairSWDataset.

    Optimizes only the pair (same/different) head with a class-weighted
    cross-entropy; checkpoints whenever the validation mean pair sensitivity
    improves, and additionally saves the last-epoch model.
    """
    config.parse(kwargs)
    vis = Visualizer(port=2333, env=config.env)
    vis.log('Use config:')
    # echo every non-dunder config attribute into the visdom log
    for k, v in config.__class__.__dict__.items():
        if not k.startswith('__'):
            vis.log(f"{k}: {getattr(config, k)}")
    # prepare data
    train_data = PairSWDataset(config.train_paths, phase='train', useRGB=config.useRGB, usetrans=config.usetrans, balance=config.data_balance)
    valpair_data = PairSWDataset(config.test_paths, phase='val_pair', useRGB=config.useRGB, usetrans=config.usetrans, balance=False)
    print('Training Samples:', train_data.__len__(), 'ValPair Samples:', valpair_data.__len__())
    dist = train_data.dist()
    print('Train Data Distribution:', dist)
    train_dataloader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)
    valpair_dataloader = DataLoader(valpair_data, batch_size=config.batch_size, shuffle=False, num_workers=config.num_workers)
    # prepare model
    model = SiameseNet(num_classes=config.num_classes)
    print(model)
    if config.load_model_path:
        model.load(config.load_model_path)
    if config.use_gpu:
        model.cuda()
    if config.parallel:
        model = torch.nn.DataParallel(model, device_ids=[x for x in range(config.num_of_gpu)])
    # NOTE(review): train mode is set once here, but model.eval() below is
    # never undone, so epochs after the first run the training loop in eval
    # mode — confirm whether that is intended.
    model.train()
    # criterion and optimizer: over-weight the "different" class 1.5x
    weight_pair = torch.Tensor([1, 1.5])
    vis.log(f'pair loss weight: {weight_pair}')
    print('pair loss weight:', weight_pair)
    weight_pair = weight_pair.cuda()
    pair_criterion = torch.nn.CrossEntropyLoss(weight=weight_pair)
    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=config.weight_decay)
    # metric
    softmax = functional.softmax
    pair_loss_meter = meter.AverageValueMeter()  # per-print-window loss
    pair_epoch_loss = meter.AverageValueMeter()  # per-epoch loss
    pair_train_cm = meter.ConfusionMeter(config.num_classes)
    # previous_loss = 100
    pair_previous_avg_se = 0  # best validation mean sensitivity so far
    # make sure the checkpoint directory exists
    if config.parallel:
        if not os.path.exists(os.path.join('checkpoints', model.module.model_name)):
            os.mkdir(os.path.join('checkpoints', model.module.model_name))
    else:
        if not os.path.exists(os.path.join('checkpoints', model.model_name)):
            os.mkdir(os.path.join('checkpoints', model.model_name))
    for epoch in range(config.max_epoch):
        print(f"epoch: [{epoch+1}/{config.max_epoch}] =============================================")
        pair_train_cm.reset()
        pair_epoch_loss.reset()
        # train
        for i, (image_1, image_2, label_1, label_2, label_res, _, _) in tqdm(enumerate(train_dataloader)):
            pair_loss_meter.reset()
            # prepare input (legacy pre-0.4 Variable API)
            image_1 = Variable(image_1)
            image_2 = Variable(image_2)
            target_res = Variable(label_res)  # pair-level label
            if config.use_gpu:
                image_1 = image_1.cuda()
                image_2 = image_2.cuda()
                target_res = target_res.cuda()
            # go through the model; only the pair head's score is trained
            score_1, score_2, score_res = model(image_1, image_2)
            # backpropagate
            optimizer.zero_grad()
            pair_loss = pair_criterion(score_res, target_res)
            pair_loss.backward()
            optimizer.step()
            # NOTE(review): .data[0] is the pre-0.4 API — .item() today
            pair_loss_meter.add(pair_loss.data[0])
            pair_epoch_loss.add(pair_loss.data[0])
            pair_train_cm.add(softmax(score_res, dim=1).data, target_res.data)
            if (i+1) % config.print_freq == 0:
                vis.plot('loss', pair_loss_meter.value()[0])
        # per-class sensitivity from the confusion matrix rows
        pair_train_se = [100. * pair_train_cm.value()[0][0] / (pair_train_cm.value()[0][0] + pair_train_cm.value()[0][1]),
                         100. * pair_train_cm.value()[1][1] / (pair_train_cm.value()[1][0] + pair_train_cm.value()[1][1])]
        model.eval()
        pair_val_cm, pair_val_accuracy, pair_val_se = val_pair(model, valpair_dataloader)
        # save when the validation mean pair sensitivity improves
        if np.average(pair_val_se) > pair_previous_avg_se:
            if config.parallel:
                save_model_dir = config.save_model_dir if config.save_model_dir else model.module.model_name
                save_model_name = config.save_model_name if config.save_model_name else model.module.model_name + '_best_model.pth'
                if not os.path.exists(os.path.join('checkpoints', save_model_dir)):
                    os.makedirs(os.path.join('checkpoints', save_model_dir))
                model.module.save(os.path.join('checkpoints', save_model_dir, save_model_name))
            else:
                save_model_dir = config.save_model_dir if config.save_model_dir else model.model_name
                save_model_name = config.save_model_name if config.save_model_name else model.model_name + '_best_model.pth'
                if not os.path.exists(os.path.join('checkpoints', save_model_dir)):
                    os.makedirs(os.path.join('checkpoints', save_model_dir))
                model.save(os.path.join('checkpoints', save_model_dir, save_model_name))
            pair_previous_avg_se = np.average(pair_val_se)
        # additionally save the final-epoch model
        if epoch+1 == config.max_epoch:
            if config.parallel:
                save_model_dir = config.save_model_dir if config.save_model_dir else model.module.model_name
                save_model_name = config.save_model_name.split('.pth')[0]+'_last.pth' if config.save_model_name else model.module.model_name + '_last_model.pth'
            else:
                save_model_dir = config.save_model_dir if config.save_model_dir else model.model_name
                save_model_name = config.save_model_name.split('.pth')[0]+'_last.pth' if config.save_model_name else model.model_name + '_last_model.pth'
            if not os.path.exists(os.path.join('checkpoints', save_model_dir)):
                os.makedirs(os.path.join('checkpoints', save_model_dir))
            model.save(os.path.join('checkpoints', save_model_dir, save_model_name))
        # report to visdom and stdout
        vis.plot_many({'epoch_loss': pair_epoch_loss.value()[0],
                       'pair_train_avg_se': np.average(pair_train_se),
                       'pair_train_se_0': pair_train_se[0], 'pair_train_se_1': pair_train_se[1],
                       'pair_val_avg_se': np.average(pair_val_se),
                       'pair_val_se_0': pair_val_se[0], 'pair_val_se_1': pair_val_se[1]})
        vis.log(f"epoch: [{epoch+1}/{config.max_epoch}] ===============================================")
        vis.log(f"lr: {lr}, loss: {round(pair_epoch_loss.value()[0], 5)}")
        vis.log(f"pair_train_avg_se: {round(np.average(pair_train_se), 4)}, pair_train_se_0: {round(pair_train_se[0], 4)}, pair_train_se_1: {round(pair_train_se[1], 4)}")
        vis.log(f"pair_val_avg_se: {round(sum(pair_val_se) / len(pair_val_se), 4)}, pair_val_se_0: {round(pair_val_se[0], 4)}, pair_val_se_1: {round(pair_val_se[1], 4)}")
        vis.log(f'pair_train_cm: {pair_train_cm.value()}')
        vis.log(f'pair_val_cm: {pair_val_cm.value()}')
        print("lr:", lr, "loss:", round(pair_epoch_loss.value()[0], 5))
        print('pair_train_avg_se:', round(np.average(pair_train_se), 4),
              'pair_train_se_0:', round(pair_train_se[0], 4),
              'pair_train_se_1:', round(pair_train_se[1], 4))
        print('pair_val_avg_se:', round(np.average(pair_val_se), 4),
              'pair_val_se_0:', round(pair_val_se[0], 4),
              'pair_val_se_1:', round(pair_val_se[1], 4))
        print('pair_train_cm:')
        print(pair_train_cm.value())
        print('pair_val_cm:')
        print(pair_val_cm.value())
        # update learning rate: fixed decay every 5 epochs
        # (loss-plateau-based decay kept commented for reference)
        # if loss_meter.value()[0] > previous_loss:
        #     lr = lr * config.lr_decay
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr
        # previous_loss = loss_meter.value()[0]
        if (epoch+1) % 5 == 0:
            lr = lr * config.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
def train(**kwargs):
    """Train the AGAH cross-modal hashing model.

    Per batch this alternates (1) WGAN-GP updates of the image/text feature
    discriminators and (2) an update of the generators / hash networks with a
    combined triplet + quantization + classification + code-map + adversarial
    loss. Validation runs every ``opt.valid_freq`` epochs and the checkpoint
    with the best MAP in both directions is kept.

    :param kwargs: config overrides forwarded to ``opt.parse``.
    """
    opt.parse(kwargs)

    if opt.vis_env:
        vis = Visualizer(opt.vis_env, port=opt.vis_port)

    # Fix: the original compared with "opt.device is 'cpu'" — identity
    # comparison against a string literal is implementation-defined.
    if opt.device is None or opt.device == 'cpu':
        opt.device = torch.device('cpu')
    else:
        opt.device = torch.device(opt.device)

    images, tags, labels = load_data(opt.data_path, type=opt.dataset)

    train_data = Dataset(opt, images, tags, labels)
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True)

    # valid or test data
    x_query_data = Dataset(opt, images, tags, labels, test='image.query')
    x_db_data = Dataset(opt, images, tags, labels, test='image.db')
    y_query_data = Dataset(opt, images, tags, labels, test='text.query')
    y_db_data = Dataset(opt, images, tags, labels, test='text.db')

    x_query_dataloader = DataLoader(x_query_data, opt.batch_size, shuffle=False)
    x_db_dataloader = DataLoader(x_db_data, opt.batch_size, shuffle=False)
    y_query_dataloader = DataLoader(y_query_data, opt.batch_size, shuffle=False)
    y_db_dataloader = DataLoader(y_db_data, opt.batch_size, shuffle=False)

    query_labels, db_labels = x_query_data.get_labels()
    query_labels = query_labels.to(opt.device)
    db_labels = db_labels.to(opt.device)

    # Fix: pretrain_model was unbound when neither load_model_path nor
    # pretrain_model_path was set, crashing at the AGAH(...) call below.
    pretrain_model = None
    if opt.load_model_path:
        pretrain_model = None
    elif opt.pretrain_model_path:
        pretrain_model = load_pretrain_model(opt.pretrain_model_path)

    model = AGAH(opt.bit, opt.tag_dim, opt.num_label, opt.emb_dim,
                 lambd=opt.lambd, pretrain_model=pretrain_model).to(opt.device)

    load_model(model, opt.load_model_path)  # presumably a no-op on a falsy path — verify

    # Image module trains at the base lr; every other group at 10x.
    optimizer = Adamax([
        {'params': model.img_module.parameters(), 'lr': opt.lr},
        {'params': model.txt_module.parameters()},
        {'params': model.hash_module.parameters()},
        {'params': model.classifier.parameters()}
    ], lr=opt.lr * 10, weight_decay=0.0005)

    optimizer_dis = {
        'img': Adamax(model.img_discriminator.parameters(), lr=opt.lr * 10,
                      betas=(0.5, 0.9), weight_decay=0.0001),
        'txt': Adamax(model.txt_discriminator.parameters(), lr=opt.lr * 10,
                      betas=(0.5, 0.9), weight_decay=0.0001)
    }

    criterion_tri_cos = TripletAllLoss(dis_metric='cos', reduction='sum')
    criterion_bce = nn.BCELoss(reduction='sum')

    loss = []

    max_mapi2t = 0.
    max_mapt2i = 0.

    # Per-sample feature / hash-code buffers indexed by dataset index.
    FEATURE_I = torch.randn(opt.training_size, opt.emb_dim).to(opt.device)
    FEATURE_T = torch.randn(opt.training_size, opt.emb_dim).to(opt.device)
    U = torch.randn(opt.training_size, opt.bit).to(opt.device)
    V = torch.randn(opt.training_size, opt.bit).to(opt.device)

    FEATURE_MAP = torch.randn(opt.num_label, opt.emb_dim).to(opt.device)
    CODE_MAP = torch.sign(torch.randn(opt.num_label, opt.bit)).to(opt.device)

    train_labels = train_data.get_labels().to(opt.device)

    mapt2i_list = []
    mapi2t_list = []
    train_times = []

    for epoch in range(opt.max_epoch):
        t1 = time.time()
        for i, (ind, x, y, l) in tqdm(enumerate(train_dataloader)):
            imgs = x.to(opt.device)
            tags = y.to(opt.device)
            labels = l.to(opt.device)
            batch_size = len(ind)

            h_x, h_y, f_x, f_y, x_class, y_class = model(imgs, tags, FEATURE_MAP)

            FEATURE_I[ind] = f_x.data
            FEATURE_T[ind] = f_y.data
            U[ind] = h_x.data
            V[ind] = h_y.data

            #####
            # train txt discriminator (WGAN-GP; real = text features)
            #####
            D_txt_real = model.dis_txt(f_y.detach())
            D_txt_real = -D_txt_real.mean()
            optimizer_dis['txt'].zero_grad()
            D_txt_real.backward()

            # train with fake
            D_txt_fake = model.dis_txt(f_x.detach())
            D_txt_fake = D_txt_fake.mean()
            D_txt_fake.backward()

            # train with gradient penalty
            alpha = torch.rand(batch_size, opt.emb_dim).to(opt.device)
            interpolates = alpha * f_y.detach() + (1 - alpha) * f_x.detach()
            interpolates.requires_grad_()
            disc_interpolates = model.dis_txt(interpolates)
            gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                                      grad_outputs=torch.ones(disc_interpolates.size()).to(opt.device),
                                      create_graph=True, retain_graph=True, only_inputs=True)[0]
            gradients = gradients.view(gradients.size(0), -1)
            # 10 is gradient penalty hyperparameter
            txt_gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * 10
            txt_gradient_penalty.backward()

            loss_D_txt = D_txt_real - D_txt_fake
            optimizer_dis['txt'].step()

            #####
            # train img discriminator (mirror of the txt branch)
            #####
            D_img_real = model.dis_img(f_x.detach())
            D_img_real = -D_img_real.mean()
            optimizer_dis['img'].zero_grad()
            D_img_real.backward()

            # train with fake
            D_img_fake = model.dis_img(f_y.detach())
            D_img_fake = D_img_fake.mean()
            D_img_fake.backward()

            # train with gradient penalty
            alpha = torch.rand(batch_size, opt.emb_dim).to(opt.device)
            interpolates = alpha * f_x.detach() + (1 - alpha) * f_y.detach()
            interpolates.requires_grad_()
            disc_interpolates = model.dis_img(interpolates)
            gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                                      grad_outputs=torch.ones(disc_interpolates.size()).to(opt.device),
                                      create_graph=True, retain_graph=True, only_inputs=True)[0]
            gradients = gradients.view(gradients.size(0), -1)
            # 10 is gradient penalty hyperparameter
            img_gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * 10
            img_gradient_penalty.backward()

            loss_D_img = D_img_real - D_img_fake
            optimizer_dis['img'].step()

            #####
            # train generators
            #####
            # update img network (to generate txt features)
            domain_output = model.dis_txt(f_x)
            loss_G_txt = -domain_output.mean()

            # update txt network (to generate img features)
            domain_output = model.dis_img(f_y)
            loss_G_img = -domain_output.mean()

            loss_adver = loss_G_txt + loss_G_img

            loss1 = criterion_tri_cos(h_x, labels, target=h_y, margin=opt.margin)
            loss2 = criterion_tri_cos(h_y, labels, target=h_x, margin=opt.margin)

            # push hash outputs towards +/-1
            theta1 = F.cosine_similarity(torch.abs(h_x), torch.ones_like(h_x).to(opt.device))
            theta2 = F.cosine_similarity(torch.abs(h_y), torch.ones_like(h_y).to(opt.device))
            loss3 = torch.sum(1 / (1 + torch.exp(theta1))) + torch.sum(1 / (1 + torch.exp(theta2)))

            loss_class = criterion_bce(x_class, labels) + criterion_bce(y_class, labels)

            theta_code_x = h_x.mm(CODE_MAP.t())  # size: (batch, num_label)
            theta_code_y = h_y.mm(CODE_MAP.t())
            loss_code_map = torch.sum(torch.pow(theta_code_x - opt.bit * (labels * 2 - 1), 2)) + \
                            torch.sum(torch.pow(theta_code_y - opt.bit * (labels * 2 - 1), 2))

            loss_quant = torch.sum(torch.pow(h_x - torch.sign(h_x), 2)) + \
                         torch.sum(torch.pow(h_y - torch.sign(h_y), 2))

            # err = loss1 + loss2 + loss3 + 0.5 * loss_class + 0.5 * (loss_f1 + loss_f2)
            err = loss1 + loss2 + opt.alpha * loss3 + opt.beta * loss_class + opt.gamma * loss_code_map + \
                  opt.eta * loss_quant + opt.mu * loss_adver

            optimizer.zero_grad()
            err.backward()
            optimizer.step()

            loss.append(err.item())

        CODE_MAP = update_code_map(U, V, CODE_MAP, train_labels)
        FEATURE_MAP = update_feature_map(FEATURE_I, FEATURE_T, train_labels)

        print('...epoch: %3d, loss: %3.3f' % (epoch + 1, loss[-1]))
        delta_t = time.time() - t1

        if opt.vis_env:
            vis.plot('loss', loss[-1])

        # validate
        if opt.valid and (epoch + 1) % opt.valid_freq == 0:
            mapi2t, mapt2i = valid(model, x_query_dataloader, x_db_dataloader,
                                   y_query_dataloader, y_db_dataloader,
                                   query_labels, db_labels, FEATURE_MAP)
            print('...epoch: %3d, valid MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (epoch + 1, mapi2t, mapt2i))

            mapi2t_list.append(mapi2t)
            mapt2i_list.append(mapt2i)
            train_times.append(delta_t)

            if opt.vis_env:
                d = {'mapi2t': mapi2t, 'mapt2i': mapt2i}
                vis.plot_many(d)

            if mapt2i >= max_mapt2i and mapi2t >= max_mapi2t:
                max_mapi2t = mapi2t
                max_mapt2i = mapt2i
                save_model(model)
                path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit)
                with torch.cuda.device(opt.device):
                    torch.save(FEATURE_MAP, os.path.join(path, 'feature_map.pth'))

        # lr decay every 100 epochs (note: also fires at epoch 0 — preserved)
        if epoch % 100 == 0:
            for params in optimizer.param_groups:
                params['lr'] = max(params['lr'] * 0.6, 1e-6)

    if not opt.valid:
        save_model(model)

    print('...training procedure finish')
    if opt.valid:
        print(' max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (max_mapi2t, max_mapt2i))
    else:
        mapi2t, mapt2i = valid(model, x_query_dataloader, x_db_dataloader,
                               y_query_dataloader, y_db_dataloader,
                               query_labels, db_labels, FEATURE_MAP)
        print(' max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (mapi2t, mapt2i))

    path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit)
    with open(os.path.join(path, 'result.pkl'), 'wb') as f:
        pickle.dump([train_times, mapi2t_list, mapt2i_list], f)
def train(**kwargs):
    """Train a DogCat binary classifier.

    Builds the model named by ``opt.model``, trains with Adam + cross-entropy,
    validates after each epoch, and decays the learning rate whenever the
    epoch loss stops decreasing.

    :param kwargs: config overrides applied via ``opt.parse``.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step 1: model
    model = getattr(models, opt.model)()
    # ipdb.set_trace()
    # if opt.load_model_path:
    #     model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step 2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_data_loader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                   num_workers=opt.num_workers)
    val_data_loader = DataLoader(val_data, opt.batch_size, shuffle=False,
                                 num_workers=opt.num_workers)

    # step 3: loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    learning_rate = opt.learning_rate
    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                           weight_decay=opt.weight_decay)

    # step 4: statistical indicators
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_data_loader):
            print("epoch: {epoch}, batch: {batch}".format(epoch=epoch, batch=ii))

            # training model parameters (Variable wrappers dropped: a no-op
            # since torch 0.4, and this file already uses loss.item())
            input_ = data
            target = label
            if opt.use_gpu:
                input_ = input_.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input_)
            loss = criterion(score, target)
            loss.backward()
            # BUG FIX: optimizer.step() was missing — gradients were computed
            # but the parameters were never updated.
            optimizer.step()

            # update statistical indicators and visualization
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                # if necessary, step into debug mode
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

        model.save()

        # calculate statistical indicators in the validation set and visualization
        val_cm, val_accuracy = val(model, val_data_loader)
        vis.plot("val_accuracy", val_accuracy)
        vis.log("epoch: {epoch}, learning_rate: {learning_rate}, loss: {loss}, train_cm: {train_cm}, val_cm: {val_cm}".format(
            epoch=epoch, learning_rate=learning_rate, loss=loss_meter.value()[0],
            train_cm=str(confusion_matrix.value()), val_cm=str(val_cm.value())))

        # decay the learning rate when the epoch loss stopped improving
        if loss_meter.value()[0] > previous_loss:
            learning_rate = learning_rate * opt.learning_rate_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train the FLogo logo-localization network.

    Feeds (target image, query logo) pairs through the network and regresses a
    per-pixel mask with BCE-with-logits; periodically visualizes predictions
    and checkpoints the weights.

    :param kwargs: config overrides applied via ``opt._parse``.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env,port = opt.vis_port)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')

    # data loading
    train_data = FLogo(opt.data_root,train=True)
    train_dataloader = DataLoader(train_data,opt.batch_size,shuffle=True,num_workers=opt.num_workers)

    '''
    # 以下内容是可视化dataloader的数据的 一 检查dataset是否合理 二 为了写论文凑图
    dataiter = iter(train_dataloader)
    img1,img2,lable=dataiter.next()
    img1 = tv.utils.make_grid((img1+1)/2,nrow=6,padding=2).numpy()
    img2 = tv.utils.make_grid((img2+1)/2,nrow=6,padding=2).numpy()
    plt.figure()
    plt.imshow(np.transpose(img1, (1, 2, 0)))
    plt.figure()
    plt.imshow(np.transpose(img2, (1, 2, 0)))
    plt.figure()
    lables = label.unsqueeze(1)
    # lables
    mask = tv.utils.make_grid(lables,nrow=6,padding=2).numpy()
    plt.imshow(np.transpose(mask, (1, 2, 0)))
    plt.show()
    from torchvision.transforms import ToPILImage
    import numpy as np
    import matplotlib.pylab as plt
    train()
    '''

    # network
    net = Net()
    net.train()

    # load pretrained weights if a checkpoint path is configured
    if opt.load_model_path:
        net.load_state_dict(t.load(opt.load_model_path,map_location = lambda storage,loc:storage),False)
        print('已加载完。。')
    else:
        # model weight initialization (Xavier for conv / linear layers)
        for m in net.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_normal_(m.weight)
        print('模型参数完成初始化。。')
    net.to(device)

    # loss function and optimizer
    criterion = nn.BCEWithLogitsLoss(pos_weight=opt.pos_weight.to(device))
    optimizer = t.optim.SGD(net.parameters(),lr=opt.lr, momentum=opt.momentum,weight_decay=opt.weight_decay)

    # running-average loss via the meter module
    loss_meter = meter.AverageValueMeter()

    # learning-rate schedule (currently disabled)
    # scheduler = StepLR(optimizer, step_size=1000, gamma=0.5)

    for epoch in range(opt.epoches):
        loss_meter.reset()  # reset the running loss average at the start of each epoch
        for ii,(target_img,query_logo,mask) in tqdm.tqdm(enumerate(train_dataloader)):
            print(target_img.shape)
            # training step
            target_img = target_img.to(device)
            query_logo = query_logo.to(device)
            mask = mask.to(device)

            optimizer.zero_grad()
            output = net(query_logo,target_img)
            output = output.squeeze()
            # sigmoid is applied only for visualization; the raw logits go to
            # BCEWithLogitsLoss below
            predict = t.sigmoid(output)
            # predict_mask = t.sigmoid(output)
            # true output should be sigmoid
            # ipdb.set_trace()
            # NOTE(review): assumes mask values are stored as 0/255 — confirm against FLogo dataset
            true_mask = mask/255
            # predict = output.view(output.size(0),-1)
            # target = true_mask.view(true_mask.size(0),-1)
            # ipdb.set_trace()
            # print(predict.size(),target.size())
            # loss = criterion(F.softmax(output,dim=2),true_mask)
            loss = criterion(output,true_mask)
            # print(loss.item())
            loss.backward()
            optimizer.step()

            # meter update and visualize
            loss_meter.add(loss.item())

            if (ii+1)%opt.plot_every == 0:
                # inputs are in [-1, 1]; rescale to [0, 1] for display
                vis.img('target_img', ((target_img + 1) / 2).data[0])
                vis.img('query_logo', ((query_logo + 1) / 2).data[0])
                vis.img('truth groud', (true_mask.data[0]))
                vis.img('predict', predict.data[0])

                # binarize the prediction at 0.5 for a hard mask view
                pre_judgement = predict.data[0]
                pre_judgement[pre_judgement > 0.5] = 1  # TODO: try 0.7 as the threshold?
                pre_judgement[pre_judgement <= 0.5] = 0
                vis.img('pre_judge(>0.5)', pre_judgement)
                # vis.img('pre_judge', pre_judgement)

            # vis.log({'predicted':output.data[0].cpu().numpy()})
            # vis.log({'truth groud':true_mask.data[0].cpu().numpy()})
        print('finish epoch:',epoch)
        # vis.log({'predicted':output.data[0].cpu().numpy()})
        vis.plot('loss',loss_meter.value()[0])

        if (epoch+1) %opt.save_model_epoch == 0:
            vis.save([opt.env])
            t.save(net.state_dict(),'checkpoints/%s_localize_v6.pth' % epoch)
def train(**kwargs): opt = Config() for k, v in kwargs.items(): setattr(opt, k, v) device = t.device('cuda') if opt.use_gpu else t.device('cpu') opt.caption_data_path = 'caption.pth' # 原始数据 opt.test_img = '' # 输入图片 # opt.model_ckpt='caption_0914_1947' # 预训练的模型 # 数据 vis = Visualizer(env=opt.env) dataloader = get_dataloader(opt) _data = dataloader.dataset._data word2ix, ix2word = _data['word2ix'], _data['ix2word'] # 模型 model = CaptionModel(opt, word2ix, ix2word) if opt.model_ckpt: model.load(opt.model_ckpt) optimizer = model.get_optimizer(opt.lr) criterion = t.nn.CrossEntropyLoss() model.to(device) # 统计 loss_meter = meter.AverageValueMeter() for epoch in range(opt.epoch): loss_meter.reset() for ii, (imgs, (captions, lengths), indexes) in tqdm.tqdm(enumerate(dataloader)): # 训练 optimizer.zero_grad() imgs = imgs.to(device) captions = captions.to(device) input_captions = captions[:-1] target_captions = pack_padded_sequence(captions, lengths)[0] score, _ = model(imgs, input_captions, lengths) loss = criterion(score, target_captions) loss.backward() optimizer.step() loss_meter.add(loss.item()) # 可视化 if (ii + 1) % opt.plot_every == 0: if os.path.exists(opt.debug_file): ipdb.set_trace() vis.plot('loss', loss_meter.value()[0]) # 可视化原始图片 + 可视化人工的描述语句 raw_img = _data['ix2id'][indexes[0]] img_path = opt.img_path + raw_img raw_img = Image.open(img_path).convert('RGB') raw_img = tv.transforms.ToTensor()(raw_img) raw_caption = captions.data[:, 0] raw_caption = ''.join( [_data['ix2word'][ii] for ii in raw_caption]) vis.text(raw_caption, u'raw_caption') vis.img('raw', raw_img, caption=raw_caption) # 可视化网络生成的描述语句 results = model.generate(imgs.data[0]) vis.text('</br>'.join(results), u'caption') model.save()
def train(**kwargs): for k, v in kwargs.items(): setattr(opt, k, v) opt.device = t.device('cuda' if t.cuda.is_available() else 'cpu') device = opt.device vis = Visualizer(env=opt.env) # 获取数据 data_all = np.load(opt.pickle_path) data = data_all['data'] word2ix = data_all['word2ix'].item() ix2word = data_all['ix2word'].item() data = t.from_numpy(data) dataloader = DataLoader(data, batch_size=opt.batch_size, shuffle=True, num_workers=1) # 模型定义 model = PoetryModel(len(word2ix), 128, 256) optimizer = t.optim.Adam(model.parameters(), lr=opt.lr) loss_func = nn.CrossEntropyLoss() if opt.model_path: model.load_state_dict( t.load(opt.model_path, map_location=t.device('cpu'))) model.to(device) loss_avg = 0 for epoch in range(opt.epoch): for ii, data_ in tqdm(enumerate(dataloader)): data_ = data_.long() data_ = data_.to(device) optimizer.zero_grad() input_, target = data_[:, :-1], data_[:, 1:] output, _ = model(input_) loss = loss_func(output, target.reshape(-1)) loss.backward() optimizer.step() loss_avg += loss.item() # 可视化 if (ii + 1) % opt.plot_every == 0: vis.plot('loss', loss_avg / opt.plot_every) loss_avg = 0 poetrys = [[ix2word[_word] for _word in data_[i].tolist()] for i in range(data_.shape[0])][:16] vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]), win='origin_poem') gen_poetries = [] for word in list('春江花月夜凉如水'): gen_poetry = ''.join( generate(model, word, ix2word, word2ix)) gen_poetries.append(gen_poetry) vis.text('</br>'.join( [''.join(poetry) for poetry in gen_poetries]), win='gen_poem') t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train():
    """Train the attention-decoder captioning model with teacher forcing.

    For each batch, decodes the first 8 samples token by token, accumulating
    NLL loss against the ground-truth caption, and periodically visualizes
    the image, the true caption and the decoded caption.
    """
    model = IMAGE_AI_MODEL()
    model.train()
    model.cuda()
    criterion = t.nn.NLLLoss()
    optimizer = t.optim.Adam(model.parameters(), lr=1e-3)
    dataloader = get_dataloader()
    data_set = dataloader.dataset
    print(len(data_set))
    ix2word = dataloader.dataset.ix2word
    _data = dataloader.dataset._data
    vis = Visualizer(env='word_embedding_caption')
    loss_meter = meter.AverageValueMeter()
    for epoch in range(10):
        loss_meter.reset()
        for ii, (img_lows, img_highs, cap_tensor, lengths, indexs) in tqdm.tqdm(enumerate(dataloader)):
            optimizer.zero_grad()
            # loss starts as int 0 and becomes a tensor after the first
            # criterion(...) add; backward() below assumes at least one
            # decoding step happened (i.e. some target_length > 0)
            loss = 0
            bitch_target_length = 0
            # NOTE(review): hard-coded 8 — assumes every batch holds at least
            # 8 samples; TODO confirm the dataloader's batch size / drop_last
            for i in range(8):
                # per-sample decoder state from the low-level image feature
                decoder_hidden = img_lows[[i]].unsqueeze(0)
                cell_hidden = decoder_hidden.clone()
                encoder_outputs = img_highs[i]
                target_tensor = cap_tensor[i]
                target_length = lengths[i]
                bitch_target_length += target_length
                # token id 0 — presumably the start-of-sequence token; verify
                decoder_input = t.tensor([0])
                decoder_hidden = decoder_hidden.cuda()
                cell_hidden = cell_hidden.cuda()
                encoder_outputs = encoder_outputs.cuda()
                target_tensor = target_tensor.cuda()
                decoder_input = decoder_input.cuda()
                # remember the image path (used by the plot block below —
                # it refers to the last sample of this inner loop)
                raw_img = _data['ix2id'][indexs[i]]
                img_path_q = 'ai_challenger_caption_train_20170902/caption_train_images_20170902/'
                img_path = img_path_q + raw_img
                # ground-truth caption as text, '|'-terminated for display
                ture_words = []
                for w in range(target_length):
                    ture_words.append(ix2word[target_tensor[w].item()])
                ture_words.append('|')
                decoded_words = []
                # teacher-forced decoding: the next input is always the
                # ground-truth token, not the prediction
                for di in range(target_length):
                    decoder_output, decoder_hidden, cell_hidden, decoder_attention = model(
                        decoder_input, decoder_hidden, cell_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_tensor[[di]])
                    decoder_input = target_tensor[[di]]
                    topv, topi = decoder_output.data.topk(1)
                    # token id 2 — presumably the EOS token; verify against vocab
                    if topi.item() == 2:
                        decoded_words.append('<EOS>')
                        break
                    else:
                        decoded_words.append(ix2word[topi.item()])
            loss.backward()
            # normalize the reported loss by total decoded tokens
            loss_batch = loss.item() / bitch_target_length
            loss_meter.add(loss_batch)
            optimizer.step()
            plot_every = 10
            if (ii + 1) % plot_every == 0:
                vis.plot('loss', loss_meter.value()[0])
                raw_img = Image.open(img_path).convert('RGB')
                raw_img = tv.transforms.ToTensor()(raw_img)
                vis.img('raw', raw_img)
                raw_caption = ''.join(decoded_words)
                vis.text(raw_caption, win='raw_caption')
                ture_caption = ''.join(ture_words)
                vis.text(ture_caption, win='ture_caption')
    # save — NOTE(review): the path has no directory or .pth extension
    prefix = 'IMAGE_AI_MODEL'
    path = '{prefix}_{time}'.format(prefix=prefix, time=time.strftime('%m%d_%H%M'))
    t.save(model.state_dict(), path)
def train(**kwargs):
    """Train a vertebrae image classifier.

    Trains the selected network with Adam + cross-entropy, validates every
    epoch, saves the best model by validation accuracy, and decays the
    learning rate whenever the epoch loss stops decreasing.

    :param kwargs: config overrides applied via ``config.parse``.
    """
    config.parse(kwargs)
    vis = Visualizer(port=2333, env=config.env)

    # prepare data
    train_data = Vertebrae_Dataset(config.data_root, config.train_paths, phase='train', balance=config.data_balance)
    val_data = Vertebrae_Dataset(config.data_root, config.test_paths, phase='val', balance=config.data_balance)
    # train_data = FrameDiff_Dataset(config.data_root, config.train_paths, phase='train', balance=config.data_balance)
    # val_data = FrameDiff_Dataset(config.data_root, config.test_paths, phase='val', balance=config.data_balance)
    print('Training Images:', train_data.__len__(), 'Validation Images:', val_data.__len__())

    train_dataloader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=config.batch_size, shuffle=False, num_workers=config.num_workers)

    # prepare model (alternatives kept for experiment switching)
    # model = ResNet34(num_classes=config.num_classes)
    # model = DenseNet121(num_classes=config.num_classes)
    # model = CheXPre_DenseNet121(num_classes=config.num_classes)
    # model = MultiResDenseNet121(num_classes=config.num_classes)
    # model = Vgg19(num_classes=config.num_classes)
    model = MultiResVgg19(num_classes=config.num_classes)

    if config.load_model_path:
        model.load(config.load_model_path)
    if config.use_gpu:
        model.cuda()
    if config.parallel:
        model = torch.nn.DataParallel(model, device_ids=[x for x in range(config.num_of_gpu)])
    model.train()

    # criterion and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=config.weight_decay)

    # metric
    softmax = functional.softmax
    loss_meter = meter.AverageValueMeter()
    train_cm = meter.ConfusionMeter(config.num_classes)
    previous_loss = 100
    previous_acc = 0

    # make sure the checkpoint directory exists
    if config.parallel:
        if not os.path.exists(os.path.join('checkpoints', model.module.model_name)):
            os.mkdir(os.path.join('checkpoints', model.module.model_name))
    else:
        if not os.path.exists(os.path.join('checkpoints', model.model_name)):
            os.mkdir(os.path.join('checkpoints', model.model_name))

    for epoch in range(config.max_epoch):
        loss_meter.reset()
        train_cm.reset()

        # train
        for i, (image, label, image_path) in tqdm(enumerate(train_dataloader)):
            # prepare input
            img = Variable(image)
            target = Variable(label)
            if config.use_gpu:
                img = img.cuda()
                target = target.cuda()

            # go through the model
            score = model(img)

            # backpropagate
            optimizer.zero_grad()
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # fix: loss.data[0] fails on 0-dim tensors in torch >= 0.5;
            # this file already uses loss.item() elsewhere
            loss_meter.add(loss.item())
            train_cm.add(softmax(score, dim=1).data, target.data)

            if i % config.print_freq == config.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                print('loss', loss_meter.value()[0])

        # print result
        train_accuracy = 100. * sum(
            [train_cm.value()[c][c] for c in range(config.num_classes)]) / train_cm.value().sum()

        val_cm, val_accuracy, val_loss = val(model, val_dataloader)

        # keep the checkpoint with the best validation accuracy
        if val_accuracy > previous_acc:
            if config.parallel:
                # fix: DataParallel has no .save — call it on the wrapped module
                if config.save_model_name:
                    model.module.save(os.path.join('checkpoints', model.module.model_name,
                                                   config.save_model_name))
                else:
                    model.module.save(os.path.join('checkpoints', model.module.model_name,
                                                   model.module.model_name + '_best_model.pth'))
            else:
                if config.save_model_name:
                    model.save(os.path.join('checkpoints', model.model_name,
                                            config.save_model_name))
                else:
                    model.save(os.path.join('checkpoints', model.model_name,
                                            model.model_name + '_best_model.pth'))
            previous_acc = val_accuracy

        vis.plot_many({
            'train_accuracy': train_accuracy,
            'val_accuracy': val_accuracy
        })
        vis.log(
            "epoch: [{epoch}/{total_epoch}], lr: {lr}, loss: {loss}".format(
                epoch=epoch + 1, total_epoch=config.max_epoch, lr=lr, loss=loss_meter.value()[0]))
        vis.log('train_cm:')
        vis.log(train_cm.value())
        vis.log('val_cm')
        vis.log(val_cm.value())
        print('train_accuracy:', train_accuracy, 'val_accuracy:', val_accuracy)
        print("epoch: [{epoch}/{total_epoch}], lr: {lr}, loss: {loss}".format(
            epoch=epoch + 1, total_epoch=config.max_epoch, lr=lr, loss=loss_meter.value()[0]))
        print('train_cm:')
        print(train_cm.value())
        print('val_cm:')
        print(val_cm.value())

        # update learning rate when the loss stops improving
        if loss_meter.value()[0] > previous_loss:
            lr = lr * config.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a character-level poetry language model.

    Batches of poems (each row one poem) are transposed to (seq, batch) and
    trained with shifted-by-one cross-entropy (teacher forcing). Sample
    generations are visualized every ``opt.plot_every`` batches and a
    checkpoint is written after each epoch.

    :param kwargs: attribute overrides applied onto the module-level ``opt``.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)

    # fetch data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=2)

    # model definition
    model = PoetryModel(len(word2ix), opt.embedding_dim, opt.hidden_dim)

    # optimizer
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)

    # loss function
    criterion = nn.CrossEntropyLoss()

    # resume from a pretrained model for continued training
    if opt.model_path and os.path.exists(opt.model_path):
        model.load_state_dict(t.load(opt.model_path))

    # GPU related
    if opt.use_gpu:
        model = model.to(device)
        criterion = criterion.to(device)

    # loss meter
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()

        for i, data_ in tqdm.tqdm(enumerate(dataloader)):
            # data_ arrives as (batch, maxlen) int32; three things happen here:
            # 1) int32 -> long, 2) transpose to (seq, batch) for the RNN,
            # 3) .contiguous() so later views don't fail
            data_ = data_.long().transpose(0, 1).contiguous()

            if opt.use_gpu:
                data_ = data_.to(device)

            optimizer.zero_grad()

            # shifted training: rows [0, n-1) are the input, rows [1, n) the
            # target — each position predicts the next token
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)

            # flatten the target to match output's (steps*batch, vocab) shape
            target = target.view(-1)
            loss = criterion(output, target)

            loss.backward()
            optimizer.step()

            # fix: loss.data[0] fails on 0-dim tensors in torch >= 0.5; this
            # function already uses the modern .item() API for tokens below
            loss_meter.add(loss.item())

            # visualization
            if (1 + i) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])

                # original poems: decode the first 16 columns of this batch
                poetrys = [[ix2word[_word.item()] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')

                # generate 8 poems, one starting with each of these characters
                gen_poetries = []
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')

        # save a checkpoint once per epoch
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train(**kwargs): for k, v in kwargs.items(): setattr(opt, k, v) vis = Visualizer(env=opt.env) #获取数据 data, word2ix, ix2word = get_data(opt) data = t.from_numpy(data) dataloader = t.utils.data.DataLoader(data, batch_size=opt.batch_size, shuffle=True, num_workers=1) #模型定义 model = PoetryModel(len(word2ix), 128, 256) optimizer = t.optim.Adam(model.parameters(), lr=opt.lr) criterion = nn.CrossEntropyLoss() if opt.model_path: model.load_state_dict(t.load(opt.model_path)) if opt.use_gpu: model.cuda() criterion.cuda() loss_meter = meter.AverageValueMeter() for epoch in range(opt.epoch): loss_meter.reset() for li, data_ in tqdm.tqdm(enumerate(dataloader)): #训练 data_ = data_.long().transpose(1, 0).contiguous() if opt.use_gpu: data_ = data_.cuda() optimizer.zero_grad() ##输入和目标错开 input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :]) output, _ = model(input_) loss = criterion(output, target.view(-1)) loss.backward() optimizer.step() loss_meter.add(loss.data[0]) # 可视化 if (1 + ii) % opt.plot_every == 0: if os.path.exists(opt.debug_file): ipdb.set_trace() vis.plot('loss', loss_meter.value()[0]) #诗歌原文 poetrys = [[ix2word[_word] for _word in data_[:, -iii]] for _iii in range(data_.size(1))][:16] vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]), win=u'origin_poem') gen_poetries = [] #分别以这几个字作为诗歌的第一个字,生成8首诗 for word in list(u'春江花月夜凉如水'): gen_poetry = ''.join( generate(model, word, ix2word, word2ix)) gen_poetries.append(gen_poetry) vis.text('</br>'.join( [''.join(poetry) for poetry in gen_poetries]), win=u'gen_poem') t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train(**kwargs):
    """Train a character-level poetry language model (annotated variant).

    :param kwargs: attribute overrides applied onto the module-level ``opt``.
    """
    for k,v in kwargs.items():
        setattr(opt,k,v)

    vis = Visualizer(env=opt.env)

    # fetch data
    data,word2ix,ix2word = get_data(opt)
    data = t.from_numpy(data)  # convert the ndarray to a tensor
    dataloader = t.utils.data.DataLoader(data,  # build the DataLoader instance
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # model definition
    model = PoetryModel(len(word2ix), 128, 256)  # (vocab_size, embedding_dim, hidden_dim)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()  # loss function: cross-entropy
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii,data_ in tqdm.tqdm(enumerate(dataloader)):  # tqdm progress bar; one batch per step
            # train
            # data_.size: (batch_size, maxlen)
            data_ = data_.long().transpose(1,0).contiguous()  # transpose then make a contiguous copy of the same data
            # if epoch==0 and ii ==0:
            #     print('size of data_ after transpose: \n',data_.size())
            if opt.use_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()  # clear gradients
            # input and target are shifted by one:
            # input_ holds the first maxlen-1 items of every poem,
            # target holds the last maxlen-1 items —
            # e.g. for "床前明月光", the input is "床前明月" and the target "前明月光"
            input_,target = Variable(data_[:-1,:]),Variable(data_[1:,:])
            output,_ = model(input_)
            # Tensor.view(-1) flattens the tensor along dim 0, element by element
            loss = criterion(output,target.view(-1))
            loss.backward()
            optimizer.step()

            # NOTE(review): loss.data[0] only works on pre-0.5 torch; newer
            # releases raise on 0-dim indexing and need loss.item()
            loss_meter.add(loss.data[0])

            # visualization
            if (1+ii)%opt.plot_every==0:
                # if a debug file exists, drop into the debugger
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss',loss_meter.value()[0])

                # original poems
                poetrys=[ [ix2word[_word] for _word in data_[:,_iii]]  # each item (id) of each poem is mapped back to text
                            for _iii in range(data_.size(1))][:16]  # _iii ranges over the batch columns
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),win=u'origin_poem')
                # show the first 16 of these poems in visdom

                gen_poetries = []
                # generate 8 poems, one starting with each of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model,word,ix2word,word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),win=u'gen_poem')

        t.save(model.state_dict(),'%s_%s.pth' %(opt.model_prefix,epoch))
def train(**kwargs): for k, v in kwargs.items(): setattr(opt, k, v) vis = Visualizer(env=opt.env) # 获取数据 data, word2ix, ix2word = get_data(opt) data = t.from_numpy(data) dataloader = t.utils.data.DataLoader(data, batch_size=opt.batch_size, shuffle=True, num_workers=1) # 模型定义 model = PoetryModel(len(word2ix), 128, 256) optimizer = t.optim.Adam(model.parameters(), lr=opt.lr) criterion = nn.CrossEntropyLoss() if opt.model_path: model.load_state_dict(t.load(opt.model_path)) if opt.use_gpu: model.cuda() criterion.cuda() loss_meter = meter.AverageValueMeter() for epoch in range(opt.epoch): loss_meter.reset() for ii, data_ in tqdm.tqdm(enumerate(dataloader)): # 训练 data_ = data_.long().transpose(1, 0).contiguous() if opt.use_gpu: data_ = data_.cuda() optimizer.zero_grad() input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :]) output, _ = model(input_) loss = criterion(output, target.view(-1)) loss.backward() optimizer.step() loss_meter.add(loss.data[0]) # 可视化 if (1 + ii) % opt.plot_every == 0: if os.path.exists(opt.debug_file): ipdb.set_trace() vis.plot('loss', loss_meter.value()[0]) # 诗歌原文 poetrys = [[ix2word[_word] for _word in data_[:, _iii]] for _iii in range(data_.size(1))][:16] vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]), win=u'origin_poem') gen_poetries = [] # 分别以这几个字作为诗歌的第一个字,生成8首诗 for word in list(u'春江花月夜凉如水'): gen_poetry = ''.join(generate(model, word, ix2word, word2ix)) gen_poetries.append(gen_poetry) vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]), win=u'gen_poem') t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train(**kwargs):
    """Train a binary slide-window classifier (ResNet50 by default).

    Keyword args are merged into the module-level ``config``. Each epoch the
    model is validated; the checkpoint with the best validation AUC (after
    epoch 5) is saved, and the whole training curve is dumped to JSON.
    """
    config.parse(kwargs)

    # ------------------------- Visualization -------------------------
    vis = Visualizer(port=2333, env=config.env)
    vis.log('Use config:')
    # Log every non-dunder class attribute of the config object.
    for k, v in config.__class__.__dict__.items():
        if not k.startswith('__'):
            vis.log(f"{k}: {getattr(config, k)}")

    # ------------------------- Prepare Data --------------------------
    train_data = SlideWindowDataset(config.train_paths, phase='train', useRGB=config.useRGB,
                                    usetrans=config.usetrans, balance=config.data_balance)
    val_data = SlideWindowDataset(config.test_paths, phase='val', useRGB=config.useRGB,
                                  usetrans=config.usetrans, balance=False)
    print('Training Images:', train_data.__len__(), 'Validation Images:', val_data.__len__())
    dist = train_data.dist()  # per-class sample counts
    print('Train Data Distribution:', dist)
    train_dataloader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True,
                                  num_workers=config.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=config.batch_size, shuffle=False,
                                num_workers=config.num_workers)

    # ------------------------- Prepare Model -------------------------
    # Alternative backbones kept for experimentation:
    # model = AlexNet(num_classes=config.num_classes)
    # model = Vgg16(num_classes=config.num_classes)
    # model = Modified_Vgg16(num_classes=config.num_classes)
    # model = ResNet18(num_classes=config.num_classes)
    model = ResNet50(num_classes=config.num_classes)
    # model = DenseNet121(num_classes=config.num_classes)
    # model = ShallowNet(num_classes=config.num_classes)
    # model = Customed_ShallowNet(num_classes=config.num_classes)
    # model = Modified_AGVgg16(num_classes=2)
    # model = AGResNet18(num_classes=2)
    print(model)
    if config.load_model_path:
        model.load(config.load_model_path)
    if config.use_gpu:
        model.cuda()
    if config.parallel:
        model = torch.nn.DataParallel(
            model, device_ids=[x for x in range(config.num_of_gpu)])

    # --------------------- Criterion and Optimizer --------------------
    # Class weights for CrossEntropyLoss; the inverse-frequency weights
    # below were tried and a fixed [1, 7] kept (up-weights class 1).
    # weight = torch.Tensor([1, 1])
    # weight = torch.Tensor([dist['1']/(dist['0']+dist['1']), dist['0']/(dist['0']+dist['1'])])
    # weight = torch.Tensor([1, 3.5])
    # weight = torch.Tensor([1, 5])
    weight = torch.Tensor([1, 7])
    vis.log(f'loss weight: {weight}')
    print('loss weight:', weight)
    weight = weight.cuda()
    criterion = torch.nn.CrossEntropyLoss(weight=weight)
    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=config.weight_decay)

    # ----------------------------- Metrics ----------------------------
    softmax = functional.softmax
    loss_meter = meter.AverageValueMeter()   # reset every iteration (per-batch loss)
    epoch_loss = meter.AverageValueMeter()   # reset every epoch
    train_cm = meter.ConfusionMeter(config.num_classes)

    # ---------------- Saving and Recording Configuration --------------
    previous_auc = 0
    if config.parallel:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.module.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.module.model_name + '_best_model.pth'
    else:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.model_name + '_best_model.pth'
    save_epoch = 1  # epoch whose model performed best on the validation set
    process_record = {
        'epoch_loss': [],
        'train_avg_se': [], 'train_se_0': [], 'train_se_1': [],
        'val_avg_se': [], 'val_se_0': [], 'val_se_1': [],
        'AUC': []
    }  # full training curves, later dumped to JSON for plotting

    # ---------------------------- Training ----------------------------
    for epoch in range(config.max_epoch):
        # NOTE(review): uses config.save_model_name directly (not the local
        # fallback), so it presumably is always set in config — verify.
        print(
            f"epoch: [{epoch+1}/{config.max_epoch}] {config.save_model_name[:-4]} =================================="
        )
        train_cm.reset()
        epoch_loss.reset()

        # ------------------------------ train -----------------------------
        model.train()
        for i, (image, label, image_path) in tqdm(enumerate(train_dataloader)):
            loss_meter.reset()
            # prepare input
            if config.use_gpu:
                image = image.cuda()
                label = label.cuda()
            # forward pass
            score = model(image)
            # backpropagate
            optimizer.zero_grad()
            loss = criterion(score, label)
            loss.backward()
            optimizer.step()
            # record loss and confusion matrix
            loss_meter.add(loss.item())
            epoch_loss.add(loss.item())
            train_cm.add(softmax(score, dim=1).detach(), label.detach())
            if (i + 1) % config.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

        # Per-class sensitivity (%) from the confusion matrix:
        # row 0 -> class-0 recall, row 1 -> class-1 recall.
        train_se = [
            100. * train_cm.value()[0][0] / (train_cm.value()[0][0] + train_cm.value()[0][1]),
            100. * train_cm.value()[1][1] / (train_cm.value()[1][0] + train_cm.value()[1][1])
        ]

        # ---------------------------- validate ----------------------------
        model.eval()
        if (epoch + 1) % 1 == 0:  # validate every epoch (frequency knob)
            Best_T, val_cm, val_spse, val_accuracy, AUC = val(model, val_dataloader)

            # ---------------------------- save model ----------------------
            # Keep only the best-AUC model, and only after a 5-epoch warmup.
            if AUC > previous_auc and epoch + 1 > 5:
                if config.parallel:
                    if not os.path.exists(
                            os.path.join('checkpoints', save_model_dir, save_model_name.split('.')[0])):
                        os.makedirs(
                            os.path.join('checkpoints', save_model_dir, save_model_name.split('.')[0]))
                    model.module.save(
                        os.path.join('checkpoints', save_model_dir, save_model_name.split('.')[0],
                                     save_model_name))
                else:
                    if not os.path.exists(
                            os.path.join('checkpoints', save_model_dir, save_model_name.split('.')[0])):
                        os.makedirs(
                            os.path.join('checkpoints', save_model_dir, save_model_name.split('.')[0]))
                    model.save(
                        os.path.join('checkpoints', save_model_dir, save_model_name.split('.')[0],
                                     save_model_name))
                previous_auc = AUC
                save_epoch = epoch + 1

            # ------------------------- record and print -------------------
            process_record['epoch_loss'].append(epoch_loss.value()[0])
            process_record['train_avg_se'].append(np.average(train_se))
            process_record['train_se_0'].append(train_se[0])
            process_record['train_se_1'].append(train_se[1])
            process_record['val_avg_se'].append(np.average(val_spse))
            process_record['val_se_0'].append(val_spse[0])
            process_record['val_se_1'].append(val_spse[1])
            process_record['AUC'].append(AUC)
            vis.plot_many({
                'epoch_loss': epoch_loss.value()[0],
                'train_avg_se': np.average(train_se),
                'train_se_0': train_se[0],
                'train_se_1': train_se[1],
                'val_avg_se': np.average(val_spse),
                'val_se_0': val_spse[0],
                'val_se_1': val_spse[1],
                'AUC': AUC
            })
            vis.log(
                f"epoch: [{epoch+1}/{config.max_epoch}] ========================================="
            )
            vis.log(
                f"lr: {optimizer.param_groups[0]['lr']}, loss: {round(loss_meter.value()[0], 5)}"
            )
            vis.log(
                f"train_avg_se: {round(np.average(train_se), 4)}, train_se_0: {round(train_se[0], 4)}, train_se_1: {round(train_se[1], 4)}"
            )
            vis.log(
                f"val_avg_se: {round(sum(val_spse)/len(val_spse), 4)}, val_se_0: {round(val_spse[0], 4)}, val_se_1: {round(val_spse[1], 4)}"
            )
            vis.log(f"AUC: {AUC}")
            vis.log(f'train_cm: {train_cm.value()}')
            vis.log(f'Best Threshold: {Best_T}')
            vis.log(f'val_cm: {val_cm}')
            print("lr:", optimizer.param_groups[0]['lr'],
                  "loss:", round(epoch_loss.value()[0], 5))
            print('train_avg_se:', round(np.average(train_se), 4),
                  'train_se_0:', round(train_se[0], 4),
                  'train_se_1:', round(train_se[1], 4))
            print('val_avg_se:', round(np.average(val_spse), 4),
                  'val_se_0:', round(val_spse[0], 4),
                  'val_se_1:', round(val_spse[1], 4))
            print('AUC:', AUC)
            print('train_cm:')
            print(train_cm.value())
            print('Best Threshold:', Best_T, 'val_cm:')
            print(val_cm)

            # ---------------------------- save record ---------------------
            # Only write the curve file once the checkpoint directory exists
            # (i.e. after the first model has been saved).
            if os.path.exists(
                    os.path.join('checkpoints', save_model_dir, save_model_name.split('.')[0])):
                write_json(file=os.path.join('checkpoints', save_model_dir,
                                             save_model_name.split('.')[0],
                                             'process_record.json'),
                           content=process_record)

        # Disabled step-decay schedule, kept for reference:
        # if (epoch+1) % 5 == 0:
        #     lr = lr * config.lr_decay
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr

    vis.log(f"Best Epoch: {save_epoch}")
    print("Best Epoch:", save_epoch)
def train(**kwargs):
    """Train the cross-modal hashing GAN (generator + two WGAN-GP critics).

    Keyword args are merged into the module-level ``opt``. Alternates per
    batch between (a) training the feature and hash discriminators with a
    gradient penalty and (b) training the generator with triplet, quantization
    and adversarial losses; binary codes B_i/B_t are updated in closed form
    each epoch. The best-MAP generator is checkpointed when validation is on.
    """
    opt.parse(kwargs)
    if opt.vis_env:
        vis = Visualizer(opt.vis_env, port=opt.vis_port)

    # FIX: the original used `opt.device is 'cpu'` — identity comparison with
    # a string literal is unreliable (and a SyntaxWarning on CPython >= 3.8).
    if opt.device is None or opt.device == 'cpu':
        opt.device = torch.device('cpu')
    else:
        opt.device = torch.device(opt.device)

    images, tags, labels = load_data(opt.data_path, type=opt.dataset)
    train_data = Dataset(opt, images, tags, labels)
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True)
    L = train_data.get_labels()  # full label matrix, used for the closed-form code update
    L = L.to(opt.device)

    # test splits
    i_query_data = Dataset(opt, images, tags, labels, test='image.query')
    i_db_data = Dataset(opt, images, tags, labels, test='image.db')
    t_query_data = Dataset(opt, images, tags, labels, test='text.query')
    t_db_data = Dataset(opt, images, tags, labels, test='text.db')

    i_query_dataloader = DataLoader(i_query_data, opt.batch_size, shuffle=False)
    i_db_dataloader = DataLoader(i_db_data, opt.batch_size, shuffle=False)
    t_query_dataloader = DataLoader(t_query_data, opt.batch_size, shuffle=False)
    t_db_dataloader = DataLoader(t_db_data, opt.batch_size, shuffle=False)

    query_labels, db_labels = i_query_data.get_labels()
    query_labels = query_labels.to(opt.device)
    db_labels = db_labels.to(opt.device)

    pretrain_model = load_pretrain_model(opt.pretrain_model_path)

    generator = GEN(opt.dropout, opt.image_dim, opt.text_dim, opt.hidden_dim,
                    opt.bit, opt.num_label,
                    pretrain_model=pretrain_model).to(opt.device)
    discriminator = DIS(opt.hidden_dim // 4, opt.hidden_dim // 8, opt.bit).to(opt.device)

    optimizer = Adam([
        # {'params': generator.cnn_f.parameters()},  # cnn_f stays frozen
        {'params': generator.image_module.parameters()},
        {'params': generator.text_module.parameters()},
        {'params': generator.hash_module.parameters()}
    ], lr=opt.lr, weight_decay=0.0005)

    optimizer_dis = {
        'feature': Adam(discriminator.feature_dis.parameters(), lr=opt.lr,
                        betas=(0.5, 0.9), weight_decay=0.0001),
        'hash': Adam(discriminator.hash_dis.parameters(), lr=opt.lr,
                     betas=(0.5, 0.9), weight_decay=0.0001)
    }

    tri_loss = TripletLoss(opt, reduction='sum')

    loss = []
    max_mapi2t = 0.
    max_mapt2i = 0.
    max_average = 0.
    mapt2i_list = []
    mapi2t_list = []
    train_times = []

    # Random sign init for the binary codes; H_* buffer the real-valued codes.
    B_i = torch.randn(opt.training_size, opt.bit).sign().to(opt.device)
    B_t = B_i
    H_i = torch.zeros(opt.training_size, opt.bit).to(opt.device)
    H_t = torch.zeros(opt.training_size, opt.bit).to(opt.device)

    for epoch in range(opt.max_epoch):
        t1 = time.time()
        e_loss = 0
        for i, (ind, img, txt, label) in tqdm(enumerate(train_dataloader)):
            imgs = img.to(opt.device)
            txt = txt.to(opt.device)
            labels = label.to(opt.device)
            batch_size = len(ind)

            h_i, h_t, f_i, f_t = generator(imgs, txt)
            H_i[ind, :] = h_i.data
            H_t[ind, :] = h_t.data
            h_t_detach = generator.generate_txt_code(txt)

            #####
            # train feature discriminator
            #####
            D_real_feature = discriminator.dis_feature(f_i.detach())
            D_real_feature = -opt.gamma * torch.log(torch.sigmoid(D_real_feature)).mean()
            optimizer_dis['feature'].zero_grad()
            D_real_feature.backward()
            # train with fake (text features)
            D_fake_feature = discriminator.dis_feature(f_t.detach())
            D_fake_feature = -opt.gamma * torch.log(
                torch.ones(batch_size).to(opt.device) - torch.sigmoid(D_fake_feature)).mean()
            D_fake_feature.backward()
            # train with gradient penalty on random interpolations (WGAN-GP)
            alpha = torch.rand(batch_size, opt.hidden_dim // 4).to(opt.device)
            interpolates = alpha * f_i.detach() + (1 - alpha) * f_t.detach()
            interpolates.requires_grad_()
            disc_interpolates = discriminator.dis_feature(interpolates)
            gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                                      grad_outputs=torch.ones(disc_interpolates.size()).to(opt.device),
                                      create_graph=True, retain_graph=True, only_inputs=True)[0]
            gradients = gradients.view(gradients.size(0), -1)
            # 10 is the gradient penalty hyperparameter
            feature_gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * 10
            feature_gradient_penalty.backward()
            optimizer_dis['feature'].step()

            #####
            # train hash discriminator
            #####
            D_real_hash = discriminator.dis_hash(h_i.detach())
            D_real_hash = -opt.gamma * torch.log(torch.sigmoid(D_real_hash)).mean()
            optimizer_dis['hash'].zero_grad()
            D_real_hash.backward()
            # train with fake (text hash codes)
            D_fake_hash = discriminator.dis_hash(h_t.detach())
            D_fake_hash = -opt.gamma * torch.log(
                torch.ones(batch_size).to(opt.device) - torch.sigmoid(D_fake_hash)).mean()
            D_fake_hash.backward()
            # train with gradient penalty
            alpha = torch.rand(batch_size, opt.bit).to(opt.device)
            interpolates = alpha * h_i.detach() + (1 - alpha) * h_t.detach()
            interpolates.requires_grad_()
            disc_interpolates = discriminator.dis_hash(interpolates)
            gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                                      grad_outputs=torch.ones(disc_interpolates.size()).to(opt.device),
                                      create_graph=True, retain_graph=True, only_inputs=True)[0]
            gradients = gradients.view(gradients.size(0), -1)
            hash_gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * 10
            hash_gradient_penalty.backward()
            optimizer_dis['hash'].step()

            # generator adversarial losses (fool both discriminators)
            loss_G_txt_feature = -torch.log(torch.sigmoid(discriminator.dis_feature(f_t))).mean()
            loss_adver_feature = loss_G_txt_feature
            loss_G_txt_hash = -torch.log(torch.sigmoid(discriminator.dis_hash(h_t_detach))).mean()
            loss_adver_hash = loss_G_txt_hash

            # cross-modal triplet losses in both directions
            tri_i2t = tri_loss(h_i, labels, target=h_t, margin=opt.margin)
            tri_t2i = tri_loss(h_t, labels, target=h_i, margin=opt.margin)
            weighted_cos_tri = tri_i2t + tri_t2i

            # quantization loss: distance to the current binary codes
            i_ql = torch.sum(torch.pow(B_i[ind, :] - h_i, 2))
            t_ql = torch.sum(torch.pow(B_t[ind, :] - h_t, 2))
            loss_quant = i_ql + t_ql
            err = opt.alpha * weighted_cos_tri + \
                  opt.beta * loss_quant + opt.gamma * (loss_adver_feature + loss_adver_hash)

            optimizer.zero_grad()
            err.backward()
            optimizer.step()

            # FIX: accumulate a Python float. The original `e_loss = err + e_loss`
            # summed graph-attached tensors, retaining every batch's autograd
            # graph for the whole epoch (memory blow-up).
            e_loss = err.item() + e_loss

        # Closed-form update of the binary codes via a ridge-regularized
        # least-squares projection of the labels.
        P_i = torch.inverse(
            L.t() @ L + opt.lamb * torch.eye(opt.num_label, device=opt.device)) @ L.t() @ B_i
        P_t = torch.inverse(
            L.t() @ L + opt.lamb * torch.eye(opt.num_label, device=opt.device)) @ L.t() @ B_t
        B_i = (L @ P_i + opt.mu * H_i).sign()
        B_t = (L @ P_t + opt.mu * H_t).sign()

        loss.append(e_loss)
        print('...epoch: %3d, loss: %3.3f' % (epoch + 1, loss[-1]))
        delta_t = time.time() - t1
        if opt.vis_env:
            vis.plot('loss', loss[-1])

        # validate
        if opt.valid and (epoch + 1) % opt.valid_freq == 0:
            mapi2t, mapt2i = valid(generator, i_query_dataloader, i_db_dataloader,
                                   t_query_dataloader, t_db_dataloader,
                                   query_labels, db_labels)
            print('...epoch: %3d, valid MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (
                epoch + 1, mapi2t, mapt2i))
            mapt2i_list.append(mapt2i)
            mapi2t_list.append(mapi2t)
            train_times.append(delta_t)
            # keep the model with the best average MAP
            if 0.5 * (mapi2t + mapt2i) > max_average:
                max_mapi2t = mapi2t
                max_mapt2i = mapt2i
                max_average = 0.5 * (mapi2t + mapt2i)
                save_model(generator)
            if opt.vis_env:
                vis.plot('mapi2t', mapi2t)
                vis.plot('mapt2i', mapt2i)

        # decay the generator lr every 100 epochs, floored at 1e-6
        if epoch % 100 == 0:
            for params in optimizer.param_groups:
                params['lr'] = max(params['lr'] * 0.8, 1e-6)

    if not opt.valid:
        save_model(generator)
    print('...training procedure finish')
    if opt.valid:
        print(' max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (max_mapi2t, max_mapt2i))
    else:
        mapi2t, mapt2i = valid(generator, i_query_dataloader, i_db_dataloader,
                               t_query_dataloader, t_db_dataloader,
                               query_labels, db_labels)
        print(' max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (mapi2t, mapt2i))

    path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit)
    with open(os.path.join(path, 'result.pkl'), 'wb') as f:
        pickle.dump([train_times, mapi2t_list, mapt2i_list], f)
def train(**kwargs):
    """Train a TransformerNet for fast neural style transfer.

    Keyword args are merged into the module-level ``opt``. A frozen Vgg16 is
    used only as the perceptual loss network (content loss on relu2_2, style
    loss as Gram-matrix MSE over all returned feature maps); only the
    TransformerNet parameters are optimized and checkpointed.
    """
    # step1: config
    opt.parse(**kwargs)
    vis = Visualizer(opt.env)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')

    # step2: data
    # Content images are scaled to [0, 255] by the Lambda transform — the
    # loss pipeline (normalize_batch) expects that range, not [0, 1].
    transforms = T.Compose([
        T.Resize(opt.image_size),
        T.CenterCrop(opt.image_size),
        T.ToTensor(),
        T.Lambda(lambda x: x * 255)
    ])
    dataset = tv.datasets.ImageFolder(opt.data_root, transform=transforms)
    dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True,
                            num_workers=opt.num_workers, drop_last=True)

    # Single style image (1*c*H*W) used for the style targets.
    style_img = get_style_data(opt.style_path)
    style_img = style_img.to(device)
    # Roughly undo ImageNet normalization (mean~0.45, std~0.225) for display.
    vis.img('style_image', (style_img.data[0] * 0.225 + 0.45).clamp(min=0, max=1))

    # step3: model — TransformerNet is trained; Vgg16 only evaluates the loss,
    # so it runs in eval mode with gradients disabled.
    transformer_net = TransformerNet()
    if opt.model_path:
        # map_location keeps tensors on their saved storage device
        transformer_net.load_state_dict(t.load(opt.model_path, map_location=lambda _s, _: _s))
    transformer_net.to(device)

    # criterion and optimizer
    optimizer = t.optim.Adam(transformer_net.parameters(), opt.lr)
    vgg16 = Vgg16().eval()
    vgg16.to(device)
    # Freeze the loss network: gradients still flow *through* it to the
    # transformer, but its own weights never update.
    for param in vgg16.parameters():
        param.requires_grad = False
    # FIX: reduce=True/size_average=True are long-deprecated constructor
    # args; reduction='mean' is the exact equivalent.
    criterion = t.nn.MSELoss(reduction='mean')

    # step4: loss meters
    style_meter = meter.AverageValueMeter()
    content_meter = meter.AverageValueMeter()
    total_meter = meter.AverageValueMeter()

    # Precompute Gram matrices of the style image:
    # gram_style is a list over [relu1_2, relu2_2, relu3_3, relu4_3].
    with t.no_grad():
        features = vgg16(style_img)
        gram_style = [gram_matrix(feature) for feature in features]

    # step5: train
    for epoch in range(opt.epoches):
        style_meter.reset()
        content_meter.reset()
        # FIX: total_meter was never reset per epoch, unlike its siblings,
        # so the plotted total loss averaged across epoch boundaries.
        total_meter.reset()
        for ii, (data, _) in tqdm(enumerate(dataloader)):
            data = data.to(device)
            y = transformer_net(data)
            # VGG expects ImageNet-normalized inputs.
            data = normalize_batch(data)
            y = normalize_batch(y)
            feature_data = vgg16(data)
            feature_y = vgg16(y)

            # content loss: MSE between relu2_2 activations.
            content_loss = opt.content_weight * criterion(feature_y.relu2_2, feature_data.relu2_2)

            # style loss: MSE between Gram matrices at every feature level;
            # gm_s is broadcast to the batch via expand_as.
            style_loss = 0
            for ft_y, gm_s in zip(feature_y, gram_style):
                gram_y = gram_matrix(ft_y)
                style_loss += criterion(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            total_loss = content_loss + style_loss
            # Single zero_grad right before backward (the original also
            # called it redundantly at the top of the loop).
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            content_meter.add(content_loss.item())
            style_meter.add(style_loss.item())
            total_meter.add(total_loss.item())

            # visualize
            if (ii + 1) % opt.print_freq == 0 and opt.vis:
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                vis.plot('total_loss', total_meter.value()[0])
                # normalized tensors live roughly in [-2, 2]; map back to
                # [0, 1] for display.
                vis.img('input', (data.data * 0.225 + 0.45)[0].clamp(min=0, max=1))
                vis.img('output', (y.data * 0.225 + 0.45)[0].clamp(min=0, max=1))

        # save checkpoint and snapshot the visdom environment
        if (epoch + 1) % opt.save_every == 0:
            t.save(transformer_net.state_dict(), 'checkpoints/%s_style.pth' % epoch)
            vis.save([opt.env])