def main(crnn, train_loader, val_loader, criterion, optimizer):
    """Top-level train/validate loop.

    Runs params.niter iterations; each iteration trains one pass and
    validates (capped at max_i=1000 batches), checkpointing every 50
    iterations and whenever validation accuracy sets a new best.
    """
    crnn = crnn.to(device)
    # BUG FIX: was `certerion = criterion.to(device)` — the typo bound the
    # moved loss to a dead name, leaving the `criterion` actually used for
    # training on the CPU while the model ran on `device`.
    criterion = criterion.to(device)
    Iteration = 0
    params.best_accuracy = 0.0
    while Iteration < params.niter:
        train(crnn, train_loader, criterion, Iteration)
        ## max_i: cut down the consuming time of testing, if you'd like to
        ## validate on the whole testset, please set it to len(val_loader)
        accuracy = val(crnn, val_loader, criterion, Iteration, max_i=1000)
        # re-enable gradients that val() may have frozen
        for p in crnn.parameters():
            p.requires_grad = True
        # periodic checkpoint (fires on Iteration 1, 51, 101, ...)
        if Iteration % 50 == 1:
            print("saving checkpoint...")
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       Iteration, accuracy))
            print("done")
        # best-so-far checkpoint
        if accuracy > params.best_accuracy:
            params.best_accuracy = accuracy
            print('saving best acc....')
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       Iteration, accuracy))
            torch.save(crnn.state_dict(),
                       '{0}/crnn_best.pth'.format(params.experiment))
            print('done')
        # print("is best accuracy: {0}".format(accuracy > params.best_accuracy))
        Iteration += 1
def training(crnn, train_loader, criterion, optimizer):
    """Run the full schedule: params.niter epochs over train_loader, with
    periodic loss display, validation, and epoch-interval checkpoints."""
    for epoch in range(params.niter):
        batch_iter = iter(train_loader)
        num_batches = len(train_loader)
        print("total number", num_batches)
        step = 0
        while step < num_batches:
            crnn.train()  # training phase
            batch_cost = trainBatch(crnn, criterion, optimizer, batch_iter)
            loss_avg.add(batch_cost)
            step += 1
            if step % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, step, num_batches,
                       loss_avg.val()))
                loss_avg.reset()
            if step % params.valInterval == 0:
                val(crnn, test_dataset, criterion)
        # checkpoint every params.saveInterval epochs
        if (epoch + 1) % params.saveInterval == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       epoch, step))
def training():
    """Train for params.nEpochs epochs, validating every params.valInterval
    batches, checkpointing every params.saveEpoch epochs, and running one
    final validation after training completes."""
    for epoch in range(params.nEpochs):
        batch_iter = iter(train_loader)
        step = 0
        while step < len(train_loader):
            # ensure all weights are trainable (val() may have frozen them)
            for weight in crnn.parameters():
                weight.requires_grad = True
            crnn.train()
            loss_avg.add(trainBatch(crnn, criterion, optimizer, batch_iter))
            step += 1
            if step % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.nEpochs, step, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if step % params.valInterval == 0:
                val(crnn, test_dataset, criterion)
        if (epoch + 1) % params.saveEpoch == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       epoch, step))
            print('Saved model params in dir {}'.format(params.experiment))
    val(crnn, test_dataset, criterion)
def training():
    """Training driver.

    NOTE(review): the `val(...); return` pair just after crnn.train() makes
    everything below it unreachable — this looks like a temporary
    evaluation-only shortcut left in; confirm before relying on this
    function to actually train.
    """
    for total_steps in range(params.niter):
        train_iter = iter(train_loader)
        i = 0
        logger.info('length of train_data: %d' % (len(train_loader)))
        while i < len(train_loader):
            # re-enable gradients (a prior val() pass may have frozen them)
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            # evaluation-only shortcut: validate on three test sets, then
            # leave the function immediately
            val(crnn, test_dataset1, test_dataset2, test_dataset3,
                total_steps, i, criterion)
            return
            # --- unreachable training body below ---
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            loss_avg.add(cost)
            i += 1
            if i % params.displayInterval == 0:
                logger.info('[%d/%d][%d/%d] Loss: %f' %
                            (total_steps, params.niter, i,
                             len(train_loader), loss_avg.val()))
                loss_avg.reset()
            # NOTE(review): this val() call has a different arity than the
            # one above (single test_dataset) — unreachable as written
            val(crnn, test_dataset, total_steps, i, criterion)
        if (total_steps + 1) % params.saveInterval == 0:
            string = "model save to {0}crnn_Rec_done_epoch_{1}.pth".format(
                log_dir, total_steps)
            logger.info(string)
            torch.save(
                crnn.state_dict(),
                '{0}crnn_Rec_done_epoch_{1}.pth'.format(log_dir, total_steps))
def training():
    """Main training loop: params.niter passes over the data with periodic
    logging, validation, and checkpointing."""
    for epoch in range(params.niter):  # number of passes over the data
        data_iter = iter(train_loader)  # fresh iterator each epoch
        done = 0
        print(len(train_loader))
        while done < len(train_loader):
            for weight in crnn.parameters():
                weight.requires_grad = True
            crnn.train()
            # criterion is the CTC loss
            batch_loss = trainBatch(crnn, criterion, optimizer, data_iter)
            loss_avg.add(batch_loss)  # running average of the loss
            done += 1
            if done % params.displayInterval == 0:  # periodic progress log
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, done, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if done % params.valInterval == 0:  # periodic validation
                val(crnn, test_dataset, criterion)
        if (epoch + 1) % params.saveInterval == 0:  # periodic checkpoint
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       epoch, done))
def training(start):
    """Resume training at epoch `start`; saves a checkpoint only when
    validation accuracy beats the running best (seeded from a prior run)."""
    best = 0.982470  # best accuracy achieved so far (from a previous run)
    for epoch in range(start, params.niter):
        batch_iter = iter(train_loader)
        step = 0
        print(len(train_loader))
        while step < len(train_loader):
            for weight in crnn.parameters():
                weight.requires_grad = True
            crnn.train()
            loss_avg.add(trainBatch(crnn, criterion, optimizer, batch_iter))
            step += 1
            if step % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, step, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if step % params.valInterval == 0:
                acc = val(crnn, test_dataset, criterion)
                if acc > best:
                    best = acc
                    print('save model ..........')
                    # ti = time.strftime('%Y-%m-%d', time.localtime(time.time()))
                    torch.save(
                        crnn.state_dict(),
                        '{0}/best_model_{1}_{2}.pth'.format(
                            params.experiment, epoch, step))
def training():
    """Training loop that checkpoints only when validation accuracy
    improves on the best seen so far."""
    best_acc = 0  # highest validation accuracy reached so far
    for epoch in range(params.niter):
        batch_iter = iter(train_loader)
        step = 0
        print(len(train_loader))
        while step < len(train_loader):
            for weight in crnn.parameters():
                weight.requires_grad = True
            crnn.train()
            loss_avg.add(trainBatch(crnn, criterion, optimizer, batch_iter))
            step += 1
            if step % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, step, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if step % params.valInterval == 0:
                accuracy = val(crnn, test_dataset, criterion)
                if accuracy > best_acc:
                    torch.save(
                        crnn.state_dict(),
                        '{0}/rnn_no_IO_{1}_{2}_{3}.pth'.format(
                            params.experiment, epoch, step, accuracy))
                    best_acc = accuracy
def training():
    """Training driver with an mmcv progress bar and an adaptive display
    interval.

    NOTE(review): `val(...); return` right after crnn.train() short-circuits
    the function — the batch-training body below is unreachable. Looks like
    a temporary eval-only hack; confirm.
    """
    for total_steps in range(test_params.niter):
        train_iter = iter(train_loader)
        i = 0
        logger.info('length of train_data: %d' % (len(train_loader)))
        eval_time = 0.0  # cumulative seconds spent inside trainBatch
        prog_bar = mmcv.ProgressBar(test_params.displayInterval)
        while i < len(train_loader):
            torch.cuda.empty_cache()
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            # evaluation-only shortcut (see docstring)
            val(crnn, test_dataset, criterion, total_steps, i)
            return
            # --- unreachable training body below ---
            start = time.time()
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            eval_time += time.time() - start
            loss_avg.add(cost)
            i += 1
            prog_bar.update()
            if i % test_params.tbInterval == 0:
                print("\n>>>> Tensorboard Log")
                writer.add_scalar(
                    'train/loss', loss_avg.val(),
                    int(i + total_steps * len(train_loader)))  # record to tb
            if i % test_params.displayInterval == 0:
                # wipe the progress-bar line before logging
                sys.stdout.write("\r%100s\r" % ' ')
                sys.stdout.flush()
                logger.info('[%d/%d][%d/%d] Loss: %f, Cost: %.4fs per batch' %
                            (total_steps, test_params.niter, i,
                             len(train_loader), loss_avg.val(),
                             eval_time / i))
                # adapt the display interval to the measured batch speed
                if eval_time / i < 0.2:
                    test_params.displayInterval = 1000
                elif eval_time / i < 0.5:
                    test_params.displayInterval = 400
                elif eval_time / i < 1.0:
                    test_params.displayInterval = 200
                prog_bar = mmcv.ProgressBar(
                    test_params.displayInterval)  # new interval
                loss_avg.reset()
            val(crnn, test_dataset, criterion, total_steps, i)
            torch.cuda.empty_cache()
        if (total_steps + 1) % test_params.saveInterval == 0:
            string = "model save to {0}crnn_Rec_done_epoch_{1}.pth".format(
                log_dir, total_steps)
            logger.info(string)
            torch.save(
                crnn.state_dict(),
                '{0}crnn_Rec_done_epoch_{1}.pth'.format(log_dir, total_steps))
def main(crnn, train_loader, val_loader, criterion, optimizer):
    """Train/validate loop; saves a checkpoint whenever validation accuracy
    beats params.best_accuracy."""
    if args.cuda:
        crnn.cuda()
    Iteration = 0
    while Iteration < params.niter:
        train(crnn, train_loader, criterion, Iteration)
        ## max_i: cut down the consuming time of testing, if you'd like to
        ## validate on the whole testset, please set it to len(val_loader)
        accuracy = val(crnn, val_loader, criterion, Iteration, max_i=1000)
        # re-enable gradients that val() may have frozen
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()
        if accuracy > params.best_accuracy:
            # BUG FIX: checkpoint name used undefined `total_steps`
            # (NameError at runtime); use the loop counter `Iteration`.
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       Iteration, accuracy))
            torch.save(crnn.state_dict(),
                       '{0}/crnn_best.pth'.format(params.experiment))
            # BUG FIX: record the new best, so later, worse iterations
            # no longer overwrite crnn_best.pth.
            params.best_accuracy = accuracy
        print("is best accuracy: {0}".format(accuracy > params.best_accuracy))
        Iteration += 1
def training():
    """Curriculum training: the small dataset for the first 50 epochs, the
    medium one until epoch 80, then the large one."""
    for epoch in range(params.niter):
        # pick the loader/test set for the current curriculum stage
        if epoch < 50:
            train_loader = s_train_loader
            test_dataset = s_test_dataset
        elif epoch < 80:
            train_loader = m_train_loader
            test_dataset = m_test_dataset
        else:
            train_loader = l_train_loader
            test_dataset = l_test_dataset
        batch_iter = iter(train_loader)
        step = 0
        print(len(train_loader))
        while step < len(train_loader):
            for weight in crnn.parameters():
                weight.requires_grad = True
            crnn.train()
            loss_avg.add(trainBatch(crnn, criterion, optimizer, batch_iter))
            step += 1
            if step % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, step, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if step % params.valInterval == 0:
                val(crnn, test_dataset, criterion)
        if (epoch + 1) % params.saveInterval == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       epoch, step))
def training(crnn, train_loader, criterion, optimizer):
    """Train `crnn` for params.niter epochs, validating and checkpointing
    at the configured intervals."""
    for epoch in range(params.niter):
        batch_iter = iter(train_loader)
        step = 0
        while step < len(train_loader):
            for weight in crnn.parameters():
                weight.requires_grad = True
            crnn.train()
            loss_avg.add(train_batch(crnn, criterion, optimizer, batch_iter))
            step += 1
            if step % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.niter, step, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if step % params.valInterval == 0:
                val(crnn, criterion)
        if epoch % params.saveInterval == 0:
            save_name = '{0}/crnn_Rec_done_{1}_{2}.pth'.format(
                params.experiment, epoch, step)
            torch.save(crnn.state_dict(), save_name)
            print('%s saved' % save_name)
# --- training-script setup chunk (starts mid-script) ---

# per-sample label-length buffer
length = torch.IntTensor(params.batchSize)
# NOTE: rebinds the `crnn` module name to the model instance
crnn = crnn.CRNN(params.imgH, nc, nclass, params.nh)
if params.cuda:
    crnn.cuda()
    image = image.cuda()
    criterion = criterion.cuda()
crnn.apply(weights_init)
if params.crnn != '':
    print('loading pretrained model from %s' % params.crnn)
    preWeightDict = torch.load(
        params.crnn,
        map_location=lambda storage, loc: storage)  # weights trained for this project
    modelWeightDict = crnn.state_dict()
    for k, v in preWeightDict.items():
        name = k.replace('module.', '')  # remove `module.`
        if 'rnn.1.embedding' not in name:  # do not load the final layer's weights
            modelWeightDict[name] = v
    crnn.load_state_dict(modelWeightDict)

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

# setup optimizer
    # tail of trainBatch (its `def` lies above this chunk)
    return cost

for epoch in range(opt.nepoch):
    train_iter = iter(train_loader)
    i = 0
    while i < len(train_loader):
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()
        cost = trainBatch(crnn, criterion, optimizer)
        loss_avg.add(cost)
        i += 1
        if i % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.nepoch, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()
        # NOTE(review): `opt.nepoch // 40` is 0 when nepoch < 40 —
        # ZeroDivisionError; confirm nepoch >= 40 is guaranteed.
        if i % opt.valInterval == 0 and (epoch + 1) % (opt.nepoch // 40) == 0:
            # if i % opt.valInterval == 0:
            val(crnn, test_dataset, criterion)
        # do checkpointing
        if i % opt.saveInterval == 0 and (epoch + 1) % (opt.nepoch // 40) == 0:
            # unwrap DataParallel's .module when more than one GPU is used
            torch.save(
                crnn.state_dict()
                if opt.ngpu == 1 else crnn.module.state_dict(),
                '{0}/netCRNN_{1}_{2}.pth'.format(opt.expr_dir, epoch, i))
    # tail of trainBatch: backprop and parameter update
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost

for epoch in range(opt.nepoch):
    train_iter = iter(train_loader)
    i = 0
    while i < len(train_loader):
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()
        cost = trainBatch(crnn, criterion, optimizer)
        loss_avg.add(cost)
        i += 1
        if i % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.nepoch, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()
        if i % opt.valInterval == 0:
            val(crnn, test_dataset, criterion)
        # do checkpointing
        if i % opt.saveInterval == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/netCRNN_{1}_{2}.pth'.format(opt.expr_dir, epoch, i))
    # tail of weights_init (its `def` and the Conv branch lie above this chunk)
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
crnn.apply(weights_init)
if opt.crnn != '':
    print('loading pretrained model from %s' % opt.crnn)
    pre_trainmodel = torch.load(opt.crnn)
    # strip the DataParallel 'module.' prefix (first 7 chars) from each key
    # NOTE(review): pre_trainmodel_rename is built but never used below —
    # the wholesale load uses the un-renamed pre_trainmodel; confirm.
    pre_trainmodel_rename = collections.OrderedDict()
    for k, v in pre_trainmodel.items():
        name = k[7:]
        pre_trainmodel_rename[name] = v
    model_dict = crnn.state_dict()
    weig1 = 'rnn.1.embedding.weight'
    bias1 = 'rnn.1.embedding.bias'
    # load wholesale only when the output (embedding) layer sizes match
    if len(model_dict[weig1]) == len(pre_trainmodel[weig1]) and len(
            model_dict[bias1]) == len(pre_trainmodel[bias1]):
        crnn.load_state_dict(pre_trainmodel)
    else:
        for k, v in model_dict.items():
            # NOTE(review): `k != weig1 or k != bias1` is always True (k can
            # never equal both names at once) — presumably `and` was intended
            # so the embedding layer is skipped; as written every key is
            # copied and the mismatched embedding will fail. Confirm.
            if (k != weig1 or k != bias1):
                model_dict[k] = pre_trainmodel[k]
        crnn.load_state_dict(model_dict)
print(crnn)

# input image / label / label-length buffers
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)
        # tail of the per-batch loop (enclosing epoch/batch loops start above)
            p.requires_grad = True
        crnn.train()
        cost = trainBatch(crnn, criterion, optimizer)
        loss_avg.add(cost)
        i += 1
        if i % params.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch + 1, params.nepoch, i, len(train_loader),
                   loss_avg.val()))
            loss_avg.reset()

    # end of one epoch: evaluate and log to the results file `f`
    print("end of epoch [%d/%d]" % (epoch + 1, params.nepoch))
    print("start testing on val set")
    valloss, valaccuracy = val(crnn, test_dataset, criterion)
    print("start testing on train set to check for overfitting")
    #trainloss, trainaccuracy = val(crnn, train_dataset, criterion)
    line = str(valloss) + "\t" + str(
        valaccuracy
    ) + "\n"  #"\t"+str(trainloss)+"\t"+str(trainaccuracy)+"\n"
    print(line)
    f.write(line)
    f.flush()
    # checkpoint only when validation loss improves (-1 marks "no epoch yet")
    if valloss < prevvalloss or prevvalloss == -1:
        # do checkpointing
        torch.save(
            crnn.state_dict(),
            '{0}/netCRNN_{1}_{2}.pth'.format(params.expr_dir, epoch,
                                             valloss))
        prevvalloss = valloss
if __name__ == "__main__":
    # script entry point: full training loop over params.nepoch epochs
    for epoch in range(params.nepoch):
        train_iter = iter(train_loader)
        i = 0
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            cost = trainBatch(crnn, criterion, optimizer)
            loss_avg.add(cost)
            i += 1
            if i % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.nepoch, i, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if i % params.valInterval == 0:
                acc = val(crnn, test_dataset, criterion)
        # do checkpointing
        # NOTE(review): `acc` is unbound here if no validation ran yet
        # (len(train_loader) < params.valInterval) — confirm.
        if (epoch + 1) % params.saveInterval == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/netCRNN_{1}_{2}_{3}.pth'.format(params.expr_dir,
                                                     epoch + 1, i, acc))

# usage examples:
# python train.py --trainroot train_all_lmdb --valroot val_all_lmdb
# nohup python train.py --trainroot train_all_lmdb --valroot val_all_lmdb >train.log 2>&1 &
# python train.py --trainroot train_mobiles_lmdb --valroot val_mobiles_lmdb
# nohup python train.py --trainroot train_mobiles_lmdb --valroot val_mobiles_lmdb >train.log 2>&1 &
    # tail of trainBatch: backprop and parameter update
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost

for epoch in range(opt.niter):
    train_iter = iter(train_loader)
    i = 0
    while i < len(train_loader):
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()
        cost = trainBatch(crnn, criterion, optimizer)
        loss_avg.add(cost)
        i += 1
        if i % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.niter, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()
        if i % opt.valInterval == 0:
            val(crnn, test_dataset, criterion)
        # do checkpointing
        if i % opt.saveInterval == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/netCRNN_{1}_{2}.pth'.format(opt.experiment, epoch, i))
def weights_init(m):
    """DCGAN-style init: Conv weights ~ N(0, 0.02); BatchNorm weights
    ~ N(1, 0.02) with zero bias."""
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

# NOTE: rebinds the `crnn` module name to the model instance
crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
crnn.apply(weights_init)
if opt.pretrained != '':
    print('loading pretrained model from %s' % opt.pretrained)
    # crnn.load_state_dict(torch.load(opt.pretrained))
    pre_weight_dict = torch.load(
        opt.pretrained,
        map_location=lambda storage, loc: storage)  # weights trained for this project
    new_state_dict = crnn.state_dict()
    for k, v in pre_weight_dict.items():
        # if "num_batches_tracked" not in k:
        # skip BatchNorm bookkeeping buffers (torch-version dependent)
        if "num_batches_tracked" not in k:
            name = k.replace('module.', '')  # remove `module.`
            new_state_dict[name] = v
    crnn.load_state_dict(new_state_dict)
print(crnn)

# input image / label / label-length buffers
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if opt.cuda:
    crnn.cuda()
criterion = CTCLoss() # criterion = torch.nn.CTCLoss() # cnn and rnn image = torch.FloatTensor(params.batchSize, 3, params.imgH, params.imgH) text = torch.IntTensor(params.batchSize * 5) length = torch.IntTensor(params.batchSize) # crnn = crnn.CRNN(params.imgH, nc, nclass, params.nh) crnn = crnn.CRNN(6736, hidden_unit=256) crnn_model_path = 'trained_models/netCRNN_4_48000.pth' #导入预训练模型权重 print("loading pretrained model from %s" % crnn_model_path) crnn.load_state_dict(torch.load(crnn_model_path, map_location='cpu')) # 获取预训练的参数 pretrained_dict = crnn.state_dict() # mycrnn = mycrnn.CRNN(params.imgH, nc, nclass, params.nh) mycrnn = mycrnn.CRNN(class_num=nclass, hidden_unit=256) mycrnn_dict = mycrnn.state_dict() pretrained_dict = { k: v for k, v in pretrained_dict.items() if k in mycrnn_dict } # 使用预训练模型来更新参数 mycrnn_dict.update(pretrained_dict) mycrnn.load_state_dict(mycrnn_dict) if args.cuda: crnn.cuda()
# --- top-level training-loop chunk ---
testLoss = 10000  # best (lowest) validation loss seen so far
num = 0
# NOTE(review): range(1, opt.niter) runs niter-1 epochs, and the interval
# checks below key off `epoch`, not the batch index — on a matching epoch
# they fire once per *batch*. Confirm this is intended.
for epoch in range(1, opt.niter):
    print("epoch\t", epoch, "opt.niter\t", opt.niter)
    train_iter = iter(train_loader)
    #print(len(train_iter))
    i = 0
    while i < len(train_loader):
        print("i:", i)
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()
        cost = trainBatch(crnn, criterion, optimizer)
        # print(cost)
        loss_avg.add(cost)
        print(loss_avg.val())
        i += 1
        # print(i,op# t.saveInterval,"Loss:",loss_avg.val())
        if epoch % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] ' %
                  (epoch, opt.niter, i, len(train_loader)))
            loss_avg.reset()
        if epoch % opt.valInterval == 0:
            val(crnn, test_dataset, criterion)
        # do checkpointing
        if epoch % opt.saveInterval == 0:
            torch.save(
                crnn.state_dict(),
                './{}/CRNN_{}_{}.pth'.format(opt.experiment, epoch, i))
            #torch.save(crnn.state_dict(), '{0}/model.pth'.format(opt.experiment))
logger.log('starting to train')
for epoch in range(opt.niter):
    train_iter = iter(train_loader)
    i = 0
    while i < len(train_loader):
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()
        cost = train_batch(crnn, criterion, optimizer)
        loss_avg.add(cost)
        i += 1
        if i % opt.display_interval == 0:
            logger.log('[%d/%d][%d/%d] Loss: %f' %
                       (epoch, opt.niter, i, len(train_loader),
                        loss_avg.val()))
            loss_avg.reset()
    # epoch-boundary validation
    if (epoch + 1) % opt.valInterval == 0:
        val(crnn, test_dataset, criterion)
    # do checkpointing
    if (epoch + 1) % opt.saveInterval == 0:
        torch.save(crnn.state_dict(),
                   '{0}/netCRNN_{1}.pth'.format(opt.experiment, epoch))
    # adjust the learning rate every 5 epochs (see adjust_lr)
    if (epoch + 1) % 5 == 0:
        adjust_lr(optimizer, epoch + 1)
def main(arg):
    """Build datasets/loaders, the CRNN model and optimizer from `arg`,
    then run the training loop with periodic display/test/checkpointing
    and a final save after all epochs."""
    print(arg)
    train_dataset = dataset.lmdbDataset(
        path=arg.train_root,
        # transform=dataset.resizeNormalize((imgW,imgH)),
    )
    test_dataset = dataset.lmdbDataset(
        path=arg.test_root,
        # transform=dataset.resizeNormalize((arg.imgW,arg.imgH)),
    )
    # NOTE(review): `d` and `l` are probes of the test set, otherwise unused
    d = test_dataset.__getitem__(0)
    l = test_dataset.__len__()
    train_loader = DataLoader(train_dataset,
                              num_workers=arg.num_workers,
                              batch_size=arg.batch_size,
                              collate_fn=dataset.alignCollate(
                                  imgH=arg.imgH,
                                  imgW=arg.imgW,
                                  keep_ratio=arg.keep_ratio),
                              shuffle=True,
                              drop_last=True)
    criterion = CTCLoss()
    converter = utils.Converter(arg.num_class)
    # +1 output class for the CTC blank symbol
    crnn = CRNN(imgH=arg.imgH, nc=3, nclass=arg.num_class + 1, nh=256)

    # custom weights initialization called on crnn
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    crnn.apply(weights_init)
    print(crnn)

    # reusable input/label/length buffers, refilled every batch
    image = torch.FloatTensor(arg.batch_size, 3, arg.imgH, arg.imgW)
    text = torch.IntTensor(arg.batch_size * 5)
    length = torch.IntTensor(arg.batch_size)
    image = Variable(image)
    text = Variable(text)
    length = Variable(length)

    # loss averager
    loss_avg = utils.averager()

    # setup optimizer
    if arg.opt == 'adam':
        optimizer = optim.Adam(crnn.parameters(), 0.01, betas=(0.5, 0.999))
    elif arg.opt == 'adadelta':
        optimizer = optim.Adadelta(crnn.parameters())
    else:
        optimizer = optim.RMSprop(crnn.parameters(), 0.01)

    for epoch in range(arg.n_epoch):
        train_iter = iter(train_loader)
        i = 0
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            # NOTE(review): `train_iter.next()` is the Python 2 iterator
            # protocol; Python 3 requires next(train_iter). Confirm target
            # interpreter version.
            data = train_iter.next()
            cpu_images, cpu_texts = data
            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)
            text_labels, l = converter.encode(cpu_texts)
            utils.loadData(text, text_labels)
            utils.loadData(length, l)
            preds = crnn(image)
            preds_size = Variable(
                torch.IntTensor([preds.size(0)] * batch_size))
            # CTC loss averaged over the batch
            cost = criterion(preds, text, preds_size, length) / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            loss_avg.add(cost)
            i += 1
            if i % arg.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, arg.n_epoch, i, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if i % arg.testInterval == 0:
                test(arg, crnn, test_dataset, criterion, image, text, length)
            # do checkpointing
            if i % arg.saveInterval == 0:
                name = '{0}/netCRNN_{1}_{2}_{3}_{4}.pth'.format(
                    arg.model_dir, arg.num_class, arg.type, epoch, i)
                torch.save(crnn.state_dict(), name)
                print('model saved at ', name)
    # final save once all epochs are done
    torch.save(
        crnn.state_dict(),
        '{0}/netCRNN_{1}_{2}.pth'.format(arg.model_dir, arg.num_class,
                                         arg.type))
# custom weights initialization called on crnn def weights_init(m): classname = m.__class__.__name__ if classname.find('Conv') != -1: m.weight.data.normal_(0.0, 0.02) elif classname.find('BatchNorm') != -1: m.weight.data.normal_(1.0, 0.02) m.bias.data.fill_(0) crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh) crnn.apply(weights_init) if opt.crnn != '': print('loading pretrained model from %s' % opt.crnn) pre_trainmodel = torch.load(opt.crnn) model_dict = crnn.state_dict() weig1 = 'module.rnn.1.embedding.weight' bias1 = 'module.rnn.1.embedding.bias' mymodel = {} if len(model_dict[weig1[7:]]) == len(pre_trainmodel[weig1]) and len( model_dict[bias1[7:]]) == len(pre_trainmodel[bias1]): for k, v in pre_trainmodel.items(): mymodel[k[7:]] = v # print(k, len(v)) crnn.load_state_dict(mymodel) else: for k, v in model_dict.items(): if (k != weig1 or k != bias1): model_dict[k] = pre_trainmodel[k] crnn.load_state_dict(model_dict) print(crnn)
def training():
    """Training loop with an mmcv progress bar and adaptive display interval.

    NOTE(review): loss_avg.reset() runs at the top of every batch, so
    loss_avg.val() only ever reflects the most recent batch, and val() runs
    after every batch (the interval-gated call is commented out). Confirm
    both are intended.
    """
    for total_steps in range(params.niter):
        train_iter = iter(train_loader)
        i = 0
        logger.info('length of train_data: %d' % (len(train_loader)))
        eval_time = 0.0  # cumulative seconds spent inside trainBatch
        prog_bar = mmcv.ProgressBar(params.displayInterval)
        while i < len(train_loader):
            i += 1
            runtime_error = False
            crnn.train()
            loss_avg.reset()
            start = time.time()
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            eval_time += time.time() - start
            loss_avg.add(cost.cpu())
            prog_bar.update()
            '''
            try:
                i += 1
                #crnn.cuda()
                crnn.train()
                loss_avg.reset()
                start = time.time()
                cost = trainBatch(crnn, criterion, optimizer, train_iter)
                eval_time += time.time()-start
                loss_avg.add(cost.cpu())
                prog_bar.update()
                runtime_error = False
            except RuntimeError as e:
                logger.error(e)
                runtime_error = True
            except ConnectionRefusedError as e:
                logger.error(e)
                runtime_error = True
            finally:
                if runtime_error:
                    logger.error("Warning: Some error happen")
                    gc.collect()
                    torch.cuda.empty_cache()
            '''
            if i % params.tbInterval == 0 and not runtime_error:
                print("\n>>>> Tensorboard Log")
                writer.add_scalar(
                    'train/loss', loss_avg.val(),
                    int(i + total_steps * len(train_loader)))  # record to tb
            if i % params.displayInterval == 0 and not runtime_error:
                # wipe the progress-bar line before logging
                sys.stdout.write("\r%100s\r" % ' ')
                sys.stdout.flush()
                logger.info('[%d/%d][%d/%d] Loss: %f, Cost: %.4fs per batch' %
                            (total_steps, params.niter, i, len(train_loader),
                             loss_avg.val(), eval_time / i))
                loss_avg.reset()
                # adapt the display interval to the measured batch speed
                if eval_time / i < 0.2:
                    params.displayInterval = 1000
                elif eval_time / i < 0.5:
                    params.displayInterval = 400
                elif eval_time / i < 1.0:
                    params.displayInterval = 200
                prog_bar = mmcv.ProgressBar(
                    params.displayInterval)  # new interval
            # if i % params.valInterval == 0:
            #     val(crnn, test_dataset, criterion, total_steps, i)
            #     torch.cuda.empty_cache()
            torch.cuda.empty_cache()
            val(crnn, test_dataset, criterion, total_steps, i)
        if (total_steps + 1) % params.saveInterval == 0:
            string = "model save to {0}crnn_Rec_done_epoch_{1}.pth".format(
                log_dir, total_steps)
            logger.info(string)
            torch.save(
                crnn.state_dict(),
                '{0}crnn_Rec_done_epoch_{1}.pth'.format(log_dir, total_steps))
        # mid-loop chunk: validation-driven checkpointing (enclosing batch
        # loop starts above this chunk)
        loss_avg.add(cost)
        i += 1
        # if i % opt.displayInterval == 0:
        #     print('[%d/%d][%d/%d] Loss: %f' %
        #           (epoch, opt.niter, i, len(train_loader), loss_avg.val()))
        #     loss_avg.reset()
        if i % opt.valInterval == 0:
            testLoss, accuracy = val(crnn, test_dataset, criterion)
            # print('Test loss: %f, accuray: %f' % (testLoss, accuracy))
            print("epoch:{},step:{},Test loss:{},accuracy:{},train loss:{}".
                  format(epoch, num, testLoss, accuracy, loss_avg.val()))
            loss_avg.reset()
            print('Save model to:', opt.experiment)
            torch.save(crnn.state_dict(),
                       '{}/netCRNN.pth'.format(opt.experiment))
            # do checkpointing
            num += 1
            # lasttestLoss = min(lasttestLoss,testLoss)
            # keep the model only when validation loss improves
            if lasttestLoss > testLoss:
                print("The step {},last lost:{}, current: {},save model!".
                      format(num, lasttestLoss, testLoss))
                lasttestLoss = testLoss
                # delete(opt.experiment)  # delete historical models
                torch.save(crnn.state_dict(),
                           '{}/netCRNN.pth'.format(opt.experiment))
                numLoss = 0  # reset the no-improvement counter
            else:
                numLoss += 1  # validations since the last improvement
    # tail of trainBatch: batch-averaged CTC loss, backprop, update
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost

for epoch in range(opt.niter):
    train_iter = iter(train_loader)
    i = 0
    while i < len(train_loader):
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()
        cost = trainBatch(crnn, criterion, optimizer)
        loss_avg.add(cost)
        i += 1
        if i % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.niter, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()
        if i % opt.valInterval == 0:
            val(crnn, test_dataset, criterion)
        # do checkpointing
        if i % opt.saveInterval == 0:
            print("====================%s======================" % epoch)
            torch.save(
                crnn.state_dict(),
                '{0}/netCRNN_{1}_{2}.pth'.format(opt.experiment, epoch, i))
    # tail of trainBatch (its `def` lies above this chunk)
    return cost

for epoch in range(opt.epoch):
    train_iter = iter(train_loader)
    i = 0
    while i < len(train_loader):
        # NOTE(review): scheduler.step() runs per *batch*, and the LR is
        # bounced back to 1e-2 whenever it decays below 1e-4 — looks like a
        # manual warm-restart schedule; confirm intent.
        scheduler.step()
        if optimizer.param_groups[0]['lr'] < 1e-4:
            optimizer.param_groups[0]['lr'] = 1e-2
        time0 = time.time()
        cost = trainBatch()
        loss_avg.add(cost)
        i += 1
        if i % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] lr: %.4f Loss: %f Time: %f s' %
                  (epoch, opt.epoch, i, len(train_loader),
                   optimizer.param_groups[0]['lr'], loss_avg.val(),
                   time.time() - time0))
            loss_avg.reset()
        if i % opt.valInterval == 0:
            val(crnn, test_loader, criterion)
        # do checkpointing (single file, overwritten each save)
        if i % opt.saveInterval == 0:
            torch.save(crnn.state_dict(),
                       '{0}/crnn1.pth'.format(opt.experiment))
        # mid-chunk of the train/test mode dispatch (enclosing mode handling
        # and loops start above this chunk)
        if opt.model == 'ctc':
            # evaluate CER/WER/accuracy on both splits; history feeds the plot
            val_CER, val_WER, val_ACC = val(
                crnn, test_loader, criterion, test_aug=opt.test_aug,
                n_aug=opt.n_aug if opt.test_aug else 1)
            train_CER, train_WER, train_ACC = val(crnn, train_loader,
                                                  criterion)
            history_errors.append([epoch, i, loss, train_ACC, train_WER,
                                   train_CER, val_ACC, val_WER, val_CER])
            if opt.plot:
                utils.savePlot(history_errors, model_rpath)
        # do checkpointing
        if (epoch % opt.saveEpoch == 0) and (i >= len(train_loader)):
            # Runs at end of some epochs
            print("Saving epoch",
                  '{0}/netCRNN_{1}_{2}.pth'.format(model_rpath, epoch, i))
            if opt.model == 'ctc':
                torch.save(
                    crnn.state_dict(),
                    '{0}/netCRNN_{1}_{2}.pth'.format(model_rpath, epoch, i))
elif opt.mode == 'test':
    # NOTE(review): `unicode(...)` below is Python 2 only — confirm the
    # target interpreter.
    if opt.dataset == 'ICFHR':
        if opt.model == 'ctc':
            files, predictions = test(
                crnn, test_loader, criterion,
                n_aug=opt.n_aug if opt.test_aug else 1)
            with io.open(opt.test_file, "w",
                         encoding=encoding) as test_results:
                for f, pred in zip(files, predictions):
                    # this should combine ascii text and unicode correctly
                    test_results.write(
                        ' '.join([unicode(f, encoding=encoding), pred]) +
                        u"\n")
    elif opt.dataset == 'READ':
        if opt.model == 'ctc':
            files, predictions = test(
                crnn, test_loader, criterion,
                n_aug=opt.n_aug if opt.test_aug else 1)
            with io.open(opt.test_file, "w",
                         encoding=encoding) as test_results:
                for f, pred in zip(files, predictions):
                    # (chunk truncated here — loop body continues beyond
                    # this view)
        # mid-epoch chunk: batch step + epoch-end eval/TensorBoard logging
        # (enclosing epoch/batch loops start above this chunk)
        cost, cer_loss, batch_size = trainBatch(crnn, criterion, optimizer)
        train_cer += cer_loss  # accumulate character-error measure
        train_ctc += cost * batch_size  # accumulate summed CTC loss
        loss_avg.add(cost)
        i += 1
        if i % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.nepoch, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()
        if opt.valid_result and i % opt.valInterval == 0:
            val(crnn, criterion)
    # epoch end: checkpoint, evaluate, and log per-epoch averages
    torch.save(crnn.state_dict(),
               '{0}/netCRNN_{1}.pth'.format(opt.expr_dir, epoch))
    test_ctc, test_cer = test(valid_loader)
    print('CTC Train Loss:', train_ctc * 1.0 / len(train_idx))
    print('CTC Test Loss:', test_ctc)
    print('CER Train Loss:', train_cer * 1.0 / len(train_idx))
    print('CER Test Loss:', test_cer)
    writer.add_scalars("loss.ctc", {
        'train': train_ctc * 1.0 / len(train_idx),
        'test': test_ctc
    }, epoch)
    # (chunk truncated mid-statement below)
    writer.add_scalars("loss.cer", {
train_iter = iter(train_loader) # print(len(train_iter)) i = 0 while i < len(train_loader): # print("i",i) for p in crnn.parameters(): p.requires_grad = True crnn.train() cost = trainBatch(crnn, criterion, optimizer) # print(cost) loss_avg.add(cost) print(loss_avg.val()) i += 1 # print(i,op# t.saveInterval,"Loss:",loss_avg.val()) if i % opt.displayInterval == 0: print('[%d/%d][%d/%d] ' % (epoch, opt.niter, i, len(train_loader))) loss_avg.reset() if i % opt.valInterval == 0: val(crnn, test_dataset, criterion) # do checkpointing if i % opt.saveInterval == 0: # torch.save( # crnn.state_dict(), '{0}/netCRNN_{1}_{2}.pth'.format(opt.experiment, epoch, i)) torch.save( crnn.state_dict(), '{0}/model.pth'.format(opt.experiment))