def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch from ``train_iter``.

    Depends on module-level state: ``train_iter``, ``util``, ``converter``,
    ``crnn`` and the reusable ``image`` / ``text`` / ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
            NOTE(review): callers presumably pass that same model - confirm.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)

    # Copy the batch into the shared buffers.
    util.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    util.loadData(text, encoded)
    util.loadData(length, encoded_lengths)

    preds = crnn(image)
    # One entry per sample, each equal to preds.size(0).
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(net, optimizer):
    """Run one training step using the ``ctc_ent_cost`` objective.

    Depends on module-level state: ``train_iter``, ``utils``, ``converter``,
    ``crnn``, ``ctc_ent_cost``, ``opt`` and the ``image`` / ``text`` /
    ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        ``(H / batch_size, cost / batch_size)``. When the loss is rejected
        by the sanity check, a pair of zero tensors shaped like ``H`` and
        ``cost`` is returned and no update is performed.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)

    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    preds = crnn(image)
    # length (batch)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    H, cost = ctc_ent_cost(preds, text, preds_size, length)

    # Skip the update when the loss is infinite, NaN or strongly negative.
    cost_sum = cost.data.sum()
    inf = float("inf")
    if (cost_sum == inf or cost_sum == -inf or cost_sum <= -1e5
            or torch.isnan(cost) or torch.isnan(H)):
        print("Warning: received an inf loss, setting loss value to 0")
        return torch.zeros(H.size()), torch.zeros(cost.size())

    crnn.zero_grad()
    # Weighted objective: -h_rate * H + (1 - h_rate) * cost.
    (-opt.h_rate * H + (1 - opt.h_rate) * cost).backward()
    # clip_grad_norm_ is the in-place, non-deprecated spelling.
    torch.nn.utils.clip_grad_norm_(crnn.parameters(), opt.max_norm)
    optimizer.step()
    return H / batch_size, cost / batch_size
def trainBatch(crnn, criterion, optimizer):
    """Run one training step of ``crnn`` on the next batch.

    Depends on module-level state: ``train_iter``, ``utils``, ``converter``
    and the reusable ``image`` / ``text`` / ``length`` buffers.

    Args:
        crnn: Model; called on ``image`` and updated in this step.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion output divided by the batch size.
    """
    # Fetch one batch. The built-in next() follows the iterator protocol;
    # the ``.next()`` method only exists on Python 2 style iterators.
    data = next(train_iter)
    # Split into images and labels.
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)

    # Load the image data into its buffer.
    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    # Load the label data into its buffer.
    utils.loadData(text, encoded)
    # Load the label lengths into their buffer.
    utils.loadData(length, encoded_lengths)

    # Forward pass.
    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(net, criterion, optimizer, train_iter):
    """Run one training step on the next batch drawn from ``train_iter``.

    Depends on module-level state: ``utils``, ``converter``, ``crnn`` and
    the reusable ``image`` / ``text`` / ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.
        train_iter: Iterator yielding ``(images, texts)`` pairs.

    Returns:
        The criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)

    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def train(crnn, train_loader, criterion, iteration):
    """Train ``crnn`` over ``train_loader``, stopping after batch index 100.

    Depends on module-level state: ``args``, ``params``, ``utils``,
    ``dataset``, ``converter``, ``optimizer`` and ``loss_avg``.

    Args:
        crnn: Model to train; all parameters are made trainable first.
        train_loader: Iterable yielding ``(image, index)`` batches.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        iteration: Value printed as the current iteration in progress logs.
    """
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()

    # Moving the criterion to the GPU does not depend on the batch, so do it
    # once instead of on every iteration.
    if args.cuda:
        criterion = criterion.cuda()

    for i_batch, (image, index) in enumerate(train_loader):
        if args.cuda:
            image = image.cuda()
        label = utils.get_batch_label(dataset, index)
        preds = crnn(image)
        batch_size = image.size(0)
        text, length = converter.encode(label)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        # Shapes recorded by the original author for preds, text, preds_size
        # and length: [41, 16, 6736], [160], [16], [16].
        cost = criterion(preds, text, preds_size, length) / batch_size

        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)

        # NOTE(review): hard cap of 101 batches per call - looks like a
        # debugging limit; confirm it is intended.
        if i_batch == 100:
            break

        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (iteration, params.niter, i_batch, len(train_loader),
                   loss_avg.val()))
            loss_avg.reset()
def trainBatch(net, criterion, optimizer):
    """Run one training step, re-assembling multi-GPU model output.

    Depends on module-level state: ``train_iter``, ``utils``, ``converter``,
    ``crnn``, ``gpu_ids`` and the ``image`` / ``text`` / ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)

    # Strip the first two and the last character of every label
    # (presumably the b'...' wrapper of a stringified bytes value - confirm).
    # The loop variable must not be called ``text``: that would shadow the
    # module-level ``text`` buffer used below.
    decode_texts = [raw_label[2:-1] for raw_label in cpu_texts]
    encoded, encoded_lengths = converter.encode(decode_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    # Original author's note: with DataParallel on dimension 0 and e.g.
    # 4 GPUs, an input of [16, 1, 48, 600] gives per-GPU output
    # [151, 4, 37], which arrives here merged along dimension 0 as
    # [604, 4, 37]. Split it back per GPU and concatenate along the batch
    # dimension to obtain [time_step, batch_size, nOut].
    preds = crnn(image)
    preds_chunks = preds.chunk(len(gpu_ids), dim=0)
    preds = torch.cat(preds_chunks, dim=1)

    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(net, optimizer):
    """Run one training step using the ``seg_ctc_ent_cost`` objective.

    Depends on module-level state: ``train_iter``, ``utils``, ``converter``,
    ``crnn``, ``seg_ctc_ent_cost``, ``opt`` and the ``image`` / ``text`` /
    ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        ``(H / batch_size, cost / batch_size, h_cost / batch_size)``. When
        the combined loss is rejected by the sanity check, three zero
        tensors shaped like ``H``, ``cost`` and ``h_cost`` are returned and
        no update is performed.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)

    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    H, cost = seg_ctc_ent_cost(preds, text, preds_size, length,
                               uni_rate=opt.uni_rate)
    h_cost = (1 - opt.h_rate) * cost - opt.h_rate * H

    cost_sum = h_cost.data.sum()
    inf = float("inf")
    # ``cost_sum != cost_sum`` is true only for NaN; without it a NaN loss
    # passes every comparison below and would be back-propagated.
    if (cost_sum == inf or cost_sum == -inf or cost_sum != cost_sum
            or cost_sum > 200 * batch_size):
        print("Warning: received an inf loss, setting loss value to 0")
        return (torch.zeros(H.size()), torch.zeros(cost.size()),
                torch.zeros(h_cost.size()))

    crnn.zero_grad()
    h_cost.backward()
    # NOTE(review): clip_grad_norm is the deprecated spelling of
    # clip_grad_norm_; kept because the targeted torch version is unknown.
    torch.nn.utils.clip_grad_norm(crnn.parameters(), opt.max_norm)
    optimizer.step()
    return H / batch_size, cost / batch_size, h_cost / batch_size
def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch from ``train_iter``.

    Depends on module-level state: ``train_iter``, ``utils``, ``converter``,
    ``crnn`` and the reusable ``image`` / ``text`` / ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)

    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(net, criterion, optimizer):
    """Run one training step, feeding the criterion long-typed tensors.

    Depends on module-level state: ``train_iter``, ``utils``, ``converter``,
    ``crnn``, ``CUDA`` and the ``image`` / ``text`` / ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``
            with the last three arguments cast via ``.long()``; its output
            is summed.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The summed criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)

    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    preds = crnn(image)
    if CUDA:
        preds_size = Variable(
            torch.IntTensor([preds.size(0)] * batch_size)).cuda()
    else:
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)

    per_sample = criterion(preds, text.long(), preds_size.long(),
                           length.long())
    cost = per_sample.sum() / float(batch_size)

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(net, criterion, optimizer):
    """Run one training step with accuracy and TensorBoard-style logging.

    Depends on module-level state: ``train_iter``, ``focal_alpha``,
    ``probs``, ``utils``, ``converter``, ``crnn``, ``opt``, ``writer``,
    ``plot``, ``plot_preds``, ``display_flag``, ``global_step`` and the
    ``image`` / ``text`` / ``length`` buffers. The module-level ``text`` is
    rebound to a reshaped CUDA tensor on every call.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``
            on log-softmax outputs; its result is summed.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The summed criterion output divided by the batch size.
    """
    global text

    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    data = next(train_iter)
    if focal_alpha:
        cpu_images, cpu_texts, alpha = data
        alpha = torch.FloatTensor(list(alpha))
        utils.loadData(probs, alpha)
        assert not probs.requires_grad
    else:
        cpu_images, cpu_texts = data

    batch_size = cpu_images.size(0)
    assert batch_size > 0

    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)
    # Reshape to one row per sample; this only succeeds when the flat label
    # tensor divides evenly by batch_size.
    text = text.view((batch_size, -1))
    text = text.cuda()

    preds = F.log_softmax(crnn(image), dim=-1)
    # Computed once; used both for decoding and for the criterion.
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))

    # Arg-max decode to measure exact-match accuracy on this batch.
    _, preds_str = preds.max(2)
    preds_str = preds_str.transpose(1, 0).contiguous().view(-1)
    preds_str = converter.decode(preds_str.data, preds_size.data, raw=False)
    acc = (np.array(preds_str) == np.array(cpu_texts)).mean()

    if display_flag:
        writer.add_figure('Train predictions vs. actuals',
                          plot_preds(cpu_images, preds_str, cpu_texts),
                          global_step=global_step)
        writer.add_figure('Gradient',
                          plot.plot_grad_flow_v2(crnn.named_parameters()),
                          global_step=global_step)

    cost = criterion(preds, text, preds_size, length)
    # NOTE(review): ``probs`` is only filled when ``focal_alpha`` is truthy
    # but is applied when ``opt.focal`` is - confirm both flags agree.
    if opt.focal:
        cost = cost * probs
    cost = cost.sum() / batch_size

    writer.add_scalars('training', {
        'loss': cost.item(),
        'acc': acc
    }, global_step)
    writer.add_scalars('lr', plot.get_lr(optimizer), global_step)

    crnn.zero_grad()
    cost.backward()
    torch.nn.utils.clip_grad_value_(crnn.parameters(), 1)
    optimizer.step()
    return cost
def trainBatch(net, criterion, optimizer):
    """Run one training step for a model that also receives label lengths.

    Depends on module-level state: ``train_iter``, ``utils``, ``converter``,
    ``crnn`` and the reusable ``image`` / ``text`` / ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion output (not divided by the batch size).
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)

    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    # The model is called with both the image buffer and the label lengths.
    preds = crnn(image, length)
    cost = criterion(preds, text)

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(crnn, criterion, optimizer):
    """Run one training step of ``crnn`` on the next batch.

    Depends on module-level state: ``train_iter``, ``utils``, ``converter``
    and the reusable ``image`` / ``text`` / ``length`` buffers.

    Args:
        crnn: Model; called on ``image`` and updated in this step.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)

    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(net, criterion, optimizer):
    """Run one training step on the next batch from ``train_iter``.

    Depends on module-level state: ``train_iter``, ``utils``, ``converter``,
    ``crnn`` and the reusable ``image`` / ``text`` / ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)

    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(net, criterion, optimizer):
    """Run one training step on a batch of padded trajectories.

    Depends on module-level state: ``train_iter``, ``padding2tensor``,
    ``utils``, ``converter``, ``crnn`` and the ``traject`` / ``text`` /
    ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_trajects, cpu_texts = next(train_iter)
    # Convert the raw trajectories with padding2tensor before reading the
    # batch dimension.
    cpu_trajects = padding2tensor(cpu_trajects)
    batch_size = cpu_trajects.size(0)

    utils.loadData(traject, cpu_trajects)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    preds = crnn(traject)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(net, data, criterion, optimizer):
    """Train on one supplied batch and report loss, CER and batch length.

    Depends on module-level state: ``utils``, ``converter``, ``crnn`` and
    the reusable ``image`` / ``text`` / ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        data: Pair ``(images, texts)`` for this batch.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        ``(loss_value, cer_loss, n)`` where ``loss_value`` is the Python
        number from ``.item()`` on the batch-averaged loss, ``cer_loss`` is
        the result of ``utils.cer_loss`` on the decoded predictions, and
        ``n`` is ``len`` of the image batch.
    """
    images_cpu, labels_cpu = data
    n_samples = images_cpu.size(0)

    # Stage the batch in the shared buffers.
    utils.loadData(image, images_cpu)
    encoded, encoded_lengths = converter.encode(labels_cpu)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    outputs = crnn(image)
    preds_size = Variable(torch.IntTensor([outputs.size(0)] * n_samples))
    loss = criterion(outputs, text, preds_size, length) / n_samples

    crnn.zero_grad()
    loss.backward()
    optimizer.step()
    loss_value = loss.detach().item()

    # Arg-max over dimension 2, then flatten sample-major for the decoder.
    best_indices = outputs.max(2)[1]
    flat_indices = best_indices.transpose(1, 0).contiguous().view(-1)
    sim_preds = converter.decode(flat_indices.data, preds_size.data,
                                 raw=False)
    cer_loss = utils.cer_loss(sim_preds, labels_cpu)
    return loss_value, cer_loss, len(images_cpu)
def trainBatch(net, criterion, optimizer, train_iter):
    """Run one training step with float64 predictions on ``device``.

    Depends on module-level state: ``utils``, ``converter``, ``crnn``,
    ``device`` and the reusable ``image`` / ``text`` / ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.
        train_iter: Iterator yielding ``(images, texts)`` pairs.

    Returns:
        The criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)

    # Original author's notes: labels arrive as bytes (e.g. b'JHP5U1R- NE'),
    # images as [16, 1, 32, 180].
    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)  # index of every label character
    utils.loadData(length, encoded_lengths)  # length of every label

    preds = crnn(image)
    preds = preds.to(torch.float64)
    preds = preds.to(device)
    # Original author's note: preds is (T, N, C), e.g. [46, 16, 37].
    preds_size = torch.IntTensor([preds.size(0)] * batch_size)
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(net, criterion, optimizer):
    """Run one training step, optionally dumping tensors for inspection.

    Depends on module-level state: ``train_iter``, ``utils``, ``converter``,
    ``crnn``, ``log_for_explore`` and the ``image`` / ``text`` / ``length``
    buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.

    Returns:
        The criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)

    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))

    # A single guarded block replaces four
    # ``print(...) if log_for_explore else None`` expression statements.
    if log_for_explore:
        print("on train, preds is: " + str(preds))
        print("on train, preds after logsoftmax is: " +
              str(preds.log_softmax(2)))
        print("on train, preds_size is: " + str(preds_size))
        print("on train, target_size is: " + str(length))

    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(net, criterion, optimizer, train_iter):
    """Run one training step with ``myUtils.dbvart`` debug calls.

    Depends on module-level state: ``utils``, ``myUtils``, ``converter``,
    ``crnn`` and the reusable ``image`` / ``text`` / ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer whose ``step()`` applies the update.
        train_iter: Iterator yielding ``(images, texts)`` pairs.

    Returns:
        The criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)

    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    myUtils.dbvart(cpu_texts)
    myUtils.dbvart(encoded, encoded_lengths)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))

    # Trailing values are examples recorded by the original author.
    myUtils.dbvart(preds.size())  # [41 2 6736]
    myUtils.dbvart(text.size())  # [20]
    myUtils.dbvart(preds_size)  # [41,41]
    myUtils.dbvart(length)  # [10,10]

    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
def trainBatch(net, criterion, optimizer, flage=False):
    """Run one training step, optionally stepping with a fresh Adadelta.

    Depends on module-level state: ``train_iter``, ``ifUnicode``,
    ``clean_txt``, ``utils``, ``converter``, ``crnn``, ``optim`` and the
    ``image`` / ``text`` / ``length`` buffers.

    Args:
        net: Unused; the body operates on the module-level ``crnn``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Optimizer used for the update unless ``flage`` is true.
        flage: When true, the step is taken by a newly created
            ``optim.Adadelta`` with ``lr=0.0001`` instead of ``optimizer``.

    Returns:
        The criterion output divided by the batch size.
    """
    # The built-in next() follows the iterator protocol; the ``.next()``
    # method only exists on Python 2 style iterators.
    cpu_images, cpu_texts = next(train_iter)

    # Decode UTF-8 byte labels to text and clean them.
    if ifUnicode:
        cpu_texts = [clean_txt(tx.decode('utf-8')) for tx in cpu_texts]

    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    encoded, encoded_lengths = converter.encode(cpu_texts)
    utils.loadData(text, encoded)
    utils.loadData(length, encoded_lengths)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size

    crnn.zero_grad()
    cost.backward()

    if flage:
        # NOTE(review): this builds a brand-new optimizer on every call, so
        # any state the caller's optimizer accumulated is not used for this
        # step - confirm this is intended.
        lr = 0.0001
        optimizer = optim.Adadelta(crnn.parameters(), lr=lr)
    optimizer.step()
    return cost
def train(crnn, train_loader, criterion, optimizer, valid_loader):
    """Train ``crnn`` for one full pass over ``train_loader``.

    Depends on module-level state: ``utils``, ``converter``, ``opt`` and
    ``epoch`` (the latter two are only read for progress logging).

    Args:
        crnn: Model to train; all parameters are made trainable first.
        train_loader: Sized iterable yielding ``(_, images, texts)`` batches.
        criterion: Called as ``criterion(preds, t, preds_size, l)``.
        optimizer: Optimizer whose ``step()`` applies each update.
        valid_loader: Unused in this function.
    """
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()

    # Running loss average, reset after every progress print.
    loss_avg = utils.averager()
    num_batches = len(train_loader)

    # Iterate the loader directly instead of indexing a range and calling
    # the Python 2 only ``.next()`` on a manual iterator.
    for i, (_, images, texts) in enumerate(train_loader):
        batch_size = images.size(0)
        encoded, encoded_lengths = converter.encode(texts)
        images = images.cuda()

        preds = crnn(images)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds, encoded, preds_size,
                         encoded_lengths) / batch_size

        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)

        if (i + 1) % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.nepoch, i, num_batches, loss_avg.val()))
            loss_avg.reset()
def main(arg):
    """Build datasets, model and optimizer, then run the training loop.

    Depends on module-level names: ``dataset``, ``DataLoader``, ``CTCLoss``,
    ``utils``, ``CRNN``, ``optim``, ``Variable`` and ``test``.

    Args:
        arg: Settings object. Attributes read here: ``train_root``,
            ``test_root``, ``num_workers``, ``batch_size``, ``imgH``,
            ``imgW``, ``keep_ratio``, ``num_class``, ``opt``, ``n_epoch``,
            ``displayInterval``, ``testInterval``, ``saveInterval``,
            ``model_dir`` and ``type``.
    """
    print(arg)
    train_dataset = dataset.lmdbDataset(path=arg.train_root)
    test_dataset = dataset.lmdbDataset(path=arg.test_root)

    # The results are unused; the calls are kept because the original read
    # the first item and the length of the test set.
    # NOTE(review): looks like a debugging probe - confirm it can be removed.
    test_dataset[0]
    len(test_dataset)

    train_loader = DataLoader(train_dataset,
                              num_workers=arg.num_workers,
                              batch_size=arg.batch_size,
                              collate_fn=dataset.alignCollate(
                                  imgH=arg.imgH,
                                  imgW=arg.imgW,
                                  keep_ratio=arg.keep_ratio),
                              shuffle=True,
                              drop_last=True)
    criterion = CTCLoss()
    converter = utils.Converter(arg.num_class)
    crnn = CRNN(imgH=arg.imgH, nc=3, nclass=arg.num_class + 1, nh=256)

    def weights_init(m):
        """Initialise Conv and BatchNorm layers, matched by class name."""
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    crnn.apply(weights_init)
    print(crnn)

    # Reusable buffers that every batch is copied into.
    image = Variable(torch.FloatTensor(arg.batch_size, 3, arg.imgH, arg.imgW))
    text = Variable(torch.IntTensor(arg.batch_size * 5))
    length = Variable(torch.IntTensor(arg.batch_size))

    # Running loss average, reset after every progress print.
    loss_avg = utils.averager()

    # Set up the optimizer.
    if arg.opt == 'adam':
        optimizer = optim.Adam(crnn.parameters(), 0.01, betas=(0.5, 0.999))
    elif arg.opt == 'adadelta':
        optimizer = optim.Adadelta(crnn.parameters())
    else:
        optimizer = optim.RMSprop(crnn.parameters(), 0.01)

    num_batches = len(train_loader)
    for epoch in range(arg.n_epoch):
        # ``i`` is the 1-based count of batches processed in this epoch.
        # Iterating the loader replaces a manual counter and the
        # Python 2 only ``.next()`` call.
        for i, (cpu_images, cpu_texts) in enumerate(train_loader, start=1):
            # Restored on every batch: test() runs between batches and
            # presumably leaves the model in evaluation mode - confirm.
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)
            text_labels, label_lengths = converter.encode(cpu_texts)
            utils.loadData(text, text_labels)
            utils.loadData(length, label_lengths)

            preds = crnn(image)
            preds_size = Variable(
                torch.IntTensor([preds.size(0)] * batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size

            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            loss_avg.add(cost)

            if i % arg.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, arg.n_epoch, i, num_batches, loss_avg.val()))
                loss_avg.reset()

            if i % arg.testInterval == 0:
                test(arg, crnn, test_dataset, criterion, image, text, length)

            # Periodic checkpoint, named by epoch and batch count.
            if i % arg.saveInterval == 0:
                name = '{0}/netCRNN_{1}_{2}_{3}_{4}.pth'.format(
                    arg.model_dir, arg.num_class, arg.type, epoch, i)
                torch.save(crnn.state_dict(), name)
                print('model saved at ', name)

        # "Latest" weights, overwritten at the end of every epoch.
        # NOTE(review): the nesting level of this save is an assumption
        # (once per epoch vs. once after all epochs) - confirm.
        torch.save(
            crnn.state_dict(),
            '{0}/netCRNN_{1}_{2}.pth'.format(arg.model_dir, arg.num_class,
                                             arg.type))
# optimizer = optim.Adadelta(crnn.parameters()) # optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr) min_val_loss = 0.2 for epoch in range(500): loss_avg = 0 crnn.train() for patch, (im, labels) in enumerate(train_loader): preds = crnn(Variable(im).to(device=device)) preds = preds.log_softmax(2) target, target_length = get_labels(alphabet.dict, labels) preds_size = Variable(torch.LongTensor([preds.size(0)] * batch_size)).to(device=device) loss = criterion(preds, Variable(target).to(device=device), preds_size, Variable(target_length).to(device=device)) / batch_size crnn.zero_grad() loss.backward() optimizer.step() loss_avg += loss.item() if (patch+1)%50 == 0 or patch == len(train_loader)-1: print('[Train][Epoch: {}/200][Patch: {}/{}][Loss: {:.4f}]' .format(epoch+1, patch+1, len(train_loader), loss_avg/(patch+1))) # loss_avg = 0 # crnn.eval() # for patch, (im, labels) in enumerate(val_loader): # preds = crnn(Variable(im).to(device=device)) # preds = preds.log_softmax(2) # target, target_length = get_labels(alphabet.dict, labels) # preds_size = Variable(torch.LongTensor([preds.size(0)] * batch_size)).to(device=device)