def evaluate(image, text, encoder, decoder, data_loader, max_eval_iter=100):
    # Freeze parameters and switch to eval mode for validation.
    for e, d in zip(encoder.parameters(), decoder.parameters()):
        e.requires_grad = False
        d.requires_grad = False
    encoder.eval()
    decoder.eval()

    val_iter = iter(data_loader)
    n_correct = 0
    n_total = 0
    loss_avg = utils.Averager()

    for i in range(min(len(data_loader), max_eval_iter)):
        cpu_images, cpu_texts = next(val_iter)
        batch_size = cpu_images.size(0)
        utils.load_data(image, cpu_images)

        target_variable = converter.encode(cpu_texts)
        n_total += len(cpu_texts[0]) + 1  # +1 for the EOS token

        decoded_words = []
        decoded_label = []
        encoder_outputs = encoder(image)
        if torch.cuda.is_available():
            target_variable = target_variable.cuda()
            decoder_input = target_variable[0].cuda()
            decoder_hidden = decoder.initHidden(batch_size).cuda()
        else:
            decoder_input = target_variable[0]
            decoder_hidden = decoder.initHidden(batch_size)

        # Greedy decoding; assumes batch_size == 1 so that the scalar
        # comparisons against EOS_TOKEN and the target are valid.
        for di in range(1, target_variable.shape[0]):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            ni = topi.squeeze(1)
            decoder_input = ni
            if ni.item() == utils.EOS_TOKEN:
                decoded_label.append(utils.EOS_TOKEN)
                break
            else:
                decoded_words.append(converter.decode(ni))
                decoded_label.append(ni.item())

        for pred, target in zip(decoded_label, target_variable[1:, :]):
            if pred == target.item():
                n_correct += 1

        if i % 10 == 0:
            texts = cpu_texts[0]
            print('pred {}: {}'.format(i, ''.join(decoded_words)))
            print('gt {}: {}'.format(i, texts))

    accuracy = n_correct / float(n_total)
    # Note: no loss is accumulated in this variant, so loss_avg stays at
    # its initial value.
    print('Test loss: {}, accuracy: {}'.format(loss_avg.val(), accuracy))
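# These routines rely on a utils.Averager helper that is not shown here.
# Below is a minimal sketch of it as the code above uses it (add a loss
# tensor or float, read the running mean, reset); the real
# implementation may differ.
class Averager(object):
    """Running average of scalar losses."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.sum = 0.0
        self.count = 0

    def add(self, v):
        # Accept either a 0-dim tensor or a plain float.
        if torch.is_tensor(v):
            v = v.item()
        self.sum += v
        self.count += 1

    def val(self):
        return self.sum / self.count if self.count > 0 else 0.0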
def evaluate(image, text, model, criterion, data_loader, max_eval_iter=100):
    model.eval()

    val_iter = iter(data_loader)
    loss_avg = utils.Averager()
    epoch_loss = 0

    with torch.no_grad():
        for i in range(min(len(data_loader), max_eval_iter)):
            cpu_images, cpu_texts = next(val_iter)
            batch_size = cpu_images.size(0)
            utils.load_data(image, cpu_images)

            target_variable = converter.encode(cpu_texts)
            decoded_words = []
            if torch.cuda.is_available():
                target_variable = target_variable.cuda()

            # Run the full seq2seq model with teacher forcing disabled
            # (ratio 0), then flatten for the cross-entropy criterion.
            decoded_label = model(image, target_variable, 0)
            label_number, batch, output_dim = decoded_label.size()
            decoded_label = decoded_label.view(-1, output_dim)
            target_variable = target_variable.view(-1)
            loss = criterion(decoded_label, target_variable)
            epoch_loss += loss.item()

            if i % 10 == 0:
                texts = cpu_texts[0]
                # Greedy-decode every time step of every batch element.
                for idl in range(decoded_label.shape[0]):
                    topv, topi = decoded_label[idl].data.topk(1)
                    ni = topi.squeeze()
                    decoded_words.append(converter.decode(ni))
                print('pred {}: {}'.format(i, ' '.join(decoded_words)))
                print('gt {}: {}\n'.format(i, texts))

    # This variant tracks loss only; the original mislabeled the
    # per-iteration average as "accuracy".
    avg_loss = epoch_loss / max_eval_iter
    print('Test epoch loss: {}, average loss: {}'.format(epoch_loss, avg_loss))
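# evaluate() and the train() variants rely on a module-level `converter`
# with encode()/decode() methods. A minimal sketch under the assumption
# of an SOS/EOS-framed token vocabulary; the real converter (vocabulary
# construction, padding convention) may differ.
class LabelConverter(object):
    def __init__(self, alphabet):
        # Indices 0 and 1 are assumed reserved for SOS and EOS.
        self.char2idx = {c: i + 2 for i, c in enumerate(alphabet)}
        self.idx2char = {i + 2: c for i, c in enumerate(alphabet)}

    def encode(self, texts):
        # Returns a (max_len + 2, batch) LongTensor framed with SOS/EOS;
        # short sequences are padded with EOS (an assumption).
        max_len = max(len(t) for t in texts)
        out = torch.full((max_len + 2, len(texts)), utils.EOS_TOKEN,
                         dtype=torch.long)
        out[0, :] = utils.SOS_TOKEN
        for b, t in enumerate(texts):
            for j, c in enumerate(t):
                out[j + 1, b] = self.char2idx[c]
        return out

    def decode(self, idx):
        # Maps a scalar index tensor back to its character.
        return self.idx2char.get(int(idx), '')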
def valid(net, valid_loader, device, cfg):
    print('start valid')
    criterion = nn.CTCLoss()
    loss_avg = utils.Averager()
    net.eval()

    correct_num = 0
    total_num = 0
    with torch.no_grad():
        for i, (images, labels) in enumerate(valid_loader):
            images = images.to(device)
            labels, labels_len = matrix2linear(labels)
            labels = labels.to(device)
            labels_len = torch.IntTensor(labels_len)

            preds = net(images)
            preds_len = torch.IntTensor([preds.size(0)] * int(preds.size(1)))
            # cuDNN's CTC kernels are restrictive, so fall back to the
            # native implementation for the loss computation.
            with torch.backends.cudnn.flags(enabled=False):
                loss = criterion(preds, labels, preds_len, labels_len)
            loss_avg.add(loss)

            # Greedy (best-path) decoding: argmax over classes, then
            # collapse repeats and blanks inside decode().
            preds = preds.max(2)[1]
            preds = preds.transpose(1, 0).contiguous().view(-1)
            preds = decode(preds)
            total_num += len(preds)
            for x, y in zip(preds, labels):
                if int(x) == int(y):
                    correct_num += 1

    acc = correct_num / float(total_num) * 100
    valid_loss = loss_avg.val()
    print('Valid Loss: {0:.3f}, Accuracy: {1:.3f}%'.format(valid_loss, acc))
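# valid() calls a free function decode() to turn per-frame argmax ids
# into label ids. A minimal best-path sketch (drop blanks, collapse
# repeated ids), assuming blank id 0; the real decode() may differ.
def decode(preds):
    result = []
    prev = 0  # previous id, initialized to blank
    for p in preds.tolist():
        # CTC best path: skip blanks and collapse consecutive repeats.
        if p != 0 and p != prev:
            result.append(p)
        prev = p
    return result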
def train(net, train_loader, valid_loader, device, cfg):
    criterion = nn.CTCLoss()
    # alternative: torch.optim.Adadelta(net.parameters(), lr=cfg.learning_rate)
    optimizer = torch.optim.Adam(
        net.parameters(),
        lr=cfg.learning_rate,
        weight_decay=cfg.weight_decay,
    )
    # alternative: torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=1,
        gamma=0.1,
    )
    loss_avg = utils.Averager()
    net = net.to(device)

    for epoch in range(cfg.num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            net.train()
            images = images.to(device)
            labels, labels_len = matrix2linear(labels)
            labels = labels.to(device)
            labels_len = torch.IntTensor(labels_len)

            preds = net(images)
            # CTC training diverges easily; bail out of the epoch if the
            # network starts producing NaNs.
            if torch.sum(torch.isnan(preds)) >= 1:
                print('nan: {}, lr: {}'.format(i + 1, scheduler.get_last_lr()[0]))
                break

            preds_len = torch.IntTensor([preds.size(0)] * int(preds.size(1)))
            with torch.backends.cudnn.flags(enabled=False):
                loss = criterion(preds, labels, preds_len, labels_len)
            loss_avg.add(loss)

            if (i + 1) % cfg.display_interval == 0:
                print('[Epoch {0}/{1}] [Batch {2}/{3}] Loss: {4:.3f}'.format(
                    epoch + 1,
                    cfg.num_epochs,
                    i + 1,
                    len(train_loader),
                    loss_avg.val(),
                ))
                loss_avg.reset()

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(
                net.parameters(),
                max_norm=20,
                norm_type=2,
            )
            optimizer.step()

            if (i + 1) % cfg.valid_interval == 0:
                valid(net, valid_loader, device, cfg)

        scheduler.step()
        torch.save(net.state_dict(),
                   '{0}/crnn_ctc_{1}.pth'.format(cfg.model_path, epoch))
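# train() and valid() flatten a padded (batch, max_len) label matrix
# into the concatenated 1-D targets plus per-sample lengths that
# nn.CTCLoss expects. A minimal sketch of matrix2linear, assuming 0 is
# the padding value; the real helper may differ.
def matrix2linear(labels):
    flat, lengths = [], []
    for row in labels:
        ids = [int(x) for x in row if int(x) != 0]  # strip padding
        flat.extend(ids)
        lengths.append(len(ids))
    return torch.IntTensor(flat), lengths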
def train(image, text, encoder, decoder, criterion, train_loader,
          teach_forcing_prob=1):
    logger = Logger('log/')
    # one optimizer per sub-network
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=cfg.learning_rate, betas=(0.5, 0.999))
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=cfg.learning_rate, betas=(0.5, 0.999))
    # loss averager
    loss_avg = utils.Averager()

    for epoch in range(cfg.num_epochs):
        train_iter = iter(train_loader)
        for i in range(len(train_loader)):
            cpu_images, cpu_texts = next(train_iter)
            batch_size = cpu_images.size(0)

            # Unfreeze parameters and switch back to train mode in case
            # evaluate() froze them.
            for encoder_param, decoder_param in zip(encoder.parameters(),
                                                    decoder.parameters()):
                encoder_param.requires_grad = True
                decoder_param.requires_grad = True
            encoder.train()
            decoder.train()

            target_variable = converter.encode(cpu_texts)
            utils.load_data(image, cpu_images)

            # CNN + BiLSTM
            encoder_outputs = encoder(image)
            target_variable = target_variable.cuda()
            # start the decoder from the SOS token
            decoder_input = target_variable[utils.SOS_TOKEN].cuda()
            decoder_hidden = decoder.initHidden(batch_size).cuda()

            loss = 0.0
            # Apply teacher forcing with probability teach_forcing_prob
            # (the original `>` comparison inverted this, so the default
            # of 1 never teacher-forced).
            teach_forcing = random.random() < teach_forcing_prob
            if teach_forcing:
                # Feed the ground-truth token at every step.
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    decoder_input = target_variable[di]
            else:
                # Feed back the decoder's own greedy prediction.
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    topv, topi = decoder_output.data.topk(1)
                    ni = topi.squeeze()
                    decoder_input = ni

            encoder.zero_grad()
            decoder.zero_grad()
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
            loss_avg.add(loss)

            if i % 10 == 0:
                print('[Epoch {0}/{1}] [Batch {2}/{3}] Loss: {4}'.format(
                    epoch, cfg.num_epochs, i, len(train_loader), loss_avg.val()))
                logger.scalar_summary(
                    'Loss of Epoch{0}/miniBatch(100)'.format(epoch),
                    loss_avg.val(), i)
                logger.scalar_summary('Loss of Epoch/miniBatch(100)',
                                      loss_avg.val(),
                                      epoch * len(train_loader) + i)
                loss_avg.reset()

        # save checkpoint
        torch.save(encoder.state_dict(),
                   '{0}/encoder_{1}.pth'.format(cfg.model, epoch))
        torch.save(decoder.state_dict(),
                   '{0}/decoder_{1}.pth'.format(cfg.model, epoch))
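# train() and evaluate() copy each CPU batch into the preallocated
# `image` buffer via utils.load_data. A minimal sketch in the style of
# crnn.pytorch's loadData; the actual helper may differ.
def load_data(v, data):
    # Resize the persistent buffer to the incoming batch and copy in place.
    with torch.no_grad():
        v.resize_(data.size()).copy_(data)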
def train(image, text, model, criterion, train_loader, teach_forcing_prob=0.5):
    # single optimizer over the whole encoder-decoder model
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.learning_rate,
                                 betas=(0.5, 0.999))
    # loss averager
    loss_avg = utils.Averager()

    for epoch in range(cfg.num_epochs):
        train_iter = iter(train_loader)
        for i in range(len(train_loader)):
            cpu_images, cpu_texts = next(train_iter)
            batch_size = cpu_images.size(0)

            optimizer.zero_grad()
            model.train()

            target_variable = converter.encode(cpu_texts)
            utils.load_data(image, cpu_images)
            if torch.cuda.is_available():
                target_variable = target_variable.cuda()

            # The model's forward is assumed to take a teacher-forcing
            # ratio, matching the evaluate() call model(image, target, 0);
            # the original sampled a coin flip but never used it.
            output = model(image, target_variable, teach_forcing_prob)

            # Drop the SOS step and flatten:
            # output: [(trg_len - 1) * batch_size, output_dim]
            # target: [(trg_len - 1) * batch_size]
            output_dim = output.shape[-1]
            output = output[1:].view(-1, output_dim)
            target_variable = target_variable[1:].view(-1)
            loss = criterion(output, target_variable)

            loss.backward()
            # `clip` is the gradient-clipping threshold, assumed to be
            # defined at module level (e.g. from the config).
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
            loss_avg.add(loss)

            if i % 1 == 0:  # log every batch
                print('[Epoch {0}/{1}] [Batch {2}/{3}] Loss: {4}'.format(
                    epoch + 1, cfg.num_epochs, i + 1, len(train_loader),
                    loss_avg.val()))
                loss_avg.reset()
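# The single-model train()/evaluate() pair assumes a seq2seq forward of
# the form model(image, target, teacher_forcing_ratio) that returns a
# (trg_len, batch, output_dim) tensor of logits whose first row is
# discarded by output[1:]. A minimal sketch of that contract, wrapping
# the same encoder/decoder interface used elsewhere in this file; the
# real model may differ.
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, image, target, teacher_forcing_ratio=0.5):
        trg_len, batch_size = target.shape
        encoder_outputs = self.encoder(image)
        hidden = self.decoder.initHidden(batch_size).to(target.device)
        outputs = []
        inp = target[0]  # SOS row
        for di in range(1, trg_len):
            out, hidden, _ = self.decoder(inp, hidden, encoder_outputs)
            outputs.append(out)
            # Per-step teacher forcing: ground truth vs. greedy prediction.
            teach = random.random() < teacher_forcing_ratio
            inp = target[di] if teach else out.argmax(1)
        # Prepend a zero step so output rows align with target indices.
        pad = torch.zeros_like(outputs[0]).unsqueeze(0)
        return torch.cat([pad] + [o.unsqueeze(0) for o in outputs], dim=0)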
def train(image, text, encoder, decoder, criterion, train_loader,
          teach_forcing_prob=0.5):
    # optimizers
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=cfg.learning_rate, betas=(0.5, 0.999))
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=cfg.learning_rate, betas=(0.5, 0.999))
    # loss averager
    loss_avg = utils.Averager()

    for epoch in range(cfg.num_epochs):
        train_iter = iter(train_loader)
        for i in range(len(train_loader)):
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            cpu_images, cpu_texts = next(train_iter)
            batch_size = cpu_images.size(0)

            for encoder_param, decoder_param in zip(encoder.parameters(),
                                                    decoder.parameters()):
                encoder_param.requires_grad = True
                decoder_param.requires_grad = True
            encoder.train()
            decoder.train()

            target_variable = converter.encode(cpu_texts)
            utils.load_data(image, cpu_images)

            # CNN + BiLSTM
            encoder_outputs = encoder(image)
            if torch.cuda.is_available():
                target_variable = target_variable.cuda()
                # start the decoder from the SOS token
                decoder_input = target_variable[utils.SOS_TOKEN].cuda()
                decoder_hidden = decoder.initHidden(batch_size).cuda()
            else:
                decoder_input = target_variable[utils.SOS_TOKEN]
                decoder_hidden = decoder.initHidden(batch_size)

            loss = 0.0
            # Apply teacher forcing with probability teach_forcing_prob;
            # incomplete final batches are always teacher-forced.
            teach_forcing = random.random() < teach_forcing_prob
            if teach_forcing or decoder_input.shape[0] < cfg.batch_size:
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    decoder_input = target_variable[di]
            else:
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    topv, topi = decoder_output.data.topk(1)
                    ni = topi.squeeze()
                    decoder_input = ni

            loss.backward()
            # torch.nn.utils.clip_grad_norm_(encoder.parameters(), clip)
            # torch.nn.utils.clip_grad_norm_(decoder.parameters(), clip)
            encoder_optimizer.step()
            decoder_optimizer.step()
            loss_avg.add(loss)

            if i % 1 == 0:  # log every batch
                print('[Epoch {0}/{1}] [Batch {2}/{3}] Loss: {4}'.format(
                    epoch + 1, cfg.num_epochs, i + 1, len(train_loader),
                    loss_avg.val()))
                loss_avg.reset()

        # save checkpoint
        torch.save(encoder.state_dict(),
                   '{0}/encoder_{1}.pth'.format(cfg.model, epoch))
        torch.save(decoder.state_dict(),
                   '{0}/decoder_{1}.pth'.format(cfg.model, epoch))
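# Both seq2seq train() variants call decoder.initHidden(batch_size) and
# step the decoder as decoder(input, hidden, encoder_outputs). A minimal
# sketch of that interface for a single-layer GRU decoder with additive
# attention; hidden_size, output_size, and max_enc_len are assumptions,
# and the real decoder in this project may differ.
class AttnDecoderSketch(nn.Module):
    def __init__(self, hidden_size, output_size, max_enc_len):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attn = nn.Linear(hidden_size * 2, max_enc_len)
        self.gru = nn.GRU(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def initHidden(self, batch_size):
        # (num_layers, batch, hidden) zeros, as train() expects.
        return torch.zeros(1, batch_size, self.hidden_size)

    def forward(self, input, hidden, encoder_outputs):
        # input: (batch,) token ids; encoder_outputs: (enc_len, batch, hidden)
        embedded = self.embedding(input)  # (batch, hidden)
        # Attention weights over encoder time steps.
        weights = torch.softmax(
            self.attn(torch.cat((embedded, hidden[0]), dim=1)), dim=1)
        context = torch.bmm(weights.unsqueeze(1),
                            encoder_outputs.permute(1, 0, 2)).squeeze(1)
        rnn_in = torch.cat((embedded, context), dim=1).unsqueeze(0)
        output, hidden = self.gru(rnn_in, hidden)
        # Log-probabilities, matching a per-step NLL criterion.
        output = torch.log_softmax(self.out(output.squeeze(0)), dim=1)
        return output, hidden, weights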