def eval(path="checkpoint3.pt"):
    net = CRNN(nclass=100).double()
    optimizer = optim.Adam(net.parameters())

    checkpoint = torch.load(path)
    net.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    epoch = checkpoint["epoch"]
    loss = checkpoint["loss"]
    print(f"model current epoch: {epoch} with loss: {loss}")

    net.eval()
    while True:
        data = next(dataset)
        images = data["the_inputs"]
        labels = data["the_labels"]
        input_length = data["input_length"]
        label_length = data["label_length"]

        preds = net(images).detach()
        pred_texts, probs = decode_batch2(preds, string.printable)
        for i in range(len(pred_texts)):
            print(pred_texts[i], probs[i])
            print(images[i].size())
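# `decode_batch2` is defined elsewhere in this repo. For reference, a minimal
# greedy CTC decoding sketch, assuming `preds` has shape (T, N, C), class index
# 0 is the CTC blank, and label i maps to alphabet[i - 1] (all assumptions):
import torch

def greedy_ctc_decode(preds, alphabet, blank=0):
    best = preds.argmax(dim=2).t()  # best class per timestep: (T, N) -> (N, T)
    texts = []
    for seq in best:
        chars, prev = [], blank
        for idx in seq.tolist():
            # collapse repeated labels and drop blanks, per the CTC decoding rule
            if idx != prev and idx != blank:
                chars.append(alphabet[idx - 1])
            prev = idx
        texts.append("".join(chars))
    return texts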
if __name__ == '__main__':
    from paddle import fluid

    total_step = 30
    LR = 1e-3
    with fluid.dygraph.guard():
        lr = fluid.layers.piecewise_decay(
            [total_step // 3, total_step * 2 // 3],
            [LR, LR * 0.1, LR * 0.01])
        # lr = fluid.layers.polynomial_decay(LR, total_step, 1e-7, power=0.9)
        from crnn import CRNN
        crnn = CRNN(train_parameters["class_dim"] + 1, batch_size=16)
        optimizer = fluid.optimizer.Adam(learning_rate=lr,
                                         parameter_list=crnn.parameters())
        step = []
        lr_values = []
        for x in range(total_step):
            step.append(x)
            # advance one optimizer step so the scheduled lr moves forward
            l = fluid.dygraph.to_variable(np.array([1]))
            optimizer.minimize(l)
            lr_values.append(optimizer.current_step_lr())
            print(x, optimizer.current_step_lr())
        # plot the learning-rate schedule over the training steps
        from matplotlib import pyplot as plt
        plt.plot(step, lr_values)
        plt.show()
    # import paddle
    # temp_reader = custom_reader1()
def main():
    conf_file = "conf/train.yml"
    with open(conf_file, 'r') as f:
        args = edict(yaml.load(f))
    train_root = args.train_root
    test_root = args.test_root
    batch_size = args.batch_size
    max_len = args.max_len
    img_h = args.img_h
    img_w = args.img_w
    n_hidden = args.n_hidden
    n_iter = args.n_iter
    lr = args.lr
    cuda = args.cuda
    val_interval = args.val_interval
    save_interval = args.save_interval
    model_dir = args.model_dir
    debug_level = args.debug_level
    experiment = args.experiment
    n_channel = args.n_channel
    n_class = args.n_class
    beta = args.beta

    image = torch.FloatTensor(batch_size, n_channel, img_h, img_h)
    text = torch.IntTensor(batch_size * max_len)
    length = torch.IntTensor(batch_size)

    logging.getLogger().setLevel(debug_level)
    '''
    50 - critical
    40 - error
    30 - warning
    20 - info
    10 - debug
    '''
    crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
    crnn.apply(weights_init)
    criterion = CTCLoss().cuda()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    # optimizer = optim.Adam(crnn.parameters(), lr=lr,
    #                        betas=(beta, 0.999))

    trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
    valset = train_set(test_root, batch_size, img_h, img_w, n_class)

    cur_iter = 0
    for ITER in range(n_iter):
        for train_img, train_label, train_lengths, batch_label \
                in iter(trainset):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            if train_img is None:
                break
            cur_iter += 1
            loadData(image, train_img)
            loadData(text, train_label)
            loadData(length, train_lengths)
            preds = crnn(train_img.cuda())
            # preds = F.softmax(preds, dim=2)
            # print(preds.shape)
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            # print(batch_label, text, length, len(text), len(length),
            #       length.sum(), preds.shape, preds_size.shape)
            cost = criterion(preds, text, preds_size, length) / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            print("training-iter {} cost {}".format(
                ITER, cost.cpu().detach().numpy()[0]))
            if cur_iter % val_interval == 0:
                val(crnn, valset, criterion, n_class)
            if cur_iter % save_interval == 0:
                model_file = os.path.join(model_dir,
                                          "crnn_iter{}.pth".format(ITER))
                print("saving in file {}".format(model_file))
                with open(model_file, 'wb') as f:
                    torch.save(crnn, f)
if opt.finetune:
    print('Loading model from', opt.modeldir + opt.modelname)
    net.load_state_dict(torch.load(opt.modeldir + opt.modelname))
else:
    print('create new model')
    net.apply(weights_init)

if opt.ngpu > 1:
    # print("Let's use", torch.cuda.device_count(), "GPUs!")
    net = nn.DataParallel(net, device_ids=range(opt.ngpu))
net.cuda()
criterion = CTCLoss().cuda()

if opt.adadelta:
    optimizer = optim.Adadelta(net.parameters(), lr=opt.lr)
    # optimizer = optim.Adadelta(net.parameters(), lr=opt.lr, weight_decay=1e-8)
elif opt.rms:
    optimizer = optim.RMSprop(net.parameters(), lr=opt.lr)
else:
    optimizer = optim.Adam(net.parameters(), lr=opt.lr,
                           betas=(0.5, 0.999), weight_decay=0.003)

def val_test():
    print('Start val_test')
    for p in net.parameters():
        p.requires_grad = False
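# Setting requires_grad = False on every parameter works, but the usual PyTorch
# idiom for evaluation is torch.no_grad(), which disables autograd for the whole
# block. A minimal sketch, assuming a `val_loader` DataLoader (not shown above):
def val_test_no_grad():
    net.eval()                 # switch Dropout/BatchNorm to eval behavior
    with torch.no_grad():      # no gradient graph is built inside this block
        for img, label in val_loader:
            preds = net(img.cuda())
            # ... decode `preds` and compare against `label` here ...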
def weight_init(module):
    class_name = module.__class__.__name__
    if class_name.find('Conv') != -1:
        module.weight.data.normal_(0, 0.02)
    if class_name.find('BatchNorm') != -1:
        module.weight.data.normal_(1, 0.02)
        module.bias.data.fill_(0)

crnn.apply(weight_init)
loss_function = CTCLoss(zero_infinity=True)
loss_function = loss_function.cuda()
optimizer = Adadelta(crnn.parameters())
converter = Converter(option.alphabet)
print_every = 100
total_loss = 0.0

def validation():
    print('start validation...')
    crnn.eval()
    total_loss = 0.0
    n_correct = 0
    for i, (input, label) in enumerate(validationset_dataloader):
        if i == len(validationset_dataloader) - 1:
            continue
        if i == 9:
            break
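# `Converter` comes from this repo. Conceptually, its encode() maps text labels
# to integer sequences plus lengths for CTC, with index 0 reserved for the
# blank. A minimal sketch of that idea, assuming this indexing convention:
import torch

class SimpleConverter:
    def __init__(self, alphabet):
        # label 0 is the CTC blank, so characters start at index 1
        self.char2idx = {c: i + 1 for i, c in enumerate(alphabet)}

    def encode(self, texts):
        lengths = torch.IntTensor([len(t) for t in texts])
        flat = torch.IntTensor([self.char2idx[c] for t in texts for c in t])
        return flat, lengths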
def train(path=None):
    dataset = FakeTextImageGenerator(batch_size=16).iter()
    criterion = CTCLoss(reduction="mean", zero_infinity=True)
    net = CRNN(nclass=100).float()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    if path:
        checkpoint = torch.load(path)
        net.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        epoch = checkpoint["epoch"]
        loss = checkpoint["loss"]
        print(f"model current epoch: {epoch} with loss: {loss}")

    # loop over the dataset multiple times
    for epoch in range(1, 1000):
        running_loss = 0.0
        loop = tqdm(range(100))
        for i in loop:
            data = next(dataset)
            images = data["the_inputs"]
            labels = data["the_labels"]
            input_length = data["input_length"]
            label_length = data["label_length"]
            targets = data["targets"]
            # print("target", targets)
            # print("target l", targets.size())
            # print("label_l", label_length)
            # print("label_l l", label_length.size())
            # print("pred_l", input_length)
            # print("pred_l l", input_length.size())

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(images.float())
            # print(outputs[8, 0, :])
            # print(outputs[:, 0, :])
            # print(outputs.size())
            loss = criterion(outputs, labels, input_length, label_length)
            # print(loss.item())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            loop.set_postfix(epoch=epoch, loss=(running_loss / (i + 1)))

        # print(f"Epoch: {epoch} | Loss: {running_loss/100}")
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": net.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "loss": running_loss,
            },
            "checkpoint5.pt",
        )
    print("Finished Training")
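# For reference: torch.nn.CTCLoss expects log-probabilities of shape (T, N, C)
# plus per-sample input and target lengths, which is the contract the loop
# above relies on. A self-contained shape check (all sizes are made up here):
import torch

T, N, C, S = 48, 16, 100, 10   # time steps, batch, classes, max label length
log_probs = torch.randn(T, N, C).log_softmax(2)           # (T, N, C)
targets = torch.randint(1, C, (N, S), dtype=torch.long)   # 0 is the blank
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.randint(1, S + 1, (N,), dtype=torch.long)

ctc = torch.nn.CTCLoss(reduction="mean", zero_infinity=True)
print(ctc(log_probs, targets, input_lengths, target_lengths))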
def train(root, start_epoch, epoch_num, letters, net=None, lr=0.1, fix_width=True):
    """
    Train CRNN model

    Args:
        root (str): Root directory of dataset
        start_epoch (int): Epoch number to start from
        epoch_num (int): Number of epochs to train
        letters (str): Letters contained in the data
        net (CRNN, optional): CRNN model (default: None)
        lr (float, optional): Coefficient that scales delta before it is
            applied to the parameters (default: 0.1)
        fix_width (bool, optional): Scale images to fixed size (default: True)

    Returns:
        CRNN: Trained CRNN model
    """
    # load data
    trainloader = load_data(root, training=True, fix_width=fix_width)
    if not net:
        # create a new model if net is None
        net = CRNN(1, len(letters) + 1)
    criterion = torch.nn.CTCLoss()
    optimizer = optim.Adadelta(net.parameters(), lr=lr, weight_decay=1e-3)
    # use gpu or not
    use_cuda = torch.cuda.is_available()
    use_cuda = False  # force CPU even when CUDA is available
    device = torch.device('cuda' if use_cuda else 'cpu')
    if use_cuda:
        net = net.to(device)
        criterion = criterion.to(device)
    else:
        print("***** Warning: Cuda isn't available! *****")
    # get encoder and decoder
    labeltransformer = LabelTransformer(letters)

    print('==== Training.. ====')
    # .train() has an effect on Dropout and BatchNorm
    net.train()
    for epoch in range(start_epoch, start_epoch + epoch_num):
        print('---- epoch: %d ----' % (epoch, ))
        loss_sum = 0
        for i, (img, label) in enumerate(trainloader):
            label, label_length = labeltransformer.encode(label)
            img = img.to(device)
            optimizer.zero_grad()
            # feed images through the network
            outputs = net(img)
            output_length = torch.IntTensor([outputs.size(0)] * outputs.size(1))
            # calc loss
            loss = criterion(outputs, label, output_length, label_length)
            # update
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
        print('loss = %f' % (loss_sum, ))
    print('Finished Training')
    return net
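# A hypothetical invocation for a digits-only dataset; the root path, epoch
# counts, and alphabet below are illustrative, not from the original repo:
net = train('data/train', start_epoch=0, epoch_num=5, letters='0123456789')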
def train():
    epoch_num = train_parameters["num_epochs"]
    batch_size = train_parameters["train_batch_size"]
    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    logger.info('train with {}'.format(place))
    with fluid.dygraph.guard(place):
        # load the training data
        file_list = open(train_parameters['train_list']).readlines()
        train_reader = get_loader(
            file_list=file_list,
            input_size=train_parameters['input_size'],
            max_char_per_line=train_parameters['max_char_per_line'],
            mean_color=train_parameters['mean_color'],
            batch_size=train_parameters['train_batch_size'],
            mode='train',
            label_dict=train_parameters['label_dict'],
            place=place)
        batch_num = len(train_reader())
        crnn = CRNN(train_parameters["class_dim"] + 1, batch_size=batch_size)
        total_step = batch_num * epoch_num
        LR = train_parameters['learning_rate']
        lr = fluid.layers.polynomial_decay(LR, total_step, 1e-7, power=0.9)
        # lr = fluid.layers.piecewise_decay([total_step // 3, total_step * 2 // 3],
        #                                   [LR, LR * 0.1, LR * 0.01])
        optimizer = fluid.optimizer.Adam(learning_rate=lr,
                                         parameter_list=crnn.parameters())
        if train_parameters["continue_train"]:
            # load the previous checkpoint and continue training
            params_dict, opt_dict = fluid.load_dygraph(
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            crnn.set_dict(params_dict)
            optimizer.set_dict(opt_dict)
            logger.info("load model from {}".format(
                train_parameters['save_model_dir']))

        current_best = -1
        start_epoch = 0
        for epoch in range(start_epoch, epoch_num):
            crnn.train()
            tic = time.time()
            for batch_id, (img, label, label_len) in enumerate(train_reader()):
                out = crnn(img)
                out_for_loss = fluid.layers.transpose(out, [1, 0, 2])
                input_length = np.array([out.shape[1]] * out.shape[0]).astype("int64")
                input_length = fluid.dygraph.to_variable(input_length)
                input_length.stop_gradient = True
                loss = fluid.layers.warpctc(
                    input=out_for_loss,
                    label=label.astype(np.int32),
                    input_length=input_length,
                    label_length=label_len,
                    blank=train_parameters["class_dim"],
                    norm_by_times=True)
                avg_loss = fluid.layers.reduce_mean(loss)
                cur_acc_num, cur_all_num = acc_batch(out.numpy(), label.numpy())
                if batch_id % 1 == 0:  # log every batch
                    logger.info(
                        "epoch [{}/{}], step [{}/{}], loss: {:.6f}, acc: {:.4f}, lr: {}, time: {:.4f}"
                        .format(epoch, epoch_num, batch_id, batch_num,
                                avg_loss.numpy()[0], cur_acc_num / cur_all_num,
                                optimizer.current_step_lr(), time.time() - tic))
                    tic = time.time()
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                crnn.clear_gradients()
            fluid.save_dygraph(
                crnn.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            fluid.save_dygraph(
                optimizer.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            crnn.eval()
            ratio = eval_model(crnn, place=place)
            if ratio >= current_best:
                fluid.save_dygraph(
                    crnn.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                fluid.save_dygraph(
                    optimizer.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                current_best = ratio
                logger.info("save model to {}, current best acc:{:.2f}".format(
                    train_parameters['save_model_dir'], ratio))
        logger.info("train end")
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
        if hasattr(m, "bias") and m.bias is not None:
            torch.nn.init.constant_(m.bias.data, 0.0)
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)

# weights initialization
weights_init(model)

# load saved model
if opt.savedmodel != "save":
    model.load_state_dict(torch.load("savedmodel/%s.pth" % opt.savedmodel))

# create optimizer
optimizer = torch.optim.Adadelta(model.parameters())

# dataloaders
traindata = DataLoader(dataprocessing.ImageDataset(opt.dataroot, mode="train"),
                       batch_size=opt.batchsize, shuffle=True)
print(len(traindata))
testdata = DataLoader(dataprocessing.ImageDataset(opt.dataroot, mode="test"),
                      batch_size=opt.batchsize, shuffle=True)
print(len(testdata))

# loss function
lossfunction = CTCLoss()
process = dataprocessing.ProcessText(opt.alphabet)

# tensorboard
writer = SummaryWriter()
def test_train(self):
    '''
    parameters of train
    '''
    # test_root = "data/ocr_dataset_val"
    # train_root = "data/ocr_dataset"
    train_root = "data/ocr_dataset_train_400_10/"
    test_root = "data/ocr_dataset_train_50_10_val/"
    batch_size = 20
    max_len = 15
    img_h, img_w = 32, 150
    n_hidden = 512
    n_iter = 400
    lr = 0.00005
    cuda = True
    val_interval = 250
    save_interval = 1000
    model_dir = "models"
    debug_level = 20
    experiment = "experiment"
    n_channel = 3
    n_class = 11
    beta = 0.5

    image = torch.FloatTensor(batch_size, n_channel, img_h, img_h)
    text = torch.IntTensor(batch_size * max_len)
    length = torch.IntTensor(batch_size)

    logging.getLogger().setLevel(debug_level)
    '''
    50 - critical
    40 - error
    30 - warning
    20 - info
    10 - debug
    '''
    crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
    crnn.apply(weights_init)
    criterion = CTCLoss().cuda()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    # optimizer = optim.Adam(crnn.parameters(), lr=lr,
    #                        betas=(beta, 0.999))

    trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
    valset = train_set(test_root, batch_size, img_h, img_w, n_class)

    cur_iter = 0
    for ITER in range(n_iter):
        for train_img, train_label, train_lengths, batch_label in iter(trainset):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            if train_img is None:
                break
            cur_iter += 1
            loadData(image, train_img)
            loadData(text, train_label)
            loadData(length, train_lengths)
            preds = crnn(train_img.cuda())
            # preds = F.softmax(preds, dim=2)
            # print(preds.shape)
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            # print(batch_label, text, length, len(text), len(length),
            #       length.sum(), preds.shape, preds_size.shape)
            cost = criterion(preds, text, preds_size, length) / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            print("training-iter {} cost {}".format(
                ITER, cost.cpu().detach().numpy()[0]))
            if cur_iter % val_interval == 0:
                val(crnn, valset, criterion, n_class)
            if cur_iter % save_interval == 0:
                model_file = os.path.join(model_dir,
                                          "crnn_iter{}.pth".format(ITER))
                print("saving in file {}".format(model_file))
                with open(model_file, 'wb') as f:
                    torch.save(crnn, f)
num_classes = len(label_list)
rnn_input_size = int((inp_size - conv_kernel + 2 * padding) / stride + 1) * channels_out
'''
INITIALIZE MODEL
'''
model = CRNN(conv_kernel, channels_out, rnn_input_size,
             hidden_neurons_1, hidden_neurons_2, fc1, num_classes)
print("MODEL ARCHITECTURE:")
print(model)
'''
INITIALIZE LOSS FUNCTION AND MODEL OPTIMIZER
'''
# Any optimizer can be chosen
optimizer = optim.SGD(params=model.parameters(), lr=lr, momentum=0.9)
lambda1 = lambda epoch: 0.95 ** epoch
scheduler = LambdaLR(optimizer, lr_lambda=lambda1)
loss_function = nn.NLLLoss()
'''
START TRAINING PROCEDURE
'''
best_acc = 0
training_loss = []
test_accuracy = []
for epoch in range(epochs):
    total_loss = 0
    '''
    START EVALUATION PROCEDURE
    (Start evaluation already before training for comparison)
    '''
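# For reference: LambdaLR multiplies the base lr by lambda1(epoch), so here
# lr(epoch) = lr * 0.95**epoch after each scheduler.step(). A self-contained
# sketch of that behavior (the single dummy parameter is purely illustrative):
import torch
from torch.optim.lr_scheduler import LambdaLR

opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1, momentum=0.9)
sched = LambdaLR(opt, lr_lambda=lambda epoch: 0.95 ** epoch)
for epoch in range(3):
    # ... one epoch of training steps would go here ...
    sched.step()
    print(epoch, sched.get_last_lr())  # 0.1 * 0.95 ** (epoch + 1)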