def main():
    """Entry point when using CRNN from the command line.

    Parses CLI arguments, then runs training and/or testing depending on
    the --train / --test flags.
    """
    args = parse_arguments()
    if not args.train and not args.test:
        print("If we are not training, and not testing, what is the point?")
    crnn = None
    if args.train:
        crnn = CRNN(
            args.batch_size,
            args.model_path,
            # Bug fix: this branch read args.example_path (missing "s") while
            # the test branch below reads args.examples_path — the attribute
            # produced by parse_arguments() — so training raised
            # AttributeError.
            args.examples_path,
            args.max_image_width,
            args.train_test_ratio,
            args.restore
        )
        crnn.train(args.iteration_count)
    if args.test:
        if crnn is None:
            # No model was built for training: build a test-only one.  A
            # train/test ratio of 0 keeps every example in the test split.
            crnn = CRNN(
                args.batch_size,
                args.model_path,
                args.examples_path,
                args.max_image_width,
                0,
                args.restore
            )
        crnn.test()
def main():
    """Command-line entry point: train and/or test a CRNN model."""
    args = parse_arguments()
    wants_train, wants_test = args.train, args.test
    # Complain when neither mode was requested.
    if not (wants_train or wants_test):
        print("If we are not training, and not testing, what is the point?")
    crnn = None
    if wants_train:
        crnn = CRNN(args.iteration_count, args.batch_size, args.model_path,
                    args.examples_path, args.max_image_width,
                    args.train_test_ratio, args.restore, 0)
        crnn.train(args.iteration_count)
    if wants_test:
        # Reuse the freshly trained model when one exists; otherwise build
        # one with train/test ratio 0 and the test-mode flag set to 1.
        if crnn is None:
            crnn = CRNN(args.iteration_count, args.batch_size,
                        args.model_path, args.examples_path,
                        args.max_image_width, 0, args.restore, 1)
        crnn.test()
def main():
    """Command-line entry point: resolve the charset, then train/test a CRNN."""
    args = parse_arguments()
    if not args.train and not args.test:
        print("If we are not training, and not testing, what is the point?")
    crnn = None
    charset = ""
    # --char_set_string is either a literal character string or a path to a
    # file listing the characters.
    if os.path.isfile(args.char_set_string):
        with open(args.char_set_string, "r") as charset_file:
            # Concatenate every line without its newline, then terminate the
            # charset with a single line break.
            charset = "".join(line.strip("\n") for line in charset_file)
            charset += "\n"
    else:
        charset = args.char_set_string
    if args.train:
        crnn = CRNN(args.batch_size, args.model_path, args.examples_path,
                    args.max_image_width, args.train_test_ratio, args.restore,
                    charset, args.use_trdg, args.language, args.learning_rate)
        crnn.train(args.iteration_count)
    if args.test:
        # Build a test-only model (train/test ratio 0) when training did not
        # already create one.
        if crnn is None:
            crnn = CRNN(args.batch_size, args.model_path, args.examples_path,
                        args.max_image_width, 0, args.restore, charset,
                        args.use_trdg, args.language, args.learning_rate)
        crnn.test()
def main():
    """Command-line entry point: train and/or test a CRNN model."""
    args = parse_arguments()
    run_train, run_test = args.train, args.test
    if not (run_train or run_test):
        print("If we are not training, and not testing, what is the point?")
    crnn = None
    if run_train:
        crnn = CRNN(args.batch_size, args.model_path, args.examples_path,
                    args.max_image_width, args.train_test_ratio, args.restore)
        crnn.train(args.iteration_count)
    if run_test:
        if crnn is None:
            # Testing without prior training: a train/test ratio of 0 keeps
            # every example in the test split.
            crnn = CRNN(args.batch_size, args.model_path, args.examples_path,
                        args.max_image_width, 0, args.restore)
        crnn.test()
def start_train():
    """Build a CRNN from the MODEL_HYPER settings, train it, then save it."""
    hyper = MODEL_HYPER
    model = CRNN(hyper.batch_size, hyper.epoches, hyper.data_path,
                 hyper.text_path, hyper.log_path, hyper.model_path)
    model.train()
    model.save()
# accuracy = n_correct / float((i + 1) * opt.val_batchsize) accuracy_train = n_correct_train / float(opt.num_val * opt.val_batchsize) print('accuray_train: %f' % (accuracy_train)) return accuracy_train step = 0 for epoch in range(opt.num_epoch): for i, (inputs, labels, lengths) in enumerate(dloader_train): for p in net.parameters(): # p.requires_grad = True p.requires_grad_(True) net.train() # inputs, labels, lengths = Variable(inputs.cuda()), Variable(labels), Variable(lengths) inputs = inputs.cuda() label = [] for j in range(labels.size(0)): if lengths.data[j, 0] == 0: continue label.append(labels[j, :lengths.data[j, 0]]) label = torch.cat(label) preds = net(inputs) bs = inputs.size(0) # pred_size = torch.IntTensor([preds.size(0)] * bs) pred_size = torch.tensor([preds.size(0)] * bs, dtype=torch.int32) optimizer.zero_grad() loss = criterion(preds, label, pred_size, lengths[:, 0]) / opt.train_batchsize
# Stand-alone training script: configure a CRNN and launch training.
from crnn import CRNN

# --- training configuration --------------------------------------------------
batch_size = 10
model_path = 'MyModel'
examples_picture_path = 'restore/'        # presumably image directory — confirm
examples_label_path = 'target_label.txt'
dictionary_path = 'dictionary.txt'
max_image_width = 256
train_test_ratio = 0.9                    # fraction of examples used for training
restore = False                           # do not resume from a checkpoint
NUM_CLASSES = 52
iteration_count = 4000

# NOTE(review): the model is constructed at import time, outside the
# __main__ guard — importing this module has that side effect.
crnn = CRNN(batch_size, model_path, examples_picture_path, examples_label_path,
            dictionary_path, max_image_width, train_test_ratio, restore,
            NUM_CLASSES)

if __name__ == '__main__':
    crnn.train(iteration_count)
def main():
    """Train a CRNN with CTC loss using settings from conf/train.yml.

    Loads hyper-parameters from the YAML config, builds the model and the
    train/validation sets, then runs the training loop, periodically
    validating and checkpointing the whole model object.
    """
    conf_file = "conf/train.yml"
    with open(conf_file, 'r') as f:
        # edict gives attribute-style access to the parsed YAML dict.
        # NOTE(review): yaml.load without Loader= is deprecated in
        # PyYAML >= 5.1 and unsafe on untrusted files — confirm the source.
        args = edict(yaml.load(f))
    train_root = args.train_root
    test_root = args.test_root
    batch_size = args.batch_size
    max_len = args.max_len
    img_h = args.img_h
    img_w = args.img_w
    n_hidden = args.n_hidden
    n_iter = args.n_iter
    lr = args.lr
    cuda = args.cuda                 # (unused below)
    val_interval = args.val_interval
    save_interval = args.save_interval
    model_dir = args.model_dir
    debug_level = args.debug_level
    experiment = args.experiment     # (unused below)
    n_channel = args.n_channel
    n_class = args.n_class
    beta = args.beta                 # only referenced by the commented Adam line
    # Pre-allocated buffers refilled in place by loadData() every batch.
    # NOTE(review): img_h is used for BOTH spatial dims (img_w unused here);
    # looks like a typo for (batch_size, n_channel, img_h, img_w) — confirm.
    image = torch.FloatTensor(batch_size, n_channel, img_h, img_h)
    text = torch.IntTensor(batch_size * max_len)
    length = torch.IntTensor(batch_size)
    logging.getLogger().setLevel(debug_level)
    '''
    50 - critical
    40 - error
    30 - warining
    20 - info
    10 - debug
    '''
    crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
    crnn.apply(weights_init)
    criterion = CTCLoss().cuda()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    # optimizer = optim.Adam(crnn.parameters(), lr=lr,
    #                        betas=(beta, 0.999))
    trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
    valset = train_set(test_root, batch_size, img_h, img_w, n_class)
    cur_iter = 0
    for ITER in range(n_iter):
        for train_img, train_label, train_lengths, batch_label \
                in iter(trainset):
            # Re-enable gradients on every parameter before the update
            # (presumably val() may have frozen them — confirm).
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            # A None image signals the dataset is exhausted for this pass.
            if train_img is None:
                break
            cur_iter += 1
            loadData(image, train_img)
            loadData(text, train_label)
            loadData(length, train_lengths)
            preds = crnn(train_img.cuda())
            # preds = F.softmax(preds, dim=2)
            # print(preds.shape)
            # Every sample is declared to use the full time dimension.
            # NOTE(review): assumes each batch has exactly batch_size
            # samples — a short final batch would mismatch; confirm.
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            # print(batch_label, text, length, len(text), len(length), length.sum(),
            #       preds.shape, preds_size.shape)
            cost = criterion(preds, text, preds_size, length)\
                / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            print("training-iter {} cost {}".format(
                ITER, cost.cpu().detach().numpy()[0]))
            if cur_iter % val_interval == 0:
                val(crnn, valset, criterion, n_class)
            if cur_iter % save_interval == 0:
                # Checkpoints the whole model object, not just state_dict.
                # NOTE(review): filename uses the epoch counter ITER, not
                # cur_iter — multiple saves in one epoch overwrite each other.
                model_file = os.path.join(model_dir,
                                          "crnn_iter{}.pth".format(ITER))
                print("saving in file {}".format(model_file))
                with open(model_file, 'wb') as f:
                    torch.save(crnn, f)
def train(root, start_epoch, epoch_num, letters, net=None, lr=0.1, fix_width=True):
    """
    Train CRNN model

    Args:
        root (str): Root directory of dataset
        start_epoch (int): Epoch number to start
        epoch_num (int): Epoch number to train
        letters (str): Letters contained in the data
        net (CRNN, optional): CRNN model (default: None)
        lr (float, optional): Coefficient that scale delta before it is
            applied to the parameters (default: 0.1)
        fix_width (bool, optional): Scale images to fixed size (default: True)

    Returns:
        CRNN: Trained CRNN model
    """
    # load data
    trainloader = load_data(root, training=True, fix_width=fix_width)
    if not net:
        # create a new model if net is None; +1 class for the CTC blank
        net = CRNN(1, len(letters) + 1)
    criterion = torch.nn.CTCLoss()
    optimizer = optim.Adadelta(net.parameters(), lr=lr, weight_decay=1e-3)
    # Bug fix: the detected CUDA availability was immediately clobbered by a
    # leftover debug line (use_cuda = False), forcing CPU training and
    # printing a misleading warning even on GPU machines.  Honor the
    # detected value instead.
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    if use_cuda:
        net = net.to(device)
        criterion = criterion.to(device)
    else:
        print("***** Warning: Cuda isn't available! *****")
    # get encoder and decoder
    labeltransformer = LabelTransformer(letters)
    print('==== Training.. ====')
    # .train() has any effect on Dropout and BatchNorm.
    net.train()
    for epoch in range(start_epoch, start_epoch + epoch_num):
        print('---- epoch: %d ----' % (epoch, ))
        loss_sum = 0
        for i, (img, label) in enumerate(trainloader):
            label, label_length = labeltransformer.encode(label)
            img = img.to(device)
            optimizer.zero_grad()
            # put images in
            outputs = net(img)
            # CTC needs one per-sample output length: the full time dimension
            output_length = torch.IntTensor([outputs.size(0)] * outputs.size(1))
            # calc loss
            loss = criterion(outputs, label, output_length, label_length)
            # update
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
        print('loss = %f' % (loss_sum, ))
    print('Finished Training')
    return net
def train():
    """Train the CRNN under PaddlePaddle dygraph mode.

    Reads all settings from the global train_parameters dict, optionally
    resumes from the latest checkpoint, and after every epoch saves a
    "latest" checkpoint plus a "best" one whenever validation accuracy
    improves.
    """
    epoch_num = train_parameters["num_epochs"]
    batch_size = train_parameters["train_batch_size"]
    # Use the GPU when this Paddle build was compiled with CUDA.
    place = fluid.CUDAPlace(
        0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    logger.info('train with {}'.format(place))
    with fluid.dygraph.guard(place):
        # Data loading.
        file_list = open(train_parameters['train_list']).readlines()
        train_reader = get_loader(
            file_list=file_list,
            input_size=train_parameters['input_size'],
            max_char_per_line=train_parameters['max_char_per_line'],
            mean_color=train_parameters['mean_color'],
            batch_size=train_parameters['train_batch_size'],
            mode='train',
            label_dict=train_parameters['label_dict'],
            place=place)
        batch_num = len(train_reader())
        # One extra output class for the CTC blank label (see blank= below).
        crnn = CRNN(train_parameters["class_dim"] + 1, batch_size=batch_size)
        total_step = batch_num * epoch_num
        LR = train_parameters['learning_rate']
        # Polynomial decay from LR down to 1e-7 over the whole run.
        lr = fluid.layers.polynomial_decay(LR, total_step, 1e-7, power=0.9)
        # lr = fluid.layers.piecewise_decay([total_step // 3, total_step * 2 // 3], [LR, LR * 0.1, LR * 0.01])
        optimizer = fluid.optimizer.Adam(learning_rate=lr,
                                         parameter_list=crnn.parameters())
        if train_parameters["continue_train"]:
            # Load the previous model and optimizer state to resume training.
            params_dict, opt_dict = fluid.load_dygraph('{}/crnn_latest'.format(
                train_parameters['save_model_dir']))
            crnn.set_dict(params_dict)
            optimizer.set_dict(opt_dict)
            logger.info("load model from {}".format(
                train_parameters['save_model_dir']))
        current_best = -1
        start_epoch = 0
        for epoch in range(start_epoch, epoch_num):
            crnn.train()
            tic = time.time()
            for batch_id, (img, label, label_len) in enumerate(train_reader()):
                out = crnn(img)
                # Swap axes 0 and 1 so warpctc receives time-major input.
                out_for_loss = fluid.layers.transpose(out, [1, 0, 2])
                # Every sample is declared to span the full output length.
                input_length = np.array([out.shape[1]] *
                                        out.shape[0]).astype("int64")
                input_length = fluid.dygraph.to_variable(input_length)
                input_length.stop_gradient = True
                loss = fluid.layers.warpctc(
                    input=out_for_loss,
                    label=label.astype(np.int32),
                    input_length=input_length,
                    label_length=label_len,
                    blank=train_parameters["class_dim"],
                    norm_by_times=True)
                avg_loss = fluid.layers.reduce_mean(loss)
                cur_acc_num, cur_all_num = acc_batch(out.numpy(), label.numpy())
                # batch_id % 1 is always 0, so this logs every batch.
                if batch_id % 1 == 0:
                    logger.info(
                        "epoch [{}/{}], step [{}/{}], loss: {:.6f}, acc: {:.4f}, lr: {}, time: {:.4f}"
                        .format(epoch, epoch_num, batch_id, batch_num,
                                avg_loss.numpy()[0],
                                cur_acc_num / cur_all_num,
                                optimizer.current_step_lr(),
                                time.time() - tic))
                    tic = time.time()
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                crnn.clear_gradients()
            # Always refresh the "latest" checkpoint after each epoch.
            fluid.save_dygraph(
                crnn.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            fluid.save_dygraph(
                optimizer.state_dict(),
                '{}/crnn_latest'.format(train_parameters['save_model_dir']))
            crnn.eval()
            ratio = eval_model(crnn, place=place)
            if ratio >= current_best:
                # New best validation accuracy: keep a separate checkpoint.
                fluid.save_dygraph(
                    crnn.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                fluid.save_dygraph(
                    optimizer.state_dict(),
                    '{}/crnn_best'.format(train_parameters['save_model_dir']))
                current_best = ratio
                logger.info("save model to {}, current best acc:{:.2f}".format(
                    train_parameters['save_model_dir'], ratio))
        logger.info("train end")
print("-----Test-----step:%d/%d----loss value:%f-----accuracy:%f" % (teststep, len(testdata), loss, accuracy)) writer.add_scalar("Testing Loss", loss, step) writer.add_scalar("Testing accuracy", accuracy, epoch) step = 1 for epoch in range(0, opt.epoch): correct = 0 number = 0 for i, batch in enumerate(traindata): raw_text = batch["label"] encode_text, length = process.encodetext(raw_text) encode_text = Variable(encode_text).to(device) image = Variable(batch["image"]).to(device) model.train() output = model(image) optimizer.zero_grad() output_size = Variable( torch.IntTensor([output.size(0)] * opt.batchsize)).to(device) loss = lossfunction(output, encode_text, output_size, length) loss.backward() optimizer.step() _, output = output.max(2) output = output.transpose(1, 0) outputtext = [] for i in range(0, output.size(0)): decode_text = process.decodetext(output[i]) outputtext += [decode_text] number += 1 if decode_text == raw_text[i]:
def test_train(self):
    '''
    parameters of train
    '''
    # Hyper-parameters for a short smoke-training run on the small datasets.
    # test_root = "data/ocr_dataset_val"
    # train_root = "data/ocr_dataset"
    train_root = "data/ocr_dataset_train_400_10/"
    test_root = "data/ocr_dataset_train_50_10_val/"
    batch_size = 20
    max_len = 15
    img_h, img_w = 32, 150
    n_hidden = 512
    n_iter = 400
    lr = 0.00005
    cuda = True              # (unused below)
    val_interval = 250
    save_interval = 1000
    model_dir = "models"
    debug_level = 20
    experiment = "experiment"  # (unused below)
    n_channel = 3
    n_class = 11
    beta = 0.5               # only referenced by the commented Adam line
    # Pre-allocated buffers refilled in place by loadData() every batch.
    # NOTE(review): img_h is used for BOTH spatial dims (img_w unused here);
    # looks like a typo for (batch_size, n_channel, img_h, img_w) — confirm.
    image = torch.FloatTensor(batch_size, n_channel, img_h, img_h)
    text = torch.IntTensor(batch_size * max_len)
    length = torch.IntTensor(batch_size)
    logging.getLogger().setLevel(debug_level)
    '''
    50 - critical
    40 - error
    30 - warining
    20 - info
    10 - debug
    '''
    crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
    crnn.apply(weights_init)
    criterion = CTCLoss().cuda()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    # optimizer = optim.Adam(crnn.parameters(), lr=lr,
    #                        betas=(beta, 0.999))
    trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
    valset = train_set(test_root, batch_size, img_h, img_w, n_class)
    cur_iter = 0
    for ITER in range(n_iter):
        for train_img, train_label, train_lengths, batch_label in iter(
                trainset):
            # Re-enable gradients on every parameter before the update
            # (presumably val() may have frozen them — confirm).
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            # A None image signals the dataset is exhausted for this pass.
            if train_img is None:
                break
            cur_iter += 1
            loadData(image, train_img)
            loadData(text, train_label)
            loadData(length, train_lengths)
            preds = crnn(train_img.cuda())
            # preds = F.softmax(preds, dim=2)
            # print(preds.shape)
            # Every sample is declared to use the full time dimension.
            # NOTE(review): assumes each batch has exactly batch_size
            # samples — a short final batch would mismatch; confirm.
            preds_size = Variable(
                torch.IntTensor([preds.size(0)] * batch_size))
            # print(batch_label, text, length, len(text), len(length), length.sum(),
            #       preds.shape, preds_size.shape)
            cost = criterion(preds, text, preds_size, length) / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            print("training-iter {} cost {}".format(
                ITER, cost.cpu().detach().numpy()[0]))
            if cur_iter % val_interval == 0:
                val(crnn, valset, criterion, n_class)
            if cur_iter % save_interval == 0:
                # NOTE(review): filename uses the epoch counter ITER rather
                # than cur_iter, so saves within one epoch overwrite.
                model_file = os.path.join(model_dir,
                                          "crnn_iter{}.pth".format(ITER))
                print("saving in file {}".format(model_file))
                with open(model_file, 'wb') as f:
                    torch.save(crnn, f)