def test(opt):
    """Evaluate a trained text-recognition model.

    Builds the label converter and model from ``opt``, loads the checkpoint
    given by ``opt.saved_model``, copies it into ``./result/<opt.name>/`` and
    runs evaluation — either on the full benchmark suite or on a single
    dataset — appending the accuracy to ``log_evaluation.txt``.
    """
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    model = Model(opt.imgH, opt.imgW, opt.input_channel, opt.output_channel,
                  opt.hidden_size, opt.num_class, opt.batch_max_length,
                  Transformation=opt.Transformation,
                  FeatureExtraction=opt.FeatureExtraction,
                  SequenceModeling=opt.SequenceModeling,
                  Prediction=opt.Prediction)
    print('model input parameters', opt.imgH, opt.imgW, opt.input_channel,
          opt.output_channel, opt.hidden_size, opt.num_class,
          opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).cuda()

    # load model
    if opt.saved_model != '':
        print('loading pretrained model from %s' % opt.saved_model)
        model.load_state_dict(torch.load(opt.saved_model))
        opt.name = '_'.join(opt.saved_model.split('/')[1:])
    # NOTE(review): opt.name is only assigned when --saved_model is given,
    # but the result-directory code below uses it unconditionally — confirm
    # callers always supply a checkpoint (or pre-set opt.name).
    # print(model)

    """ keep evaluation model and result logs """
    os.makedirs(f'./result/{opt.name}', exist_ok=True)
    # Fixed: `os.system(f'cp …')` is non-portable and breaks on paths with
    # shell metacharacters; copy the checkpoint with shutil instead.
    import shutil
    shutil.copy(opt.saved_model, f'./result/{opt.name}/')

    """ setup loss """
    if 'CTC' in opt.Prediction:
        criterion = CTCLoss(reduction='sum')
    else:
        criterion = torch.nn.CrossEntropyLoss(
            ignore_index=0).cuda()  # ignore [GO] token = ignore index 0

    """ evaluation """
    model.eval()
    if opt.benchmark_all_eval:  # evaluation with 10 benchmark evaluation datasets
        benchmark_all_eval(model, criterion, converter, opt)
    else:
        AlignCollate_evaluation = AlignCollate(imgH=opt.imgH, imgW=opt.imgW)
        eval_data = hierarchical_dataset(root=opt.eval_data, opt=opt)
        evaluation_loader = torch.utils.data.DataLoader(
            eval_data, batch_size=opt.batch_size,
            shuffle=False,
            num_workers=int(opt.workers),
            collate_fn=AlignCollate_evaluation, pin_memory=True)
        _, accuracy_by_best_model, _, _, _, _, _ = validation(
            model, criterion, evaluation_loader, converter, opt)

        print(accuracy_by_best_model)
        with open('./result/{0}/log_evaluation.txt'.format(opt.name), 'a') as log:
            log.write(str(accuracy_by_best_model) + '\n')
def __init__(self, num_classes=72):
    """Build the LPRNet recognizer.

    Args:
        num_classes: size of the output alphabet (default 72).
    """
    super().__init__()
    # Record device availability and the alphabet size first; the private
    # builder below constructs the network layers.
    self.cuda = torch.cuda.is_available()
    self.num_classes = num_classes
    self.module = self.__create_module()
    # CTC objective: per-sample losses are averaged over frames, then
    # mean-reduced across the batch; index 0 is the blank symbol.
    self.criterion = CTCLoss(average_frames=True, reduction="mean", blank=0)
def __init__(self, model, train_dataloader, val_dataloader, param): super(CTCTrainer, self).__init__() # ---- create saving dir and backup files self.checkpoint_dir = param.saving_path if not os.path.exists(self.checkpoint_dir): os.makedirs(self.checkpoint_dir) self._copy_backups() self.log_filename = os.path.join(self.checkpoint_dir, "log.txt") # ---- display configurations pp = pprint.PrettyPrinter(indent=4) LOG(pp.pformat(param), self.log_filename) LOG( '=' * 50 + '\n# Params = {}'.format( sum(p.numel() for p in model.parameters() if p.requires_grad)), self.log_filename) self.p_tr = param.train # ---- construct data loaders self.train_dataloader = train_dataloader self.val_dataloader = val_dataloader # ---- construct model self.model = model # ---- loss function and optimizer self.num_classes = param.num_classes self.ctc = CTCLoss(reduction='mean', blank=self.num_classes - 1) self.optimizer = optim.RMSprop(self.model.parameters(), lr=self.p_tr.learning_rate, alpha=0.95, weight_decay=self.p_tr.l2_weight_decay) self.scheduler = ReduceLROnPlateau( self.optimizer, factor=self.p_tr.lr_factor, patience=self.p_tr.lr_reduce_patient, mode='min')
def main(opts):
    """Joint detection + OCR training loop for the OCT-E2E-MLT model.

    Optionally restores a checkpoint from ``opts.model``, then alternates
    detection-loss and CTC/OCR-loss updates, logging every
    ``disp_interval`` steps, tracking the best running loss for early
    stopping, and checkpointing every ``batch_per_epoch`` steps.

    Relies on module-level globals: ``weight_decay``, ``norm_height``,
    ``disp_interval`` and ``batch_per_epoch``.
    """
    model_name = 'OCT-E2E-MLT'
    net = OctMLT(attention=True)
    print("Using {0}".format(model_name))

    learning_rate = opts.base_lr
    optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr,
                                 weight_decay=weight_decay)
    step_start = 0
    if os.path.exists(opts.model):
        # Fixed: referenced undefined name `args` — the parameter is `opts`.
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(opts.model, net)

    if opts.cuda:
        net.cuda()
    net.train()

    data_generator = data_gen.get_batch(num_workers=opts.num_readers,
                                        input_size=opts.input_size,
                                        batch_size=opts.batch_size,
                                        train_list=opts.train_list,
                                        geo_type=opts.geo_type)
    dg_ocr = ocr_gen.get_batch(num_workers=2,
                               batch_size=opts.ocr_batch_size,
                               train_list=opts.ocr_feed_list,
                               in_train=True,
                               norm_height=norm_height,
                               rgb=True)

    # Running accumulators, reset every `disp_interval` steps.
    train_loss = 0
    bbox_loss, seg_loss, angle_loss = 0., 0., 0.
    cnt = 0
    ctc_loss = CTCLoss()
    ctc_loss_val = 0
    box_loss_val = 0
    good_all = 0
    gt_all = 0

    # Early-stopping bookkeeping: remember the best running loss seen so far.
    best_step = step_start
    best_loss = 1000000
    best_model = net.state_dict()
    best_optimizer = optimizer.state_dict()
    best_learning_rate = learning_rate
    max_patience = 3000
    early_stop = False

    for step in range(step_start, opts.max_iters):
        # batch
        images, image_fns, score_maps, geo_maps, training_masks, gtso, lbso, gt_idxs = next(
            data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(
            0, 3, 1, 2)
        # Fixed: `timeit.timeit()` benchmarks an empty statement — it does NOT
        # return a timestamp. Use `timeit.default_timer()` for wall-clock time.
        start = timeit.default_timer()
        try:
            seg_pred, roi_pred, angle_pred, features = net(im_data)
        except Exception:  # narrowed from bare except; keep best-effort skip
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            continue
        end = timeit.default_timer()

        # backward
        smaps_var = net_utils.np_to_variable(score_maps, is_cuda=opts.cuda)
        training_mask_var = net_utils.np_to_variable(training_masks, is_cuda=opts.cuda)
        angle_gt = net_utils.np_to_variable(geo_maps[:, :, :, 4], is_cuda=opts.cuda)
        geo_gt = net_utils.np_to_variable(geo_maps[:, :, :, [0, 1, 2, 3]],
                                          is_cuda=opts.cuda)
        try:
            loss = net.loss(seg_pred, smaps_var, training_mask_var, angle_pred,
                            angle_gt, roi_pred, geo_gt)
        except Exception:
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            continue

        bbox_loss += net.box_loss_value.data.cpu().numpy()
        seg_loss += net.segm_loss_value.data.cpu().numpy()
        angle_loss += net.angle_loss_value.data.cpu().numpy()
        train_loss += loss.data.cpu().numpy()

        optimizer.zero_grad()
        try:
            if step > 10000:  # this is just extra augumentation step ... in early stage just slows down training
                ctcl, gt_b_good, gt_b_all = process_boxes(images, im_data,
                                                          seg_pred[0], roi_pred[0],
                                                          angle_pred[0], score_maps,
                                                          gt_idxs, gtso, lbso,
                                                          features, net, ctc_loss,
                                                          opts, debug=opts.debug)
                ctc_loss_val += ctcl.data.cpu().numpy()[0]
                loss = loss + ctcl
                gt_all += gt_b_all
                good_all += gt_b_good

            # Dedicated OCR batch: CTC loss over cropped, normalized text lines.
            imageso, labels, label_length = next(dg_ocr)
            im_data_ocr = net_utils.np_to_variable(imageso, is_cuda=opts.cuda).permute(
                0, 3, 1, 2)
            features = net.forward_features(im_data_ocr)
            labels_pred = net.forward_ocr(features)
            probs_sizes = torch.IntTensor(
                [(labels_pred.permute(2, 0, 1).size()[0])] *
                (labels_pred.permute(2, 0, 1).size()[1]))
            label_sizes = torch.IntTensor(
                torch.from_numpy(np.array(label_length)).int())
            labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int())
            loss_ocr = ctc_loss(labels_pred.permute(2, 0, 1), labels, probs_sizes,
                                label_sizes) / im_data_ocr.size(0) * 0.5
            loss_ocr.backward()
            loss.backward()
            optimizer.step()
        except Exception:
            import sys, traceback
            traceback.print_exc(file=sys.stdout)

        cnt += 1
        if step % disp_interval == 0:
            if opts.debug:
                # Visualize the segmentation prediction over the input image.
                segm = seg_pred[0].data.cpu()[0].numpy()
                segm = segm.squeeze(0)
                cv2.imshow('segm_map', segm)
                segm_res = cv2.resize(score_maps[0],
                                      (images.shape[2], images.shape[1]))
                mask = np.argwhere(segm_res > 0)
                x_data = im_data.data.cpu().numpy()[0]
                x_data = x_data.swapaxes(0, 2)
                x_data = x_data.swapaxes(0, 1)
                # Undo the [-1, 1] normalization for display.
                x_data += 1
                x_data *= 128
                x_data = np.asarray(x_data, dtype=np.uint8)
                x_data = x_data[:, :, ::-1]
                im_show = x_data
                try:
                    im_show[mask[:, 0], mask[:, 1], 1] = 255
                    im_show[mask[:, 0], mask[:, 1], 0] = 0
                    im_show[mask[:, 0], mask[:, 1], 2] = 0
                except Exception:
                    pass
                cv2.imshow('img0', im_show)
                cv2.imshow('score_maps', score_maps[0] * 255)
                cv2.imshow('train_mask', training_masks[0] * 255)
                cv2.waitKey(10)

            train_loss /= cnt
            bbox_loss /= cnt
            seg_loss /= cnt
            angle_loss /= cnt
            ctc_loss_val /= cnt
            box_loss_val /= cnt

            if train_loss < best_loss:
                best_step = step
                best_model = net.state_dict()
                best_loss = train_loss
                best_learning_rate = learning_rate
                best_optimizer = optimizer.state_dict()
            # Fixed: condition was `best_step - step > max_patience`, which is
            # never true (best_step <= step); compare the other way around.
            if step - best_step > max_patience:
                print("Early stopped criteria achieved.")
                save_name = os.path.join(
                    opts.save_path, 'BEST_{}_{}.h5'.format(model_name, best_step))
                state = {
                    'step': best_step,
                    'learning_rate': best_learning_rate,
                    'state_dict': best_model,
                    'optimizer': best_optimizer
                }
                torch.save(state, save_name)
                print('save model: {}'.format(save_name))
                opts.max_iters = step
                early_stop = True
                # Fixed: mutating opts.max_iters cannot stop an already
                # constructed range(); exit the loop explicitly.
                break

            try:
                print(
                    'epoch %d[%d], loss: %.3f, bbox_loss: %.3f, seg_loss: %.3f, ang_loss: %.3f, ctc_loss: %.3f, rec: %.5f in %.3f'
                    % (step / batch_per_epoch, step, train_loss, bbox_loss,
                       seg_loss, angle_loss, ctc_loss_val,
                       good_all / max(1, gt_all), end - start))
                print('max_memory_allocated {}'.format(
                    torch.cuda.max_memory_allocated()))
            except Exception:
                import sys, traceback
                traceback.print_exc(file=sys.stdout)

            # Reset the running accumulators for the next display window.
            train_loss = 0
            bbox_loss, seg_loss, angle_loss = 0., 0., 0.
            cnt = 0
            ctc_loss_val = 0
            good_all = 0
            gt_all = 0
            box_loss_val = 0

        #if step % valid_interval == 0:
        #  validate(opts.valid_list, net)
        if step > step_start and (step % batch_per_epoch == 0):
            save_name = os.path.join(opts.save_path,
                                     '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
                'max_memory_allocated': torch.cuda.max_memory_allocated()
            }
            torch.save(state, save_name)
            print('save model: {}\tmax memory: {}'.format(
                save_name, torch.cuda.max_memory_allocated()))

    if not early_stop:
        save_name = os.path.join(opts.save_path, '{}.h5'.format(model_name))
        state = {
            'step': step,
            'learning_rate': learning_rate,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        torch.save(state, save_name)
        print('save model: {}'.format(save_name))
# NOTE(review): this chunk begins mid-expression — the call that receives
# `noise_levels` (presumably an audio-augmentation/config constructor) opens
# outside this view.
                                  noise_levels=(args.noise_min, args.noise_max))

# Resolve the RNN cell type from its lowercase name.
rnn_type = args.rnn_type.lower()
assert rnn_type in supported_rnns, "rnn_type should be either lstm, rnn or gru"
model = DeepSpeech(rnn_hidden_size=args.hidden_size,
                   nb_layers=args.hidden_layers,
                   labels=labels,
                   rnn_type=supported_rnns[rnn_type],
                   audio_conf=audio_conf,
                   bidirectional=args.bidirectional)
parameters = model.parameters()
optimizer = torch.optim.SGD(parameters, lr=args.lr,
                            momentum=args.momentum, nesterov=True)
# CTC loss plus a greedy (best-path) decoder for evaluation.
criterion = CTCLoss()
decoder = GreedyDecoder(labels)
train_dataset = SpectrogramDataset(audio_conf=audio_conf,
                                   manifest_filepath=args.train_manifest,
                                   labels=labels,
                                   normalize=True,
                                   augment=args.augment)
test_dataset = SpectrogramDataset(audio_conf=audio_conf,
                                  manifest_filepath=args.val_manifest,
                                  labels=labels,
                                  normalize=True,
                                  augment=False)
if not args.distributed:
    train_sampler = BucketingSampler(train_dataset, batch_size=args.batch_size)
else:
# NOTE(review): truncated here — the distributed-sampler branch continues
# beyond this view.
def train(self, client_data, model, conf):
    """Run local training for one federated client and report feedback.

    Trains ``model`` on ``client_data`` for up to ``conf.local_steps``
    mini-batch steps, with task-specific unpacking, optimizers and losses
    for the 'detection', 'nlp', 'voice' and 'speech' tasks (classification
    otherwise).  Any exception aborts training for this client and is
    reported in the result instead of being raised.

    Returns:
        dict with client id, moving training loss, trained sample count,
        success flag, utility score, updated weights and wall duration.
    """
    clientId = conf.clientId
    logging.info(f"Start to train (CLIENT: {clientId}) ...")
    tokenizer, device = conf.tokenizer, conf.device

    model = model.to(device=device)
    model.train()

    trained_unique_samples = min(len(client_data.dataset),
                                 conf.local_steps * conf.batch_size)

    # FedProx: snapshot the incoming global weights for the proximal update.
    if conf.gradient_policy == 'prox':
        global_model = [param.data.clone() for param in model.parameters()]

    if conf.task == "detection":
        # Per-parameter groups following the Faster R-CNN cfg.TRAIN
        # convention: biases may get a doubled LR and their own decay.
        lr = conf.learning_rate
        params = []
        for key, value in dict(model.named_parameters()).items():
            if value.requires_grad:
                if 'bias' in key:
                    params += [{'params':[value],'lr':lr*(cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
                else:
                    params += [{
                        'params': [value],
                        'lr': lr,
                        'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                    }]
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    elif conf.task == 'nlp':
        # Standard transformer recipe: no weight decay on biases/LayerNorm.
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay": conf.weight_decay,
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0,
            },
        ]
        optimizer = torch.optim.AdamW(optimizer_grouped_parameters,
                                      lr=conf.learning_rate)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=conf.learning_rate,
                                    momentum=0.9,
                                    weight_decay=5e-4)

    # Per-sample (unreduced) losses are needed for the feedback statistics.
    if conf.task == 'voice':
        from torch_baidu_ctc import CTCLoss
        criterion = CTCLoss(reduction='none').to(device=device)
    else:
        criterion = torch.nn.CrossEntropyLoss(reduction='none').to(
            device=device)

    epoch_train_loss = 1e-4  # sentinel meaning "not yet initialized"
    error_type = None
    completed_steps = 0

    if conf.task == "detection":
        # Reusable input buffers, resized and filled per batch below.
        im_data = Variable(torch.FloatTensor(1).cuda())
        im_info = Variable(torch.FloatTensor(1).cuda())
        num_boxes = Variable(torch.LongTensor(1).cuda())
        gt_boxes = Variable(torch.FloatTensor(1).cuda())

    # TODO: One may hope to run fixed number of epochs, instead of iterations
    while completed_steps < conf.local_steps:
        try:
            for data_pair in client_data:
                # ---- task-specific unpacking of one mini-batch
                if conf.task == 'nlp':
                    (data, _) = data_pair
                    data, target = mask_tokens(data, tokenizer, conf,
                                               device=device)
                elif conf.task == 'voice':
                    (data, target, input_percentages, target_sizes), _ = data_pair
                    input_sizes = input_percentages.mul_(int(
                        data.size(3))).int()
                elif conf.task == 'detection':
                    temp_data = data_pair
                    target = temp_data[4]
                    data = temp_data[0:4]
                else:
                    (data, target) = data_pair

                if conf.task == "detection":
                    im_data.resize_(data[0].size()).copy_(data[0])
                    im_info.resize_(data[1].size()).copy_(data[1])
                    gt_boxes.resize_(data[2].size()).copy_(data[2])
                    num_boxes.resize_(data[3].size()).copy_(data[3])
                elif conf.task == 'speech':
                    data = torch.unsqueeze(data, 1).to(device=device)
                else:
                    data = Variable(data).to(device=device)
                target = Variable(target).to(device=device)

                # ---- forward pass and task-specific loss
                if conf.task == 'nlp':
                    outputs = model(data, labels=target)
                    loss = outputs[0]
                elif conf.task == 'voice':
                    outputs, output_sizes = model(data, input_sizes)
                    outputs = outputs.transpose(0, 1).float()  # TxNxH
                    loss = criterion(outputs, target, output_sizes,
                                     target_sizes)
                elif conf.task == "detection":
                    rois, cls_prob, bbox_pred, \
                    rpn_loss_cls, rpn_loss_box, \
                    RCNN_loss_cls, RCNN_loss_bbox, \
                    rois_label = model(im_data, im_info, gt_boxes, num_boxes)
                    loss = rpn_loss_cls + rpn_loss_box \
                            + RCNN_loss_cls + RCNN_loss_bbox
                    loss_rpn_cls = rpn_loss_cls.item()
                    loss_rpn_box = rpn_loss_box.item()
                    loss_rcnn_cls = RCNN_loss_cls.item()
                    loss_rcnn_box = RCNN_loss_bbox.item()
                    print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                        % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                else:
                    output = model(data)
                    loss = criterion(output, target)

                # ======== collect training feedback for other decision components [e.g., kuiper selector] ======
                if conf.task == 'nlp':
                    loss_list = [loss.item()]  #[loss.mean().data.item()]
                elif conf.task == "detection":
                    loss_list = [loss.tolist()]
                    loss = loss.mean()
                else:
                    loss_list = loss.tolist()
                    loss = loss.mean()

                # Mean of squared per-sample losses for this batch.
                temp_loss = sum([l**2 for l in loss_list]) / float(
                    len(loss_list))

                # only measure the loss of the first epoch
                if completed_steps < len(client_data):
                    if epoch_train_loss == 1e-4:
                        epoch_train_loss = temp_loss
                    else:
                        # Exponential moving average of the squared loss.
                        epoch_train_loss = (
                            1. - conf.loss_decay
                        ) * epoch_train_loss + conf.loss_decay * temp_loss

                # ========= Define the backward loss ==============
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # ========= Weight handler ========================
                if conf.gradient_policy == 'prox':
                    # NOTE(review): this ADDS lr*mu*(w - w_global); textbook
                    # FedProx SUBTRACTS that proximal gradient — confirm the
                    # sign is intentional.
                    for idx, param in enumerate(model.parameters()):
                        param.data += conf.learning_rate * conf.proxy_mu * (
                            param.data - global_model[idx])

                completed_steps += 1
                if completed_steps == conf.local_steps:
                    break
        except Exception as ex:
            # Federated runs tolerate individual client failures: record the
            # exception and report it in the result instead of raising.
            error_type = ex
            break

    model_param = [
        param.data.cpu().numpy() for param in model.parameters()
    ]
    results = {
        'clientId': clientId,
        'moving_loss': epoch_train_loss,
        'trained_size': completed_steps * conf.batch_size,
        'success': completed_steps > 0
    }
    results['utility'] = math.sqrt(epoch_train_loss) * float(
        trained_unique_samples)

    if error_type is None:
        logging.info(
            f"Training of (CLIENT: {clientId}) completes, {results}")
    else:
        logging.info(
            f"Training of (CLIENT: {clientId}) failed as {error_type}")

    results['update_weight'] = model_param
    results['wall_duration'] = 0

    return results
xs = torch.tensor([10, 6, 9], dtype=torch.int) # Target lengths ys = torch.tensor([5, 3, 4], dtype=torch.int) # By default, the costs (negative log-likelihood) of all samples are summed. # This is equivalent to: # ctc_loss(x, y, xs, ys, average_frames=False, reduction="sum") loss1 = ctc_loss(x, y, xs, ys) # You can also average the cost of each sample among the number of frames. # The averaged costs are then summed. loss2 = ctc_loss(x, y, xs, ys, average_frames=True) # Instead of summing the costs of each sample, you can perform # other `reductions`: "none", "sum", or "mean" # # Return an array with the loss of each individual sample losses = ctc_loss(x, y, xs, ys, reduction="none") # # Compute the mean of the individual losses loss3 = ctc_loss(x, y, xs, ys, reduction="mean") # # First, normalize loss by number of frames, later average losses loss4 = ctc_loss(x, y, xs, ys, average_frames=True, reduction="mean") # Finally, there's also a nn.Module to use this loss. ctc = CTCLoss(average_frames=True, reduction="mean", blank=0) loss4_2 = ctc(x, y, xs, ys) # Note: the `blank` option is also available for `ctc_loss`. # By default it is 0.
def main(args):
    """Joint text localization + recognition training loop.

    Trains a resnet50-based detector with an auxiliary OCR (CTC) branch,
    optionally resuming from ``args.resume``.  Prints running losses every
    ``print_interval`` steps and saves a checkpoint every
    ``batch_per_epoch`` steps (both module-level globals).
    """
    if args.checkpoint == '':
        args.checkpoint = 'checkpoints'
    print(('checkpoint path: %s' % args.checkpoint))
    print(('init lr: %.8f' % args.lr))
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    start_step = 0
    train_loader = dataset.get_batch(num_workers=args.num_workers,
                                     input_dirs=args.input_dirs,
                                     input_size=args.input_size,
                                     batch_size=args.batch_size,
                                     vis=args.debug)
    # Load OCR dataset
    ocr_loader = ocr_dataset.get_batch(num_workers=2,
                                       input_list=args.ocr_input_list,
                                       batch_size=args.ocr_batch_size,
                                       norm_height=args.norm_height)

    model = resnet50(pretrained=True).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    if args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        start_step = checkpoint['step']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
    else:
        print('Training from scratch.')

    model.train()
    ctc_loss = CTCLoss()

    # Running accumulators, reset every `print_interval` steps.
    train_loss_val, counter = 0, 0
    ctc_loss_val = 0
    good_all = 0
    gt_all = 0

    for step in range(start_step, args.max_iterators):
        # Localization data loader
        images_org, score_maps, geo_maps, training_masks, gt_outputs, label_outputs = next(train_loader)
        images = np_to_variable(images_org).permute(0, 3, 1, 2)
        score_maps = np_to_variable(score_maps).permute(0, 3, 1, 2)
        training_masks = np_to_variable(training_masks).permute(0, 3, 1, 2)
        geo_maps = np_to_variable(geo_maps)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Train the network localization and recognition
        score_pred, geo_pred = model(images)

        # Calculating the Loss
        loss = losses.loss(score_maps, score_pred, geo_maps, geo_pred, training_masks)
        train_loss_val += loss.item()

        try:
            # Recognition on detected boxes only after a 10k-step detector
            # warm-up.
            if step > 10000:
                recog_loss, gt_b_good, gt_b_all = recognizer(images_org, images,
                                                             gt_outputs, label_outputs,
                                                             model, ctc_loss,
                                                             args.norm_height,
                                                             args.ocr_debug)
                ctc_loss_val += recog_loss.item()
                loss = loss + recog_loss
                gt_all += gt_b_all
                good_all += gt_b_good
        except Exception:
            # NOTE(review): silently swallows recognizer failures — consider
            # logging the traceback so real errors are not hidden.
            pass

        # Recognition data loader
        ocr_images, labels, labels_length = next(ocr_loader)
        ocr_images = np_to_variable(ocr_images).permute(0, 3, 1, 2)
        labels_pred = model.forward_ocr(ocr_images)
        # CTC inputs: per-sample output length (all equal to the sequence
        # length produced by the network) and per-sample label length.
        probs_sizes = torch.IntTensor(
            [(labels_pred.permute(2, 0, 1).size()[0])] *
            (labels_pred.permute(2, 0, 1).size()[1]))
        label_sizes = torch.IntTensor(torch.from_numpy(np.array(labels_length)).int())
        labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int())
        loss_ocr = ctc_loss(labels_pred.permute(2, 0, 1), labels, probs_sizes,
                            label_sizes) / ocr_images.size(0) * 0.5

        # Calculating the Gradients
        loss_ocr.backward()
        loss.backward()

        # Update the weights
        optimizer.step()
        counter += 1

        if (step + 1) % print_interval == 0:
            train_loss_val /= counter
            ctc_loss_val /= counter
            print('\nEpoch: %d[%d] | LR: %f | Loss: %.3f | CTC_Loss: %.3f | Rec: %.5f' % (
                (step + 1) / batch_per_epoch, step + 1,
                optimizer.param_groups[0]['lr'], train_loss_val, ctc_loss_val,
                good_all / max(1, gt_all)))
            train_loss_val, counter = 0, 0
            ctc_loss_val = 0
            good_all = 0
            gt_all = 0

        if (step + 1) % batch_per_epoch == 0:
            checkpoint_file_name = 'LS1706203-{}.h5'.format(step + 1)
            save_checkpoint({
                'step': step + 1,
                'state_dict': model.state_dict(),
                'lr': args.lr,
                'optimizer': optimizer.state_dict(),
            }, checkpoint=args.checkpoint, filename=checkpoint_file_name)
def train(opt):
    """Train a scene-text recognition model.

    Prepares balanced training data and a validation loader, builds the
    model from ``opt`` (CTC or attention head), initializes weights,
    optionally resumes from ``opt.continue_model``, then runs the training
    loop with periodic validation, best-model checkpointing and logging
    under ``./saved_models/<opt.experiment_name>/``.
    """
    """ dataset preparation """
    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW)
    valid_dataset = hierarchical_dataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=opt.batch_size,
        shuffle=True,  # 'True' to check training progress with validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid, pin_memory=True)
    print('-' * 80)

    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception as e:  # for batchnorm.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).cuda()
    model.train()
    if opt.continue_model != '':
        print(f'loading pretrained model from {opt.continue_model}')
        model.load_state_dict(torch.load(opt.continue_model))
    print("Model:")
    print(model)

    """ setup loss """
    if 'CTC' in opt.Prediction:
        criterion = CTCLoss(reduction='sum')
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).cuda()  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # filter that only require gradient decent
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters, lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr,
                                   rho=opt.rho, eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    """ final options """
    # print(opt)
    with open(f'./saved_models/{opt.experiment_name}/opt.txt', 'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    """ start training """
    start_iter = 0
    if opt.continue_model != '':
        # Checkpoint names end with the iteration number: e.g. "..._30000.pth".
        start_iter = int(opt.continue_model.split('_')[-1].split('.')[0])
        print(f'continue to train, start_iter: {start_iter}')
    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = 1e+6
    i = start_iter

    while(True):
        # train part
        for p in model.parameters():
            p.requires_grad = True
        cpu_images, cpu_texts = train_dataset.get_batch()
        image = cpu_images.cuda()
        text, length = converter.encode(cpu_texts)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text)
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            preds = preds.permute(1, 0, 2)  # to use CTCLoss format
            cost = criterion(preds, text, preds_size, length) / batch_size
        else:
            preds = model(image, text)
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]),
                             target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        loss_avg.add(cost)

        # validation part
        if i % opt.valInterval == 0:
            elapsed_time = time.time() - start_time
            print(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}')
            # for log
            with open(f'./saved_models/{opt.experiment_name}/log_train.txt', 'a') as log:
                log.write(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}\n')
                loss_avg.reset()

                model.eval()
                valid_loss, current_accuracy, current_norm_ED, preds, gts, infer_time = validation(
                    model, criterion, valid_loader, converter, opt)
                model.train()

                # Show a few sample predictions next to their ground truths.
                for pred, gt in zip(preds[:5], gts[:5]):
                    if 'CTC' not in opt.Prediction:
                        pred = pred[:pred.find('[s]')]
                        gt = gt[:gt.find('[s]')]
                    print(f'{pred:20s}, gt: {gt:20s}, {str(pred == gt)}')
                    log.write(f'{pred:20s}, gt: {gt:20s}, {str(pred == gt)}\n')

                valid_log = f'[{i}/{opt.num_iter}] valid loss: {valid_loss:0.5f}'
                valid_log += f' accuracy: {current_accuracy:0.3f}, norm_ED: {current_norm_ED:0.2f}'
                print(valid_log)
                log.write(valid_log + '\n')

                # keep best accuracy model
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/best_accuracy.pth')
                if current_norm_ED < best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/best_norm_ED.pth')
                best_model_log = f'best_accuracy: {best_accuracy:0.3f}, best_norm_ED: {best_norm_ED:0.2f}'
                print(best_model_log)
                log.write(best_model_log + '\n')

        # save model per 1e+5 iter.
        if (i + 1) % 1e+5 == 0:
            torch.save(
                model.state_dict(),
                f'./saved_models/{opt.experiment_name}/iter_{i+1}.pth')

        if i == opt.num_iter:
            print('end the training')
            sys.exit()
        i += 1
def main(opts):
    """OCR-only training loop for the OctGatedMLT model.

    Optionally restores weights/optimizer state from ``opts.model``, can
    freeze sub-networks, then trains with CTC loss over batches from
    ``opts.train_list``, logging every ``disp_interval`` steps and saving a
    checkpoint every ``batch_per_epoch`` steps.

    Relies on module-level globals: ``base_lr``, ``weight_decay``,
    ``disp_interval``, ``batch_per_epoch`` and ``codec``.
    """
    model_name = 'OctGatedMLT'
    net = OctMLT(attention=True)
    acc = []

    if opts.cuda:
        net.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=base_lr,
                                 weight_decay=weight_decay)
    step_start = 0
    if os.path.exists(opts.model):
        # Fixed: referenced undefined name `args` — the parameter is `opts`.
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(
            opts.model,
            net,
            optimizer,
            load_ocr=opts.load_ocr,
            load_detection=opts.load_detection,
            load_shared=opts.load_shared,
            load_optimizer=opts.load_optimizer,
            reset_step=opts.load_reset_step)
    else:
        learning_rate = base_lr
        step_start = 0

    net.train()

    if opts.freeze_shared:
        net_utils.freeze_shared(net)
    if opts.freeze_ocr:
        net_utils.freeze_ocr(net)
    if opts.freeze_detection:
        net_utils.freeze_detection(net)

    #acc_test = test(net, codec, opts, list_file=opts.valid_list, norm_height=opts.norm_height)
    #acc.append([0, acc_test])

    ctc_loss = CTCLoss()

    data_generator = ocr_gen.get_batch(num_workers=opts.num_readers,
                                       batch_size=opts.batch_size,
                                       train_list=opts.train_list,
                                       in_train=True,
                                       norm_height=opts.norm_height,
                                       rgb=True)

    train_loss = 0
    cnt = 0

    for step in range(step_start, 300000):
        # batch
        images, labels, label_length = next(data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(
            0, 3, 1, 2)
        features = net.forward_features(im_data)
        labels_pred = net.forward_ocr(features)

        # backward
        '''
        acts: Tensor of (seqLength x batch x outputDim) containing output from network
        labels: 1 dimensional Tensor containing all the targets of the batch in one sequence
        act_lens: Tensor of size (batch) containing size of each output sequence from the network
        act_lens: Tensor of (batch) containing label length of each example
        '''
        probs_sizes = torch.IntTensor(
            [(labels_pred.permute(2, 0, 1).size()[0])] *
            (labels_pred.permute(2, 0, 1).size()[1]))
        label_sizes = torch.IntTensor(
            torch.from_numpy(np.array(label_length)).int())
        labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int())
        loss = ctc_loss(labels_pred.permute(2, 0, 1), labels, probs_sizes,
                        label_sizes) / im_data.size(0)  # change 1.9.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Skip non-finite losses when accumulating the display average.
        if not np.isinf(loss.data.cpu().numpy()):
            train_loss += loss.data.cpu().numpy()[0] if isinstance(
                loss.data.cpu().numpy(), list) else loss.data.cpu().numpy(
                )  #net.bbox_loss.data.cpu().numpy()[0]
            cnt += 1

        if opts.debug:
            dbg = labels_pred.data.cpu().numpy()
            ctc_f = dbg.swapaxes(1, 2)
            labels = ctc_f.argmax(2)
            det_text, conf, dec_s = print_seq_ext(labels[0, :], codec)
            print('{0} \t'.format(det_text))

        if step % disp_interval == 0:
            # Fixed: guard against cnt == 0 (possible when every loss in the
            # window was infinite) to avoid a division-by-zero.
            train_loss /= max(cnt, 1)
            print('epoch %d[%d], loss: %.3f, lr: %.5f ' %
                  (step / batch_per_epoch, step, train_loss, learning_rate))

            train_loss = 0
            cnt = 0

        if step > step_start and (step % batch_per_epoch == 0):
            save_name = os.path.join(opts.save_path,
                                     '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, save_name)
            print('save model: {}'.format(save_name))
            #acc_test, ted = test(net, codec, opts, list_file=opts.valid_list, norm_height=opts.norm_height)
            #acc.append([0, acc_test, ted])
            np.savez('train_acc_{0}'.format(model_name), acc=acc)
# NOTE(review): fragment — begins inside a setup routine (`val_dataset`,
# `sampler` and `cfg` come from earlier, outside this view) and is truncated
# mid-statement at the end.
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=cfg.batchSize,
                                         shuffle=True,
                                         sampler=sampler,
                                         num_workers=int(cfg.workers),
                                         collate_fn=dataset.alignCollate(
                                             imgH=cfg.imgH,
                                             imgW=cfg.imgW,
                                             keep_ratio=cfg.keep_ratio,
                                             cuda=cfg.cuda))
# Label <-> index converter built from the dictionary file.
converter = utils.strLabelConverter(cfg.dic_path)

# 2: loss
criterion = CTCLoss()

# 3: model
# NOTE(review): rebinding the module name `crnn` to the instance shadows the
# imported module — works once, but a second `crnn.CRNN(...)` call would fail.
crnn = crnn.CRNN(cfg.imgH, cfg.nc, cfg.nclass, cfg.nh)
if cfg.cuda:
    crnn.cuda()
    criterion = criterion.cuda()
crnn.apply(weights_init)
if cfg.loadCheckpoint != None:
    print('loading pretrained model from %s' % cfg.loadCheckpoint)
    crnn.load_state_dict(torch.load(cfg.loadCheckpoint))

# 4: optimization method
if cfg.adam:
    optimizer = optim.Adam(crnn.parameters(),
# NOTE(review): truncated here — the remaining Adam arguments and the
# non-adam branch lie outside this view.
def main(data_path, abc, seq_proj, backend, snapshot, input_size, base_lr, step_size, max_iter, batch_size, output_dir, test_epoch, test_init, gpu):
    """Train a CRNN text recognizer with CTC loss.

    Runs an endless epoch loop: optionally evaluates (and saves the best
    model) at epoch boundaries, then trains one epoch, checkpointing every
    500 iterations and once more at the end of each epoch under
    ``output_dir``.

    Args:
        gpu: comma-separated CUDA device ids; empty string means CPU.
    """
    print(abc)
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    # Fixed: was `True if gpu is not '' else False` — `is not` compares
    # object identity, not string content; use a plain inequality.
    cuda = gpu != ''
    input_size = [int(x) for x in input_size.split('x')]
    transform = Compose([
        # Rotation(),
        Translation(),
        # Scale(),
        Resize(size=(input_size[0], input_size[1]))
    ])
    if data_path is not None:
        data = TextDataset(data_path=data_path, mode="train",
                           transform=transform, abc=abc)
    else:
        data = TestDataset(transform=transform, abc=abc)
    seq_proj = [int(x) for x in seq_proj.split('x')]
    net = load_model(data.get_abc(), seq_proj, backend, snapshot, cuda)
    optimizer = optim.Adam(net.parameters(), lr=base_lr, weight_decay=0.0001)
    lr_scheduler = StepLR(optimizer, step_size=step_size, max_iter=max_iter)
    loss_function = CTCLoss(average_frames=True, reduction="mean", blank=0)

    acc_best = 0
    epoch_count = 0
    while True:
        # ---- periodic test phase (and at startup if requested)
        if (test_epoch is not None and epoch_count != 0 and epoch_count % test_epoch == 0) or (test_init and epoch_count == 0):
            print("Test phase")
            data.set_mode("test")
            net = net.eval()
            acc, avg_ed = test(net, data, data.get_abc(), cuda, visualize=False)
            net = net.train()
            data.set_mode("train")
            if acc > acc_best:
                if output_dir is not None:
                    torch.save(net.state_dict(), os.path.join(output_dir, "crnn_" + backend + "_" + str(data.get_abc()) + "_best"))
                acc_best = acc
            print("acc: {}\tacc_best: {}; avg_ed: {}".format(acc, acc_best, avg_ed))

        # ---- one training epoch
        data_loader = DataLoader(data, batch_size=batch_size, num_workers=1,
                                 shuffle=True, collate_fn=text_collate)
        loss_mean = []
        iterator = tqdm(data_loader)
        iter_count = 0
        save_num = 0
        for sample in iterator:
            # for multi-gpu support
            if sample["img"].size(0) % len(gpu.split(',')) != 0:
                continue
            optimizer.zero_grad()
            imgs = Variable(sample["img"])
            # img = imgs[0]
            # print(img.shape)
            # cv2.imshow('image', img.numpy().transpose(1, 2, 0))
            # cv2.waitKey(0)
            labels = Variable(sample["seq"]).view(-1)
            label_lens = Variable(sample["seq_len"].int())
            if cuda:
                imgs = imgs.cuda()
            preds = net(imgs).cpu()
            # NOTE(review): uses the nominal `batch_size`, not
            # imgs.size(0) — a smaller final batch would mismatch; confirm
            # the loader drops partial batches.
            pred_lens = Variable(Tensor([preds.size(0)] * batch_size).int())
            loss = loss_function(preds, labels, pred_lens, label_lens) / batch_size
            loss.backward()
            nn.utils.clip_grad_norm(net.parameters(), 10.0)
            loss_mean.append(loss.data[0])
            status = "epoch: {}; iter: {}; lr: {}; loss_mean: {}; loss: {}".format(epoch_count, lr_scheduler.last_iter+1, lr_scheduler.get_lr(), np.mean(loss_mean), loss.data[0])
            iterator.set_description(status)
            optimizer.step()
            lr_scheduler.step()
            iter_count += 1
            if output_dir is not None and iter_count % 500 == 0:
                # Fixed: `"crnn_" + save_num` raised TypeError (int + str);
                # convert the counter to a string.
                torch.save(net.state_dict(), os.path.join(output_dir, "crnn_" + str(save_num) + "_" + str(loss) + "_last"))
                save_num += 1

        if output_dir is not None:
            torch.save(net.state_dict(), os.path.join(output_dir, "crnn_" + backend + "_" + str(data.get_abc()) + "_last"))
        epoch_count += 1

    return