def main():
    """Build the CRNN model from config, optionally resume weights, and run one validation pass."""
    cfg = parse_arg()

    # Enable cuDNN autotuning when a GPU is present.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.backends.cudnn.benchmark = True
    device = torch.device("cuda:0" if use_cuda else "cpu")

    model = get_crnn(cfg).to(device)

    # Optionally restore weights from a checkpoint file.
    if cfg.TRAIN.RESUME.IS_RESUME:
        ckpt_path = cfg.TRAIN.RESUME.FILE
        if ckpt_path != '' and os.path.exists(ckpt_path):
            print('loading pretrained model from %s' % ckpt_path)
            model.load_state_dict(torch.load(ckpt_path))

    criterion = torch.nn.CTCLoss(reduction='sum').to(device)

    eval_set = OcrDataset(cfg, is_train=False)
    eval_loader = data.DataLoader(
        dataset=eval_set,
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU,
        shuffle=cfg.TEST.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )

    # NOTE(review): `alphabet` is assumed to be a module-level name — confirm.
    converter = utils.strLabelConverter(alphabet)
    acc = validate(cfg, eval_loader, converter, model, criterion, device)
def __init__(self, model_path='./checkpoints/CRNN.pth'):
    """Load a pretrained CRNN recognizer and freeze it for inference.

    Args:
        model_path: path to a saved ``state_dict`` checkpoint.
    """
    # Alphabet is stored as unicode code points in crnn_params.
    self.alphabet = ''.join([chr(uni) for uni in crnn_params.alphabet])
    self.nclass = len(self.alphabet) + 1  # +1 for the CTC blank class
    self.model = CRNN(crnn_params.imgH, 1, self.nclass, 256)
    self.use_gpu = torch.cuda.is_available()
    if self.use_gpu:
        self.model.cuda()
        self.model.load_state_dict(torch.load(model_path))
    else:
        # Bug fix: without map_location a checkpoint saved on GPU cannot be
        # loaded on a CPU-only machine (the original unconditionally called
        # torch.load(model_path)).
        self.model.load_state_dict(
            torch.load(model_path, map_location=lambda storage, loc: storage))
    # Inference-only: freeze all parameters and switch to eval mode.
    for p in self.model.parameters():
        p.requires_grad = False
    self.model.eval()
    self.converter = strLabelConverter(self.alphabet)
def __init__(self, model_path):
    """Create GPU and CPU copies of the CRNN and load shared weights into both."""
    charset = keys.alphabet  # Chinese character set
    self.converter = crnn_utils.strLabelConverter(charset)
    nclass = len(charset) + 1  # one extra class for the CTC blank
    # Two identical networks: one moved to GPU (when available), one kept on CPU.
    self.model = crnn.CRNN(32, 1, nclass, 256)
    self.cpu_model = crnn.CRNN(32, 1, nclass, 256)
    if torch.cuda.is_available():
        self.model = self.model.cuda()
    print('loading pretrained model from %s' % model_path)
    state = torch.load(model_path)
    self.model.load_state_dict(state)
    self.cpu_model.load_state_dict(state)
def val(model, loader, criterion, device):
    """Evaluate `model` on `loader`: print sample decodes, return sequence accuracy.

    Args:
        model: CRNN producing (T, N, C) logits.
        loader: yields (image, label, index) batches.
        criterion: CTC loss (expects log-probabilities).
        device: target torch device.

    Returns:
        Fraction of sequences decoded exactly equal to their label (0.0 for an
        empty loader).
    """
    print('Start val')
    for p in model.parameters():
        p.requires_grad = False
    model.eval()

    loss_avg = utils.averager()
    alphabet = ''.join([chr(uni) for uni in crnn_params.alphabet])
    converter = utils.strLabelConverter(alphabet)

    n_total = 0
    n_correct = 0
    for i_batch, (image, label, index) in tqdm(enumerate(loader), total=len(loader), desc='test model'):
        image = image.to(device)
        preds = model(image)
        batch_size = image.size(0)
        text, length = converter.encode(label)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        # Consistency/bug fix: CTCLoss expects log-probabilities; apply
        # log_softmax exactly as the training loop does (the original passed
        # raw logits here while train() used preds.log_softmax(2)).
        cost = criterion(preds.log_softmax(2), text, preds_size, length) / batch_size
        loss_avg.add(cost)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, label):
            if pred == target:
                n_correct += 1
        n_total += batch_size

    # Display a few raw/decoded predictions from the last batch only.
    if n_total:
        raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:crnn_params.n_test_disp]
        for raw_pred, pred, gt in zip(raw_preds, sim_preds, label):
            print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    # Robustness: avoid ZeroDivisionError on an empty loader.
    accuracy = n_correct / float(n_total) if n_total else 0.0
    print('Test loss: %.6f, accuray: %.6f' % (loss_avg.val(), accuracy))
    return accuracy
def __init__(self, *args, **kwargs):
    """Build the CRNN model, CTC criterion, and optimizer from the params dicts."""
    super(ModelIinit, self).__init__(*args, **kwargs)
    if self.model_params["model_type"] == "crnn_big_size":
        self.model = crnn_big_size.CRNN(
            nc=self.model_params["num_input_channels"],
            nclass=self.nclass,
            nh=self.model_params["hid_layer_size"])
    self.converter = crnn_utils.strLabelConverter(
        self.model_params["alphabet"])
    self.criterion = CTCLoss(zero_infinity=True).to(
        self.general_params["device"])
    self.model.apply(self.weights_init)

    # Load pretrained weights when a valid checkpoint path is configured.
    path_to_pretrained_model = self.model_params[
        self.model_params["model_type"]]["path_pretrained"]
    if path_to_pretrained_model and os.path.isfile(
            path_to_pretrained_model):
        print('loading pretrained model')
        # Bug fix: load_state_dict expects a state dict, not a path string;
        # the original passed the path itself and would raise at runtime.
        # map_location keeps the load working regardless of where the
        # checkpoint was saved.
        self.model.load_state_dict(
            torch.load(path_to_pretrained_model,
                       map_location=self.general_params["device"]))
    self.model.to(self.general_params["device"])
    self.model = torch.nn.DataParallel(self.model,
                                       device_ids=range(
                                           self.general_params["num_gpu"]))

    # Optimizer selection.
    if self.model_params["optimizer"] == "Adam":
        # Bug fix: the original passed the learning rate as Adam's beta1.
        # Assumes model_params["adam"] is a plain dict — falls back to the
        # conventional 0.9 when no beta1 is configured.
        self.optimizer = optim.Adam(
            self.model.parameters(),
            lr=self.model_params["adam"]["lr"],
            betas=(self.model_params["adam"].get("beta1", 0.9), 0.999))
    elif self.model_params["optimizer"] == "adadelta":
        self.optimizer = optim.Adadelta(self.model.parameters(),
                                        lr=self.model_params["adam"]["lr"])
    else:
        self.optimizer = optim.RMSprop(self.model.parameters(),
                                       lr=self.model_params["adam"]["lr"])
def train(model, loader, criterion, optimizer, iteration, device):
    """Run one training pass of the CRNN over `loader` with CTC loss."""
    for p in model.parameters():
        p.requires_grad = True
    model.train()

    loss_avg = utils.averager()
    charset = ''.join(chr(code) for code in crnn_params.alphabet)
    converter = utils.strLabelConverter(charset)

    for batch_idx, (image, label, index) in enumerate(loader):
        image = image.to(device)
        preds = model(image)
        n = image.size(0)
        text, length = converter.encode(label)
        preds_size = torch.IntTensor([preds.size(0)] * n)
        # CTCLoss consumes log-probabilities over the class axis.
        cost = criterion(preds.log_softmax(2), text, preds_size, length) / n
        model.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)
        # Periodic progress report; reset the running average each time.
        if (batch_idx + 1) % crnn_params.displayInterval == 0:
            theTime = datetime.datetime.now()
            print('%s [%d/%d][%d/%d] Loss: %f' %
                  (theTime, iteration, crnn_params.niter, batch_idx,
                   len(loader), loss_avg.val()))
            loss_avg.reset()
# --- Module-level model setup for single-image OCR inference ---
model_path = './checkpoints/CRNN.pth'
alphabet = keys.alphabet
imgH = 32   # input height expected by the CRNN
imgW = 280  # width used by the resize/normalize transform
# len(alphabet) + 1: one extra output class for the CTC blank symbol.
model = crnn.CRNN(imgH, 1, len(alphabet) + 1, 256)
# NOTE(review): `gpu` is not defined in this chunk — presumably a module-level flag; confirm.
if gpu:
    model = model.cuda()
print('loading pretrained model from %s' % model_path)
if gpu:
    model.load_state_dict( torch.load( model_path ) )
else:
    # map_location keeps GPU-saved tensors loadable on a CPU-only machine.
    model.load_state_dict(torch.load(model_path,map_location=lambda storage,loc:storage))
model.eval()
print('done')
print('starting...')
converter = crnn_utils.strLabelConverter(alphabet)
transformer = mydataset.resizeNormalize3((imgW, imgH))


def recognize_cv2_image(img):
    """Run the CRNN forward pass on a BGR OpenCV image.

    NOTE(review): the body appears truncated in this chunk — it stops after
    computing `preds_size`, before any decode or return step is visible.
    """
    img = cv2.cvtColor( img, cv2.COLOR_BGR2RGB )
    # PIL grayscale conversion, then the module-level resize/normalize transform.
    image = Image.fromarray(np.uint8(img)).convert('L')
    image = transformer( image )
    if gpu:
        image = image.cuda()
    # NOTE(review): no explicit batch dimension is added here — presumably
    # resizeNormalize3 yields a tensor the model accepts; verify.
    preds = model( image )
    preds = F.log_softmax(preds,2)
    conf, preds = preds.max( 2 )
    # Flatten the per-timestep argmax indices into a 1-D sequence for CTC decoding.
    preds = preds.transpose( 1, 0 ).contiguous().view( -1 )
    preds_size = Variable( torch.IntTensor( [preds.size( 0 )] ) )
# Single-image CRNN inference: load model, preprocess one image, print the decode.
print('alphabet length : ', config.MODEL.NUM_CLASSES)

cuda_ok = torch.cuda.is_available()
if cuda_ok:
    torch.backends.cudnn.benchmark = True
device = torch.device("cuda:0" if cuda_ok else "cpu")

model = get_crnn(config).to(device)
if args.model_path != '' and os.path.exists(args.model_path):
    print('loading pretrained model from %s' % args.model_path)
    model.load_state_dict(torch.load(args.model_path))

# Load as grayscale and rescale to the configured height, preserving aspect ratio.
image = Image.open(args.image_name).convert("L")
w, h = image.size
target_h = config.MODEL.IMAGE_SIZE.H
image = image.resize((int(w / h * target_h), target_h))

# Normalize, then add batch and channel dimensions -> (1, 1, H, W).
image = np.array(image).astype(np.float32)
image = (image / 255.0 - config.DATASET.MEAN) / config.DATASET.STD
image = torch.from_numpy(image[np.newaxis, np.newaxis, :, :])

converter = utils.strLabelConverter(alphabet)

model.eval()
with torch.no_grad():
    image = image.to(device)
    preds = model(image)
    preds_size = torch.IntTensor([preds.size(0)] * image.size(0))
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
    print(sim_preds)
def main():
    """Training entry point: build model/loss/optimizer from config, then
    alternate train and validate per epoch, checkpointing on improvement."""
    cfg = parse_arg()
    output_dict = utils.create_log_folder(cfg, phase='train')

    # Tensorboard writer plus global step counters shared with train/validate.
    writer_dict = {
        'writer': SummaryWriter(log_dir=output_dict['tb_dir']),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    start_epoch = cfg.TRAIN.BEGIN_EPOCH

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.backends.cudnn.benchmark = True
    device = torch.device("cuda:0" if use_cuda else "cpu")

    model = get_crnn(cfg).to(device)

    # Resume weights when configured and the checkpoint exists.
    if cfg.TRAIN.RESUME.IS_RESUME:
        ckpt = cfg.TRAIN.RESUME.FILE
        if ckpt != '' and os.path.exists(ckpt):
            print('loading pretrained model from %s' % ckpt)
            model.load_state_dict(torch.load(ckpt))

    criterion = torch.nn.CTCLoss(reduction='sum').to(device)
    optimizer = optim.Adam(model.parameters(), lr=cfg.TRAIN.LR)

    # A list of milestones selects MultiStepLR; a single step size selects StepLR.
    scheduler_cls = (torch.optim.lr_scheduler.MultiStepLR
                     if isinstance(cfg.TRAIN.LR_STEP, list)
                     else torch.optim.lr_scheduler.StepLR)
    lr_scheduler = scheduler_cls(optimizer, cfg.TRAIN.LR_STEP,
                                 cfg.TRAIN.LR_FACTOR, start_epoch - 1)

    train_loader = data.DataLoader(
        dataset=OcrDataset(cfg, is_train=True),
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU,
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )
    val_loader = data.DataLoader(
        dataset=OcrDataset(cfg, is_train=False),
        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU,
        shuffle=cfg.TEST.SHUFFLE,
        num_workers=cfg.WORKERS,
        pin_memory=cfg.PIN_MEMORY,
    )

    best_acc = 0.01
    # NOTE(review): `alphabet` is assumed to be defined at module level — confirm.
    converter = utils.strLabelConverter(alphabet)

    for epoch in range(start_epoch, cfg.TRAIN.END_EPOCH):
        train(cfg, train_loader, converter, model, criterion, optimizer,
              device, epoch, writer_dict)
        lr_scheduler.step()

        acc = validate(cfg, val_loader, converter, model, criterion,
                       device, epoch, writer_dict)
        if acc > best_acc:
            best_acc = acc
            # Keep both a per-epoch snapshot and a rolling "best" checkpoint.
            torch.save(model.state_dict(),
                       '{0}/crnn_Rec_done_{1:04d}_{2:.4f}.pth'.format(
                           output_dict['chs_dir'], epoch, acc))
            torch.save(model.state_dict(),
                       '{0}/crnn_Rec_best.pth'.format(output_dict['chs_dir']))

    writer_dict['writer'].close()