Example #1
def main():
    config = parse_arg()

    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = get_crnn(config).to(device)
    if config.TRAIN.RESUME.IS_RESUME:
        model_state_file = config.TRAIN.RESUME.FILE
        if model_state_file != '' and os.path.exists(model_state_file):
            print('loading pretrained model from %s' % model_state_file)
            model.load_state_dict(torch.load(model_state_file))

    criterion = torch.nn.CTCLoss(reduction='sum').to(device)

    val_dataset = OcrDataset(config, is_train=False)
    val_dataloader = data.DataLoader(
        dataset=val_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU,
        shuffle=config.TEST.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    converter = utils.strLabelConverter(alphabet)
    acc = validate(config, val_dataloader, converter, model, criterion, device)
Example #2
    def __init__(self, model_path='./checkpoints/CRNN.pth'):
        self.alphabet = ''.join([chr(uni) for uni in crnn_params.alphabet])
        self.nclass = len(self.alphabet) + 1  # +1 for the CTC blank
        self.model = CRNN(crnn_params.imgH, 1, self.nclass, 256)
        self.use_gpu = torch.cuda.is_available()
        if self.use_gpu:
            self.model.cuda()
        # map_location='cpu' keeps the load working even if the checkpoint was saved on GPU;
        # load_state_dict then copies the weights onto the model's current device
        self.model.load_state_dict(torch.load(model_path, map_location='cpu'))
        for p in self.model.parameters():
            p.requires_grad = False
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)
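
These examples all go through strLabelConverter for the text-to-index mapping. Below is a minimal sketch of what such a converter typically does, assuming the common crnn.pytorch-style layout where index 0 is the CTC blank; the class name is hypothetical, and the real converter also handles batched decoding via preds_size:

import torch

class SimpleLabelConverter:
    """Hypothetical stand-in for strLabelConverter: encodes text for CTC and
    decodes network output by collapsing repeats and dropping blanks."""

    def __init__(self, alphabet):
        self.alphabet = alphabet
        # index 0 is reserved for the CTC blank, so real characters start at 1
        self.char_to_idx = {ch: i + 1 for i, ch in enumerate(alphabet)}

    def encode(self, texts):
        # flat target tensor plus per-string lengths, the format CTC losses expect
        indices = [self.char_to_idx[ch] for t in texts for ch in t]
        lengths = [len(t) for t in texts]
        return torch.IntTensor(indices), torch.IntTensor(lengths)

    def decode(self, preds, raw=False):
        # preds: 1-D tensor of argmax indices for a single sequence
        if raw:
            return ''.join(self.alphabet[i - 1] if i > 0 else '-' for i in preds.tolist())
        out, prev = [], 0
        for i in preds.tolist():
            if i != 0 and i != prev:  # drop blanks and collapse repeats
                out.append(self.alphabet[i - 1])
            prev = i
        return ''.join(out)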
Example #3
    def __init__(self, model_path):
        # def crnnSource(model_path, use_gpu=True):
        alphabet = keys.alphabet  # Chinese characters
        self.converter = crnn_utils.strLabelConverter(alphabet)
        # note that https://github.com/bear63/sceneReco supports multi-GPU:
        # model = crnn.CRNN(32, 1, len(alphabet)+1, 256, 1).cuda()
        self.model = crnn.CRNN(32, 1, len(alphabet) + 1, 256)
        self.cpu_model = crnn.CRNN(32, 1, len(alphabet) + 1, 256)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        print('loading pretrained model from %s' % model_path)
        # model_path = './crnn/samples/netCRNN63.pth'
        # load on CPU so the same state dict can feed both the GPU and the CPU model
        model_state_dict = torch.load(model_path, map_location='cpu')
        self.model.load_state_dict(model_state_dict)
        self.cpu_model.load_state_dict(model_state_dict)
Example #4
def val(model, loader, criterion, device):
    print('Start val')
    for p in model.parameters():
        p.requires_grad = False
    model.eval()

    loss_avg = utils.averager()
    alphabet = ''.join([chr(uni) for uni in crnn_params.alphabet])
    converter = utils.strLabelConverter(alphabet)
    n_total = 0
    n_correct = 0
    preds = 0
    # for i_batch, (image, label, index) in enumerate(loader):
    for i_batch, (image, label, index) in tqdm(enumerate(loader),
                                               total=len(loader),
                                               desc='test model'):
        image = image.to(device)
        preds = model(image)
        batch_size = image.size(0)
        index = np.array(index.data.numpy())
        text, length = converter.encode(label)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        # torch.nn.CTCLoss expects log-probabilities here, matching the training loop
        cost = criterion(preds.log_softmax(2), text, preds_size, length) / batch_size
        loss_avg.add(cost)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, label):
            if pred == target:
                n_correct += 1

        n_total += batch_size

    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:crnn_params.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, label):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(n_total)
    print('Test loss: %.6f, accuracy: %.6f' % (loss_avg.val(), accuracy))

    return accuracy
Example #5
    def __init__(self, *args, **kwargs):
        super(ModelIinit, self).__init__(*args, **kwargs)

        if self.model_params["model_type"] == "crnn_big_size":

            self.model = crnn_big_size.CRNN(
                nc=self.model_params["num_input_channels"],
                nclass=self.nclass,
                nh=self.model_params["hid_layer_size"])

        self.converter = crnn_utils.strLabelConverter(
            self.model_params["alphabet"])
        self.criterion = CTCLoss(zero_infinity=True).to(
            self.general_params["device"])
        self.model.apply(self.weights_init)
        '''load pretrained weights'''
        path_to_pretrained_model = self.model_params[
            self.model_params["model_type"]]["path_pretrained"]

        if path_to_pretrained_model and os.path.isfile(
                path_to_pretrained_model):
            print('loading pretrained model')
            # load_state_dict needs the deserialized state dict, not the file path
            self.model.load_state_dict(torch.load(path_to_pretrained_model))

        self.model.to(self.general_params["device"])
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=range(
                                               self.general_params["num_gpu"]))
        '''initialise optimizer'''
        if self.model_params["optimizer"] == "Adam":
            self.optimizer = optim.Adam(self.model.parameters(),
                                        lr=self.model_params["adam"]["lr"],
                                        # beta1 left at the 0.9 default; the original reused the lr here
                                        betas=(0.9, 0.999))

        elif self.model_params["optimizer"] == "adadelta":
            self.optimizer = optim.Adadelta(self.model.parameters(),
                                            lr=self.model_params["adam"]["lr"])
        else:
            self.optimizer = optim.RMSprop(self.model.parameters(),
                                           lr=self.model_params["adam"]["lr"])
Example #6
def train(model, loader, criterion, optimizer, iteration, device):
    for p in model.parameters():
        p.requires_grad = True
    model.train()

    loss_avg = utils.averager()
    alphabet = ''.join([chr(uni) for uni in crnn_params.alphabet])
    converter = utils.strLabelConverter(alphabet)
    for i_batch, (image, label, index) in enumerate(loader):
        image = image.to(device)
        preds = model(image)
        batch_size = image.size(0)
        text, length = converter.encode(label)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds.log_softmax(2), text, preds_size, length) / batch_size
        model.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)

        if (i_batch+1) % crnn_params.displayInterval == 0:
            theTime = datetime.datetime.now()
            print('%s [%d/%d][%d/%d] Loss: %f' % (theTime, iteration, crnn_params.niter, i_batch, len(loader), loss_avg.val()))
            loss_avg.reset()
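
As a self-contained check of the CTCLoss call above, here are the tensor shapes it expects, using dummy sizes (illustrative values only, not taken from crnn_params):

import torch

T, N, C = 26, 4, 37                                         # time steps, batch, alphabet size + blank
log_probs = torch.randn(T, N, C).log_softmax(2)             # what preds.log_softmax(2) provides
targets = torch.randint(1, C, (N * 5,), dtype=torch.long)   # flat labels (blank index 0 excluded)
input_lengths = torch.full((N,), T, dtype=torch.long)       # preds_size in the loop above
target_lengths = torch.full((N,), 5, dtype=torch.long)      # length from converter.encode

criterion = torch.nn.CTCLoss(zero_infinity=True)
loss = criterion(log_probs, targets, input_lengths, target_lengths)
print(loss.item())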
Example #7
model_path = './checkpoints/CRNN.pth'
alphabet = keys.alphabet
imgH = 32
imgW = 280
model = crnn.CRNN(imgH, 1, len(alphabet) + 1, 256)
gpu = torch.cuda.is_available()  # assumed here; the original snippet relies on a gpu flag defined elsewhere
if gpu:
    model = model.cuda()
print('loading pretrained model from %s' % model_path)
if gpu:
    model.load_state_dict(torch.load(model_path))
else:
    model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))
model.eval()
print('done')
print('starting...')
converter = crnn_utils.strLabelConverter(alphabet)
transformer = mydataset.resizeNormalize3((imgW, imgH))

def recognize_cv2_image(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(np.uint8(img)).convert('L')
    image = transformer(image)
    if gpu:
        image = image.cuda()
    # add the batch dimension the CRNN expects (assuming the transformer returns C x H x W)
    image = image.view(1, *image.size())

    preds = model(image)
    preds = F.log_softmax(preds, 2)
    conf, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)

    preds_size = Variable(torch.IntTensor([preds.size(0)]))
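    # the original example is cut off here; a typical continuation, mirroring the
    # decode step used in the other examples, would be:
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    return sim_pred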
Example #8
    print('alphabet length : ', config.MODEL.NUM_CLASSES)

    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = get_crnn(config).to(device)
    if args.model_path != '' and os.path.exists(args.model_path):
        print('loading pretrained model from %s' % args.model_path)
        model.load_state_dict(torch.load(args.model_path))

    image = Image.open(args.image_name).convert("L")
    w, h = image.size
    new_w = int(w / h * config.MODEL.IMAGE_SIZE.H)
    image = image.resize((new_w, config.MODEL.IMAGE_SIZE.H))
    image = np.array(image).astype(np.float32)
    image = (image / 255.0 - config.DATASET.MEAN) / config.DATASET.STD
    image = np.expand_dims(image, axis=0)  # add the channel dimension
    image = np.expand_dims(image, axis=0)  # add the batch dimension -> (1, 1, H, W)
    image = torch.from_numpy(image)

    converter = utils.strLabelConverter(alphabet)
    model.eval()
    with torch.no_grad():
        image = image.to(device)
        preds = model(image)
        preds_size = torch.IntTensor([preds.size(0)] * image.size(0))
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        print(sim_preds)
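
The resize-and-normalize steps above can be gathered into one helper. This is only a sketch, assuming scalar DATASET.MEAN/STD values in [0, 1] and a grayscale model input; the function and parameter names are hypothetical:

import numpy as np
import torch
from PIL import Image

def preprocess(pil_img, target_h, mean, std):
    w, h = pil_img.size
    new_w = int(w / h * target_h)                        # keep the aspect ratio at a fixed height
    img = pil_img.convert("L").resize((new_w, target_h))
    arr = (np.array(img).astype(np.float32) / 255.0 - mean) / std
    return torch.from_numpy(arr).unsqueeze(0).unsqueeze(0)   # (1, 1, H, W)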
Example #9
def main():
    config = parse_arg()

    output_dict = utils.create_log_folder(config, phase='train')
    # writer dict
    writer_dict = {
        'writer': SummaryWriter(log_dir=output_dict['tb_dir']),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }
    last_epoch = config.TRAIN.BEGIN_EPOCH

    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = get_crnn(config).to(device)
    if config.TRAIN.RESUME.IS_RESUME:
        model_state_file = config.TRAIN.RESUME.FILE
        if model_state_file != '' and os.path.exists(model_state_file):
            print('loading pretrained model from %s' % model_state_file)
            model.load_state_dict(torch.load(model_state_file))

    criterion = torch.nn.CTCLoss(reduction='sum').to(device)
    optimizer = optim.Adam(model.parameters(), lr=config.TRAIN.LR)
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR, last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR, last_epoch - 1)
    
    train_dataset = OcrDataset(config, is_train=True)
    train_dataloader = data.DataLoader(
        dataset=train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    val_dataset = OcrDataset(config, is_train=False)
    val_dataloader = data.DataLoader(
        dataset=val_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU,
        shuffle=config.TEST.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    best_acc = 0.01
    converter = utils.strLabelConverter(alphabet)
    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        train(config, train_dataloader, converter, model, criterion, optimizer, device, epoch, writer_dict)
        lr_scheduler.step()
        acc = validate(config, val_dataloader, converter, model, criterion, device, epoch, writer_dict)
        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), '{0}/crnn_Rec_done_{1:04d}_{2:.4f}.pth'.format(output_dict['chs_dir'], epoch, acc))
            torch.save(model.state_dict(), '{0}/crnn_Rec_best.pth'.format(output_dict['chs_dir']))
    
    writer_dict['writer'].close()
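
For reference, the scheduler branch above selects MultiStepLR when LR_STEP is a list of milestone epochs and StepLR when it is a single step size; a sketch with made-up values (not the values from this config):

import torch
from torch import optim

model = torch.nn.Linear(10, 10)                       # placeholder model for the illustration
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# LR_STEP given as a list of milestones -> MultiStepLR
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[60, 80], gamma=0.1)

# LR_STEP given as a single integer -> StepLR
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)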