Example No. 1
def train():
    print('start training ...........')
    batch_size = 16
    num_epochs = 50
    learning_rate = 0.1

    label_converter = LabelConverter(char_set=string.ascii_lowercase + string.digits)
    vocab_size = label_converter.get_vocab_size()

    device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
    model = CRNN(vocab_size=vocab_size).to(device)
    # model.load_state_dict(torch.load('output/weight.pth', map_location=device))

    train_loader, val_loader = get_loader('data/CAPTCHA Images/', batch_size=batch_size)

    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    # scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 10, 2)

    train_losses, val_losses = [], []
    for epoch in range(num_epochs):
        train_epoch_loss = fit(epoch, model, optimizer, label_converter, device, train_loader, phase='training')
        val_epoch_loss = fit(epoch, model, optimizer, label_converter, device, val_loader, phase='validation')
        print('-----------------------------------------')

        if epoch == 0 or val_epoch_loss <= np.min(val_losses):
            torch.save(model.state_dict(), 'output/weight.pth')

        train_losses.append(train_epoch_loss)
        val_losses.append(val_epoch_loss)

        write_figure('output', train_losses, val_losses)
        write_log('output', epoch, train_epoch_loss, val_epoch_loss)

        scheduler.step(val_epoch_loss)
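
The fit() helper called above is not included in the snippet. A minimal sketch of what it could look like, assuming CTC loss and a label_converter.encode that returns flattened targets plus per-sample lengths (both assumptions, not the original code):

import torch
import torch.nn.functional as F

def fit(epoch, model, optimizer, label_converter, device, loader, phase='training'):
    # Hypothetical sketch; the real fit() is defined elsewhere in the repository.
    if phase == 'training':
        model.train()
    else:
        model.eval()
    running_loss = 0.0
    for images, labels in loader:
        images = images.to(device)
        targets, target_lengths = label_converter.encode(labels)  # assumed API
        with torch.set_grad_enabled(phase == 'training'):
            log_probs = model(images).log_softmax(2)  # (T, N, C)
            input_lengths = torch.full((log_probs.size(1),), log_probs.size(0), dtype=torch.long)
            loss = F.ctc_loss(log_probs, targets, input_lengths, target_lengths)
            if phase == 'training':
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        running_loss += loss.item()
    return running_loss / len(loader)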
Example No. 2
def main():
    epochs = config['epochs']
    train_batch_size = config['train_batch_size']
    eval_batch_size = config['eval_batch_size']
    lr = config['lr']
    show_interval = config['show_interval']
    valid_interval = config['valid_interval']
    save_interval = config['save_interval']
    cpu_workers = config['cpu_workers']
    reload_checkpoint = config['reload_checkpoint']
    valid_max_iter = config['valid_max_iter']

    img_width = config['img_width']
    img_height = config['img_height']
    data_dir = config['data_dir']

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'device: {device}')

    train_dataset = Synth90kDataset(root_dir=data_dir,
                                    mode='train',
                                    img_height=img_height,
                                    img_width=img_width)
    valid_dataset = Synth90kDataset(root_dir=data_dir,
                                    mode='dev',
                                    img_height=img_height,
                                    img_width=img_width)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=train_batch_size,
                              shuffle=True,
                              num_workers=cpu_workers,
                              collate_fn=synth90k_collate_fn)
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=eval_batch_size,
                              shuffle=True,
                              num_workers=cpu_workers,
                              collate_fn=synth90k_collate_fn)

    num_class = len(Synth90kDataset.LABEL2CHAR) + 1
    crnn = CRNN(1,
                img_height,
                img_width,
                num_class,
                map_to_seq_hidden=config['map_to_seq_hidden'],
                rnn_hidden=config['rnn_hidden'],
                leaky_relu=config['leaky_relu'])
    if reload_checkpoint:
        crnn.load_state_dict(torch.load(reload_checkpoint,
                                        map_location=device))
    crnn.to(device)

    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    criterion = CTCLoss(reduction='sum')
    criterion.to(device)

    assert save_interval % valid_interval == 0
    i = 1
    for epoch in range(1, epochs + 1):
        print(f'epoch: {epoch}')
        tot_train_loss = 0.
        tot_train_count = 0
        for train_data in train_loader:
            loss = train_batch(crnn, train_data, optimizer, criterion, device)
            train_size = train_data[0].size(0)

            tot_train_loss += loss
            tot_train_count += train_size
            if i % show_interval == 0:
                print('train_batch_loss[', i, ']: ', loss / train_size)

            if i % valid_interval == 0:
                evaluation = evaluate(crnn,
                                      valid_loader,
                                      criterion,
                                      decode_method=config['decode_method'],
                                      beam_size=config['beam_size'])
                print('valid_evaluation: loss={loss}, acc={acc}'.format(
                    **evaluation))

                if i % save_interval == 0:
                    prefix = 'crnn'
                    loss = evaluation['loss']
                    save_model_path = os.path.join(
                        config['checkpoints_dir'],
                        f'{prefix}_{i:06}_loss{loss}.pt')
                    torch.save(crnn.state_dict(), save_model_path)
                    print('save model at ', save_model_path)

            i += 1

        print('train_loss: ', tot_train_loss / tot_train_count)
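
train_batch() (reused in Example No. 4 below) is defined elsewhere in that repository. A plausible minimal version, assuming each batch is an (images, targets, target_lengths) tuple as produced by synth90k_collate_fn (an assumption):

import torch

def train_batch(crnn, data, optimizer, criterion, device):
    # Sketch only: assumes data = (images, targets, target_lengths).
    crnn.train()
    images, targets, target_lengths = [d.to(device) for d in data]

    logits = crnn(images)  # (seq_len, batch, num_class)
    log_probs = torch.nn.functional.log_softmax(logits, dim=2)

    input_lengths = torch.LongTensor([logits.size(0)] * images.size(0))
    # criterion is CTCLoss(reduction='sum'); the caller averages over samples
    loss = criterion(log_probs, targets, input_lengths, target_lengths)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()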
Example No. 3
def train(field):
    alphabet = ''.join(json.load(open('./cn-alphabet.json', 'rb')))
    nclass = len(alphabet) + 1  # +1 for the CTC blank symbol ('-')
    batch_size = BATCH_SIZE
    if field == 'address' or field == 'psb':
        batch_size = 1  # image length varies

    converter = LabelConverter(alphabet)
    criterion = CTCLoss(zero_infinity=True)

    crnn = CRNN(IMAGE_HEIGHT, nc, nclass, number_hidden)
    crnn.apply(weights_init)

    image_transform = transforms.Compose([
        Rescale(IMAGE_HEIGHT),
        transforms.ToTensor(),
        Normalize()
    ])

    dataset = LmdbDataset(db_path, field, image_transform)
    dataloader = DataLoader(dataset, batch_size=batch_size,
                            shuffle=True, num_workers=4)

    image = torch.FloatTensor(batch_size, 3, IMAGE_HEIGHT, IMAGE_HEIGHT)
    text = torch.IntTensor(batch_size * 5)
    length = torch.IntTensor(batch_size)

    # Variable() is a no-op in PyTorch >= 0.4; kept to match the original code
    image = Variable(image)
    text = Variable(text)
    length = Variable(length)

    loss_avg = utils.averager()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)

    if torch.cuda.is_available():
        crnn.cuda()
        crnn = nn.DataParallel(crnn)
        image = image.cuda()
        criterion = criterion.cuda()

    def train_batch(net, iteration):
        data = next(iteration)  # Python 3 iterator protocol (was iteration.next())
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.load_data(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.load_data(text, t)
        utils.load_data(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        return cost

    nepoch = 25
    for epoch in range(nepoch):
        train_iter = iter(dataloader)
        i = 0
        while i < len(dataloader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            cost = train_batch(crnn, train_iter)
            loss_avg.add(cost)
            i += 1

            if i % 500 == 0:
                print('%s [%d/%d][%d/%d] Loss: %f' %
                      (datetime.datetime.now(), epoch, nepoch, i, len(dataloader), loss_avg.val()))
                loss_avg.reset()

                # checkpoint at the same interval
                torch.save(
                    crnn.state_dict(), f'{model_path}crnn_{field}_{epoch}_{i}.pth')
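
The LabelConverter used here and in Example No. 1 is not shown. A minimal sketch of the encode side, assuming index 0 is reserved for the CTC blank (an assumption):

import torch

class LabelConverter:
    # Sketch of a CTC label converter; index 0 is reserved for the blank.
    def __init__(self, char_set):
        self.char2idx = {c: i + 1 for i, c in enumerate(char_set)}

    def get_vocab_size(self):
        return len(self.char2idx) + 1  # +1 for the blank

    def encode(self, texts):
        # Flatten all labels into one index tensor plus per-sample lengths,
        # the layout CTC losses accept for variable-length targets.
        lengths = torch.IntTensor([len(t) for t in texts])
        targets = torch.IntTensor([self.char2idx[c] for t in texts for c in t])
        return targets, lengths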
Example No. 4
def main():
    epochs = config["epochs"]
    train_batch_size = config["train_batch_size"]
    eval_batch_size = config["eval_batch_size"]
    lr = config["lr"]
    show_interval = config["show_interval"]
    valid_interval = config["valid_interval"]
    save_interval = config["save_interval"]
    cpu_workers = config["cpu_workers"]
    reload_checkpoint = config["reload_checkpoint"]
    valid_max_iter = config["valid_max_iter"]

    img_width = config["img_width"]
    img_height = config["img_height"]
    data_dir = config["data_dir"]

    torch.cuda.empty_cache()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"device: {device}")

    # Extracts metadata related to the file path, label, and image width + height:
    # wbsin_dir = Path().cwd() / "data" / "processed" / "cropped_wbsin"
    # wbsin_meta_df = extract_jpg_meta(img_dir=wbsin_dir, img_type="wbsin")
    # Saves the extracted metadata:
    # interim_path = Path.cwd() / "data" / "interim"
    # interim_path.mkdir(parents=True, exist_ok=True)
    # wbsin_meta_df.to_csv(interim_path / "wbsin_meta.csv", index=False)

    X_transforms = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((160, 1440)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, )),
    ])

    wbsin_dataset = WbsinImageDataset(
        meta_file=(Path.cwd() / "data" / "processed" /
                   "processed_wbsin_meta.csv"),
        transform=X_transforms,
    )

    train_size = int(0.8 * len(wbsin_dataset))
    test_size = len(wbsin_dataset) - train_size

    train_dataset, test_dataset = torch.utils.data.random_split(
        wbsin_dataset,
        [train_size, test_size],
        generator=torch.Generator().manual_seed(42),
    )

    # Save the test_dataset

    train_dataloader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        shuffle=True,
        num_workers=cpu_workers,
        collate_fn=wbsin_collate_fn,
    )

    test_dataloader = DataLoader(
        test_dataset,
        batch_size=eval_batch_size,
        shuffle=True,
        num_workers=cpu_workers,
        collate_fn=wbsin_collate_fn,
    )

    num_class = len(WbsinImageDataset.LABEL2CHAR) + 1
    crnn = CRNN(
        1,
        img_height,
        img_width,
        num_class,
        map_to_seq_hidden=config["map_to_seq_hidden"],
        rnn_hidden=config["rnn_hidden"],
        leaky_relu=config["leaky_relu"],
    )
    if reload_checkpoint:
        crnn.load_state_dict(torch.load(reload_checkpoint,
                                        map_location=device))
    crnn.to(device)

    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    criterion = CTCLoss(reduction="sum")
    criterion.to(device)

    assert save_interval % valid_interval == 0
    i = 1
    for epoch in range(1, epochs + 1):
        print(f"epoch: {epoch}")
        tot_train_loss = 0.0
        tot_train_count = 0
        for train_data in train_dataloader:
            loss = train_batch(crnn, train_data, optimizer, criterion, device)
            train_size = train_data[0].size(0)

            tot_train_loss += loss
            tot_train_count += train_size
            if i % show_interval == 0:
                print("train_batch_loss[", i, "]: ", loss / train_size)

            if i % valid_interval == 0:
                evaluation = evaluate(
                    crnn,
                    test_dataloader,
                    criterion,
                    decode_method=config["decode_method"],
                    beam_size=config["beam_size"],
                )
                print(
                    "valid_evaluation: loss={loss}, acc={acc}, char_acc={char_acc}"
                    .format(**evaluation))

                if i % save_interval == 0:
                    prefix = "crnn"
                    loss = evaluation["loss"]
                    save_model_path = os.path.join(
                        config["checkpoints_dir"],
                        f"{prefix}_{i:06}_loss{loss}.pt")
                    torch.save(crnn.state_dict(), save_model_path)
                    print("save model at ", save_model_path)
            i += 1

        print("train_loss: ", tot_train_loss / tot_train_count)
Example No. 5
# loss function
criterion = CTCLoss()

crnn = CRNN(imgH, nc, nclass, nh, ngpu)
crnn.apply(weights_init)
if os.path.exists('/home/hecong/temp/data/ocr/simple_ocr.pkl'):
    crnn.load_state_dict(
        torch.load('/home/hecong/temp/data/ocr/simple_ocr.pkl'))

image = torch.FloatTensor(batchSize, 3, imgH, imgH)
text = torch.IntTensor(batchSize * 5)
length = torch.IntTensor(batchSize)

# optimizer = optim.Adam(
#     crnn.parameters(), lr=lr, betas=(beta1, 0.999))
optimizer = optim.SGD(crnn.parameters(), lr=lr, momentum=MOMENTUM)

for epoch in range(EPOCH):
    for step, (t_image, t_label) in enumerate(train_loader):
        batch_size = t_image.size(0)
        utils.loadData(image, t_image)
        t, l = converter.encode(t_label)
        utils.loadData(text, t)
        utils.loadData(length, l)
        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        optimizer.zero_grad()
        cost = criterion(preds, text, preds_size, length) / batch_size
        cost.backward()
        optimizer.step()
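
Note that preds is fed to the loss without a log-softmax. That matches the old warp-ctc binding, but if CTCLoss here is torch.nn.CTCLoss, it expects log-probabilities, e.g.:

        preds = crnn(image).log_softmax(2)  # (T, N, C) log-probabilities for nn.CTCLoss
        cost = criterion(preds, text, preds_size, length) / batch_size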
Example No. 6
def train_model():

    model = CRNN().cuda()
    writer = SummaryWriter('./tblogs/%f/' % learning_rate)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), weight_decay=weight_decay, lr=learning_rate)
    lr_schedule = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 30, 40, 50], gamma=0.1)
    t0 = time.time()
    cnt = 0
    total_step = len(trainloader)
    hidden = None
    class_correct = list(0. for i in range(num_classes))
    class_total = list(0. for i in range(num_classes))
    classes = ('True', 'False')
    start_epoch = 0
    ########################
    #### resume from checkpoint ####
    if RESUME:
        path_checkpoint = 'F:/EEG_data/MNE_test/code/reweights/checkpoint/%f/ckpt_best.pth' % (learning_rate)  # checkpoint path
        checkpoint = torch.load(path_checkpoint)  # load the checkpoint

        model.load_state_dict(checkpoint['net'])  # restore the model's learnable parameters

        optimizer.load_state_dict(checkpoint['optimizer'])  # restore optimizer state
        start_epoch = checkpoint['epoch']  # epoch to resume from
        lr_schedule.load_state_dict(checkpoint['lr_schedule'])

    for epoch in range(start_epoch + 1, num_epochs + 1):

        # keep track of training
        train_loss = 0.0
        train_counter = 0
        train_losses = 0.0
        ###################
        # train the model #
        ###################
        model.train()
        for data, target in trainloader:
            data, target = data.cuda(), target.cuda()
            target = target.long()
            optimizer.zero_grad()
            output, hidden = model(data, hidden)
            a, b = hidden
            hidden = (a.data, b.data)  # detach the LSTM state to truncate backprop through time
            loss = criterion(output, target)
            # print(target.data)
            loss.backward()
            optimizer.step()
            train_loss += (loss.item() * data.size(0))
            train_counter += data.size(0)
            train_losses = (train_loss / train_counter)
            writer.add_scalar('Train/Loss', train_losses, epoch)
            cnt += 1
            if cnt % 10 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch, num_epochs, cnt, total_step, loss.item()))
        cnt = 0
        checkpoint = {
            "net": model.state_dict(),
            'optimizer': optimizer.state_dict(),
            "epoch": epoch,
            'lr_schedule': lr_schedule.state_dict()
        }
        if not os.path.isdir("F:/EEG_data/MNE_test/code/reweights/checkpoint/%f" % (learning_rate)):
            os.makedirs("F:/EEG_data/MNE_test/code/reweights/checkpoint/%f" % (learning_rate))
        torch.save(checkpoint, 'F:/EEG_data/MNE_test/code/reweights/checkpoint/%f/ckpt_best.pth' % (learning_rate))
        if epoch % 200 == 0:
            torch.save(model.state_dict(), './model_EEG.pt')
        #############
        # eval
        #############
        #TODO add eval part
    # torch.save(model.state_dict(), './model_EEG.pt')
    time_total = time.time() - t0
    print('Total time: {:4.3f}, average time per epoch: {:4.3f}'.format(time_total, time_total / num_epochs))
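
The class_correct/class_total accumulators and classes tuple above are declared but never used; they were presumably meant for the TODO eval block. A sketch of that evaluation, assuming a testloader exists (an assumption):

# Hypothetical eval loop for the TODO above; testloader is assumed.
model.eval()
with torch.no_grad():
    for data, target in testloader:
        data, target = data.cuda(), target.cuda().long()
        output, _ = model(data, None)
        _, predicted = torch.max(output, 1)
        for t, c in zip(target, predicted == target):
            class_correct[t.item()] += c.item()
            class_total[t.item()] += 1
for k in range(num_classes):
    print('Accuracy of %5s: %2d%%' %
          (classes[k], 100 * class_correct[k] / max(class_total[k], 1)))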
Example No. 7
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        drop_last=True,
        num_workers=6,
    )

    val_dataset = dataset.Validate

    val_loader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False,
                            num_workers=4)

    model = CRNN(**MODEL_PARAMS)
    optimizer = torch.optim.Adam(model.parameters())
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

    callbacks = [
        CheckpointCallback(save_n_best=10),
        CustomCallback(metric_names=['accuracy'],
                       meter_list=[WrapAccuracy(alphabet)])
    ]

    runner = SupervisedRunner(input_key="image", input_target_key="targets")

    runner.train(model=model,
                 criterion=WrapCTCLoss(alphabet),
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders={
Example No. 8
converter = utils.strLabelConverter(opt.alphabet)
#criterion = CTCLoss()
criterion = nn.MSELoss()

# If GPU usage is allowed
if opt.cuda:
    crnn.cuda()
    crnn = nn.DataParallel(crnn, device_ids=range(opt.ngpu))
    criterion = criterion.cuda()

# loss averager
loss_avg = utils.averager()

# creating optimizer
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(),
                           lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
else:
    optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)

for epoch in range(opt.niter):
    train_iter = iter(train_dataloader)
    i = 0
    train_iters = len(train_dataloader)
    print('No. of epoch: ' + str(epoch))
    while i < train_iters:
        data = next(train_iter)  # Python 3 iterator protocol (was train_iter.next())
        cost = trainBatch(crnn,
Example No. 9
'''
train_bs = next(iter(trainset_loader))
img,encode,true_len = train_bs  
print(img.shape)
print(encode)
print(true_len)
plt.figure(figsize=(40,40))
plt.axis("off")
plt.imshow(np.transpose(vutils.make_grid(img,nrow=2,padding=1,normalize=True).numpy(),(1,2,0)), cmap='gray')
plt.show() 
'''

###### Training Config #####
decoder = GreedyDecoder()
model = CRNN().to(device)
optimizer = optim.Adam(model.parameters())
criterion = nn.CTCLoss()

min_loss = 1e8
# Training
for epoch in range(nEpochs):
    epoch_loss = 0
    model.train()
    with tqdm(total=len(trainset_loader), desc='[Train]') as bar:
        for idx, (img, targets, true_len) in enumerate(trainset_loader):
            img, targets, true_len = img.to(device), targets.to(
                device), true_len.to(device)
            optimizer.zero_grad()
            output = model(img)  #[w, bs, 1782]
            torch.cuda.empty_cache()
            seq_len = torch.tensor([output.shape[0]] * output.shape[1])
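
The snippet cuts off here. With nn.CTCLoss, the step would typically finish along these lines (a sketch, not the original code):

            # sketch of a typical continuation; not part of the original snippet
            log_probs = output.log_softmax(2)  # nn.CTCLoss expects log-probabilities
            loss = criterion(log_probs, targets, seq_len, true_len)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            bar.update(1)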
Example No. 10
    return running_loss / i


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    transform = transforms.Compose([
        transforms.Resize((RESIZE_H, RESIZE_W)),
        transforms.Grayscale(),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ])

    net = CRNN()
    net = net.to(device)
    criterion = nn.CTCLoss()
    optimizer = optim.Adam(net.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             step_size=5000,
                                             gamma=0.5)

    if args.load_model:
        load_path = os.path.join(os.getcwd(), args.load_model)
        net.load_state_dict(torch.load(load_path))

    save_dir = os.path.join(os.getcwd(), args.save_dir)
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    train_data = syn_text(
Example No. 11
def main():

    parser = ArgumentParser()
    parser.add_argument('-d',
                        '--data_path',
                        dest='data_path',
                        type=str,
                        default='../../data/',
                        help='path to the data')
    parser.add_argument('--epochs',
                        '-e',
                        dest='epochs',
                        type=int,
                        help='number of train epochs',
                        default=2)
    parser.add_argument('--batch_size',
                        '-b',
                        dest='batch_size',
                        type=int,
                        help='batch size',
                        default=16)
    parser.add_argument('--load',
                        '-l',
                        dest='load',
                        type=str,
                        help='pretrained weights',
                        default=None)
    parser.add_argument('-v',
                        '--val_split',
                        dest='val_split',
                        default=0.8,
                        type=float,
                        help='train/val split')
    parser.add_argument('--augs',
                        '-a',
                        dest='augs',
                        type=float,
                        help='degree of geometric augs',
                        default=0)

    args = parser.parse_args()
    OCR_MODEL_PATH = '../pretrained/ocr.pt'

    all_marks = load_json(os.path.join(args.data_path, 'train.json'))
    test_start = int(args.val_split * len(all_marks))
    train_marks = all_marks[:test_start]
    val_marks = all_marks[test_start:]

    w, h = (320, 64)
    train_transforms = transforms.Compose([
        #Rotate(max_angle=args.augs * 7.5, p=0.5),  # 5 -> 7.5
        #Pad(max_size=args.augs / 10, p=0.1),
        Resize(size=(w, h)),
        transforms.ToTensor()
    ])
    val_transforms = transforms.Compose(
        [Resize(size=(w, h)), transforms.ToTensor()])
    alphabet = abc

    train_dataset = OCRDataset(marks=train_marks,
                               img_folder=args.data_path,
                               alphabet=alphabet,
                               transforms=train_transforms)
    val_dataset = OCRDataset(marks=val_marks,
                             img_folder=args.data_path,
                             alphabet=alphabet,
                             transforms=val_transforms)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  drop_last=True,
                                  num_workers=0,
                                  collate_fn=collate_fn_ocr,
                                  timeout=0,
                                  shuffle=True)

    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                drop_last=False,
                                num_workers=0,
                                collate_fn=collate_fn_ocr,
                                timeout=0)

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    model = CRNN(alphabet)
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=3e-4,
                                 amsgrad=True,
                                 weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=10,
                                                           factor=0.5,
                                                           verbose=True)
    criterion = F.ctc_loss

    try:
        train(model, criterion, optimizer, scheduler, train_dataloader,
              val_dataloader, OCR_MODEL_PATH, args.epochs, device)
    except KeyboardInterrupt:
        torch.save(model.state_dict(), OCR_MODEL_PATH + 'INTERRUPTED_')
        #logger.info('Saved interrupt')
        sys.exit(0)
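
The train() helper invoked above is not shown. Since criterion is the functional F.ctc_loss, a single training step inside it presumably looks roughly like this (a sketch under that assumption; the batch layout from collate_fn_ocr is assumed too):

import torch

def train_step(model, batch, criterion, optimizer, device):
    # Hypothetical step; the real train() from this repository is not shown.
    images, targets, target_lengths = batch  # assumed collate layout
    images = images.to(device)
    log_probs = model(images).log_softmax(2)  # (T, N, C)
    input_lengths = torch.full((log_probs.size(1),), log_probs.size(0), dtype=torch.long)
    loss = criterion(log_probs, targets, input_lengths, target_lengths)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()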