def training():
    for total_steps in range(params.niter):  # number of epochs to train
        train_iter = iter(train_loader)  # fresh iterator over the training data each epoch
        i = 0
        print(len(train_loader))
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            cost = trainBatch(crnn, criterion, optimizer,
                              train_iter)  # criterion is CTCLoss
            loss_avg.add(cost)  # accumulate the running average loss
            i += 1
            if i % params.displayInterval == 0:  # print every displayInterval batches
                print('[%d/%d][%d/%d] Loss: %f' %
                      (total_steps, params.niter, i, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if i % params.valInterval == 0:  # run validation
                val(crnn, test_dataset, criterion)
        if (total_steps + 1) % params.saveInterval == 0:  # save a checkpoint
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       total_steps, i))
Example #2
def training():
    accuracy_rate = 0
    for total_steps in range(params.niter):
        train_iter = iter(train_loader)
        i = 0
        print(len(train_loader))
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            loss_avg.add(cost)
            i += 1
            if i % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (total_steps, params.niter, i, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if i % params.valInterval == 0:
                accuracy = val(crnn, test_dataset, criterion)
                if accuracy > accuracy_rate:
                    torch.save(
                        crnn.state_dict(),
                        '{0}/rnn_no_IO_{1}_{2}_{3}.pth'.format(
                            params.experiment, total_steps, i, accuracy))
                    accuracy_rate = accuracy
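
Both loops so far also lean on a loss_avg object with add/val/reset methods. A minimal sketch of such a running averager (a hypothetical stand-in for the utils.averager these repos typically use):

class Averager:
    """Running average of scalar losses."""
    def __init__(self):
        self.reset()

    def add(self, v):
        self.total += float(v)   # works for 0-dim tensors and plain floats
        self.count += 1

    def val(self):
        return self.total / self.count if self.count else 0.0

    def reset(self):
        self.total, self.count = 0.0, 0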
Example #3
def train(crnn, train_loader, criterion, iteration):
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()
    for i_batch, (image, index) in enumerate(train_loader):
        if args.cuda:
            image = image.cuda()
            criterion = criterion.cuda()
        label = utils.get_batch_label(dataset, index)
        preds = crnn(image)
        batch_size = image.size(0)
        index = np.array(index.data.numpy())
        text, length = converter.encode(label)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        # print(preds.shape, text.shape, preds_size.shape, length.shape)
        # torch.Size([41, 16, 6736]) torch.Size([160]) torch.Size([16]) torch.Size([16])
        cost = criterion(preds, text, preds_size, length) / batch_size

        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)
        if i_batch == 100:
            break  # caps each epoch at 100 batches (likely a leftover debugging limit)
        if (i_batch + 1) % params.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (iteration, params.niter, i_batch, len(train_loader),
                   loss_avg.val()))
            loss_avg.reset()
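
The shape comment in this example is the heart of CTC training: preds is (T, N, C), the labels for the whole batch are flattened into one 1-D tensor, and the two length tensors are per-sample. A self-contained shape check against torch.nn.CTCLoss, using random data purely for illustration:

import torch
import torch.nn as nn

T, N, C = 41, 16, 6736                            # time steps, batch, classes, as in the comment above
log_probs = torch.randn(T, N, C).log_softmax(2)   # nn.CTCLoss expects log-probabilities
targets = torch.randint(1, C, (160,), dtype=torch.int32)  # 16 samples x 10 chars, flattened; 0 is the blank
input_lengths = torch.full((N,), T, dtype=torch.int32)
target_lengths = torch.full((N,), 10, dtype=torch.int32)
ctc = nn.CTCLoss(blank=0, zero_infinity=True)
print(ctc(log_probs, targets, input_lengths, target_lengths))  # scalar loss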
Example #4
def training(start):
    best = 0.982470
    for total_steps in range(start, params.niter):
        train_iter = iter(train_loader)
        i = 0
        print(len(train_loader))
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            loss_avg.add(cost)
            i += 1
            if i % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (total_steps, params.niter, i, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if i % params.valInterval == 0:
                acc = val(crnn, test_dataset, criterion)
                if acc > best:
                    best = acc
                    print('save model ..........')
                    # ti = time.strftime('%Y-%m-%d', time.localtime(time.time()))
                    torch.save(
                        crnn.state_dict(), '{0}/best_model_{1}_{2}.pth'.format(
                            params.experiment, total_steps, i))
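
The start argument implies this variant resumes from a saved checkpoint. A plausible resume sketch (the file name is hypothetical; any state_dict saved by the loop above would work):

state = torch.load('expr/best_model_12_3000.pth', map_location='cpu')
crnn.load_state_dict(state)
training(start=13)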
Example #5
def training(crnn, train_loader, criterion, optimizer):
    for total_steps in range(params.niter):
        train_iter = iter(train_loader)
        i = 0
        print("total number", len(train_loader))
        while i < len(train_loader):
            # for p in crnn.parameters():
            #     p.requires_grad = True
            # training phase
            crnn.train()
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            loss_avg.add(cost)
            i += 1
            if i % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (total_steps, params.niter, i, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if i % params.valInterval == 0:
                val(crnn, test_dataset, criterion)
        # save a checkpoint every saveInterval epochs
        if (total_steps + 1) % params.saveInterval == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       total_steps, i))
Example #6
def training():
    for epoch in range(params.nEpochs):
        train_iter = iter(train_loader)
        i = 0
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            loss_avg.add(cost)
            i += 1
            if i % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (epoch, params.nEpochs, i, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if i % params.valInterval == 0:
                val(crnn, test_dataset, criterion)
        if (epoch + 1) % params.saveEpoch == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       epoch, i))
            print('Saved model params in dir {}'.format(params.experiment))
            val(crnn, test_dataset, criterion)
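
val() is called in every example but never defined. A minimal sketch of a greedy-decoding validator that returns accuracy, matching how Examples #2 and #4 use the return value (converter, device, and the loader settings are assumptions):

import torch

def val(net, dataset, criterion, max_iter=100):
    net.eval()
    loader = torch.utils.data.DataLoader(dataset, batch_size=64, num_workers=2)
    n_correct = n_total = 0
    with torch.no_grad():
        for i, (images, labels) in enumerate(loader):
            if i >= max_iter:
                break
            preds = net(images.to(device))                       # (T, N, C)
            _, idx = preds.max(2)                                # greedy best path per time step
            idx = idx.transpose(1, 0).contiguous().view(-1)
            sizes = torch.IntTensor([preds.size(0)] * images.size(0))
            # converter.decode collapses repeats and strips CTC blanks
            for pred, label in zip(converter.decode(idx, sizes, raw=False), labels):
                n_correct += int(pred == label)
                n_total += 1
    return n_correct / max(n_total, 1)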
Example #7
def training():
    for total_steps in range(params.niter):
        train_iter = iter(train_loader)
        i = 0
        logger.info('length of train_data: %d' % (len(train_loader)))
        while i < len(train_loader):

            for p in crnn.parameters():
                p.requires_grad = True

            crnn.train()
            # debug shortcut: validating once and returning here skips training
            # entirely, so it is commented out
            # val(crnn, test_dataset1, test_dataset2, test_dataset3, total_steps,
            #     i, criterion)
            # return
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            loss_avg.add(cost)
            i += 1
            if i % params.displayInterval == 0:
                logger.info('[%d/%d][%d/%d] Loss: %f' %
                            (total_steps, params.niter, i, len(train_loader),
                             loss_avg.val()))
                loss_avg.reset()
        val(crnn, test_dataset, total_steps, i, criterion)
        if (total_steps + 1) % params.saveInterval == 0:
            string = "model save to {0}crnn_Rec_done_epoch_{1}.pth".format(
                log_dir, total_steps)
            logger.info(string)
            torch.save(
                crnn.state_dict(),
                '{0}crnn_Rec_done_epoch_{1}.pth'.format(log_dir, total_steps))
Example #8
def training():
    for total_steps in range(test_params.niter):
        train_iter = iter(train_loader)
        i = 0
        logger.info('length of train_data: %d' % (len(train_loader)))

        eval_time = 0.0
        prog_bar = mmcv.ProgressBar(test_params.displayInterval)
        while i < len(train_loader):
            torch.cuda.empty_cache()
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            # debug shortcut: validating once and returning here skips training
            # entirely, so it is commented out
            # val(crnn, test_dataset, criterion, total_steps, i)
            # return
            start = time.time()
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            eval_time += time.time() - start

            loss_avg.add(cost)
            i += 1
            prog_bar.update()

            if i % test_params.tbInterval == 0:
                print("\n>>>> Tensorboard Log")
                writer.add_scalar(
                    'train/loss', loss_avg.val(),
                    int(i + total_steps * len(train_loader)))  # record to tb

            if i % test_params.displayInterval == 0:
                sys.stdout.write("\r%100s\r" % ' ')
                sys.stdout.flush()
                logger.info('[%d/%d][%d/%d] Loss: %f, Cost: %.4fs per batch' %
                            (total_steps, test_params.niter, i,
                             len(train_loader), loss_avg.val(), eval_time / i))

                if eval_time / i < 0.2:
                    test_params.displayInterval = 1000
                elif eval_time / i < 0.5:
                    test_params.displayInterval = 400
                elif eval_time / i < 1.0:
                    test_params.displayInterval = 200
                prog_bar = mmcv.ProgressBar(
                    test_params.displayInterval)  # new interval

                loss_avg.reset()

        val(crnn, test_dataset, criterion, total_steps, i)
        torch.cuda.empty_cache()
        if (total_steps + 1) % test_params.saveInterval == 0:
            string = "model save to {0}crnn_Rec_done_epoch_{1}.pth".format(
                log_dir, total_steps)
            logger.info(string)
            torch.save(
                crnn.state_dict(),
                '{0}crnn_Rec_done_epoch_{1}.pth'.format(log_dir, total_steps))
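
The writer feeding the train/loss scalar above is assumed to be a TensorBoard SummaryWriter created once at startup, roughly:

from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter(log_dir=log_dir)   # same log_dir the checkpoints are written to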
Example #9
def trainBatch():
    crnn.train()
    data = next(train_iter)  # Python 3: next(it), not it.next()
    image, text, length, _ = data
    image = image.to(device)
    image.requires_grad_()
    batch_size = image.size(0)

    preds = crnn(image)
    preds_size = torch.IntTensor([preds.size(0)] * batch_size)
    cost = criterion(preds, text, preds_size, length)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
Example #10
def trainBatch():
    crnn.train()
    data = next(train_iter)  # Python 3: next(it), not it.next()
    image, text, length, _ = data
    image = image.to(device)
    image.requires_grad_()
    batch_size = image.size(0)
    preds = crnn(image)
    preds = torch.clamp(preds, min=-50.0)
    if random.random() < 0.01:
        print(preds[5, 0, :].topk(3))
    preds_size = torch.IntTensor([preds.size(0)] * batch_size)
    cost = criterion(preds.log_softmax(2), text, preds_size, length)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
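
Unlike Example #9, this variant applies preds.log_softmax(2) before the loss: torch.nn.CTCLoss expects log-probabilities, whereas the warp-ctc binding used in older CRNN code takes raw activations. Clamping the raw outputs at -50 bounds how negative a log-probability can get, a common guard against CTC numerical blow-ups, and the occasional topk printout is a cheap sanity check that the network has not collapsed onto the blank label.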
Example #11
def training():
    for total_steps in range(params.niter):
        if total_steps < 50:
            train_loader = s_train_loader
            test_dataset = s_test_dataset
        elif total_steps < 80:
            train_loader = m_train_loader
            test_dataset = m_test_dataset
        else:
            train_loader = l_train_loader
            test_dataset = l_test_dataset

        # train_loader = train_loader
        # test_dataset = test_dataset

        # if total_steps < 60:
        #     optimizer = optim.RMSprop(crnn.parameters(), lr=params.lr)
        # else:
        #     optimizer = optim.Adam(crnn.parameters(), lr=params.lr,
        #                            betas=(params.beta1, 0.999))
        train_iter = iter(train_loader)
        i = 0
        print(len(train_loader))
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            loss_avg.add(cost)
            i += 1
            if i % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (total_steps, params.niter, i, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if i % params.valInterval == 0:
                val(crnn, test_dataset, criterion)
        if (total_steps + 1) % params.saveInterval == 0:
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       total_steps, i))
Example #12
def main(crnn, train_loader, val_loader, criterion, optimizer):

    if args.cuda:
        crnn.cuda()
    Iteration = 0
    while Iteration < params.niter:
        train(crnn, train_loader, criterion, Iteration)
        # max_i caps validation time; set it to len(val_loader) to validate on the whole test set
        accuracy = val(crnn, val_loader, criterion, Iteration, max_i=1000)
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()
        is_best = accuracy > params.best_accuracy
        if is_best:
            torch.save(
                crnn.state_dict(),
                '{0}/crnn_Rec_done_{1}_{2}.pth'.format(params.experiment,
                                                       Iteration, accuracy))
            torch.save(crnn.state_dict(),
                       '{0}/crnn_best.pth'.format(params.experiment))
            params.best_accuracy = accuracy  # keep the running best so later epochs compare against it
        print("is best accuracy: {0}".format(is_best))
        Iteration += 1
Example #13
def training(crnn, train_loader, criterion, optimizer):
    for total_steps in range(params.niter):
        train_iter = iter(train_loader)
        i = 0
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            cost = train_batch(crnn, criterion, optimizer, train_iter)
            loss_avg.add(cost)
            i += 1
            if i % params.displayInterval == 0:
                print('[%d/%d][%d/%d] Loss: %f' %
                      (total_steps, params.niter, i, len(train_loader),
                       loss_avg.val()))
                loss_avg.reset()
            if i % params.valInterval == 0:
                val(crnn, criterion)
        if total_steps % params.saveInterval == 0:
            save_name = '{0}/crnn_Rec_done_{1}_{2}.pth'.format(
                params.experiment, total_steps, i)
            torch.save(crnn.state_dict(), save_name)
            print('%s saved' % save_name)
Example #14
def train(crnn, train_loader, criterion, optimizer, valid_loader, epoch=0):  # epoch: current epoch index, used in the progress printout
    for p in crnn.parameters():
        p.requires_grad = True
    crnn.train()
    train_iter = iter(train_loader)
    # loss averager
    loss_avg = utils.averager()
    for i in range(len(train_loader)):
        data = next(train_iter)  # Python 3: next(it), not it.next()
        _, images, texts = data
        batch_size = images.size(0)
        t, l = converter.encode(texts)
        images = images.cuda()
        preds = crnn(images)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        cost = criterion(preds, t, preds_size, l) / batch_size
        crnn.zero_grad()
        cost.backward()
        optimizer.step()
        loss_avg.add(cost)
        if (i + 1) % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.nepoch, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()
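
The division by batch_size here (and in Examples #3 and #16) follows the warp-ctc convention of summing the CTC loss over the batch; torch.nn.CTCLoss defaults to reduction='mean', in which case the extra division would shrink the loss and gradients by another factor of the batch size.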
Example #15
    # sim_preds = converter.beam_decode(preds.data)

    cer_loss = utils.cer_loss(sim_preds, cpu_texts, ignore_case=False)
    return cost, cer_loss, batch_size


for epoch in range(opt.nepoch):
    train_iter = iter(train_loader)
    i = 0

    train_ctc = 0
    train_cer = 0
    while i < len(train_loader):
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()

        cost, cer_loss, batch_size = trainBatch(crnn, criterion, optimizer)
        train_cer += cer_loss
        train_ctc += cost * batch_size
        loss_avg.add(cost)
        i += 1

        if i % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.nepoch, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()

        if opt.valid_result and i % opt.valInterval == 0:
            val(crnn, criterion)
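
utils.cer_loss above is assumed rather than shown. A minimal character-error-rate sketch via Levenshtein distance for a single prediction/target pair (illustrative, not the repo's implementation):

def cer(pred, target):
    # edit distance between two strings, normalised by target length
    m, n = len(pred), len(target)
    d = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        d[i][0] = i
    for j in range(n + 1):
        d[0][j] = j
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            d[i][j] = min(d[i - 1][j] + 1,                                   # deletion
                          d[i][j - 1] + 1,                                   # insertion
                          d[i - 1][j - 1] + (pred[i - 1] != target[j - 1]))  # substitution
    return d[m][n] / max(n, 1)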
Example #16
def main(arg):
    print(arg)
    train_dataset = dataset.lmdbDataset(
        path=arg.train_root,
        # transform=dataset.resizeNormalize((imgW,imgH)),
    )
    test_dataset = dataset.lmdbDataset(
        path=arg.test_root,
        # transform=dataset.resizeNormalize((arg.imgW,arg.imgH)),
    )
    d = test_dataset[0]       # smoke-test the dataset before training
    l = len(test_dataset)
    train_loader = DataLoader(train_dataset,
                              num_workers=arg.num_workers,
                              batch_size=arg.batch_size,
                              collate_fn=dataset.alignCollate(
                                  imgH=arg.imgH,
                                  imgW=arg.imgW,
                                  keep_ratio=arg.keep_ratio),
                              shuffle=True,
                              drop_last=True)

    criterion = CTCLoss()
    converter = utils.Converter(arg.num_class)
    crnn = CRNN(imgH=arg.imgH, nc=3, nclass=arg.num_class + 1, nh=256)

    # custom weights initialization called on crnn
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    crnn.apply(weights_init)
    print(crnn)

    image = torch.FloatTensor(arg.batch_size, 3, arg.imgH, arg.imgW)
    text = torch.IntTensor(arg.batch_size * 5)
    length = torch.IntTensor(arg.batch_size)

    # Variable() is a no-op wrapper in modern PyTorch; kept here for the legacy API
    image = Variable(image)
    text = Variable(text)
    length = Variable(length)

    # loss averager
    loss_avg = utils.averager()

    # setup optimizer
    if arg.opt == 'adam':
        optimizer = optim.Adam(crnn.parameters(), 0.01, betas=(0.5, 0.999))
    elif arg.opt == 'adadelta':
        optimizer = optim.Adadelta(crnn.parameters())
    else:
        optimizer = optim.RMSprop(crnn.parameters(), 0.01)

    for epoch in range(arg.n_epoch):
        train_iter = iter(train_loader)
        i = 0
        while i < len(train_loader):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            data = next(train_iter)  # Python 3: next(it), not it.next()
            cpu_images, cpu_texts = data
            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)
            text_labels, l = converter.encode(cpu_texts)
            utils.loadData(text, text_labels)
            utils.loadData(length, l)

            preds = crnn(image)
            preds_size = Variable(torch.IntTensor([preds.size(0)] *
                                                  batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()

            loss_avg.add(cost)
            i += 1

            if i % arg.displayInterval == 0:
                print(
                    '[%d/%d][%d/%d] Loss: %f' %
                    (epoch, arg.n_epoch, i, len(train_loader), loss_avg.val()))
                loss_avg.reset()

            if i % arg.testInterval == 0:
                test(arg, crnn, test_dataset, criterion, image, text, length)

            # do checkpointing
            if i % arg.saveInterval == 0:
                name = '{0}/netCRNN_{1}_{2}_{3}_{4}.pth'.format(
                    arg.model_dir, arg.num_class, arg.type, epoch, i)
                torch.save(crnn.state_dict(), name)
                print('model saved at ', name)
    torch.save(
        crnn.state_dict(),
        '{0}/netCRNN_{1}_{2}.pth'.format(arg.model_dir, arg.num_class,
                                         arg.type))
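
utils.Converter(arg.num_class) above, like converter.encode in the other examples, maps label strings to the integer targets CTC needs while reserving index 0 for the blank. A minimal sketch (hypothetical class and alphabet):

import torch

class LabelConverter:
    def __init__(self, alphabet):
        self.dict = {ch: i + 1 for i, ch in enumerate(alphabet)}  # 0 is the CTC blank
    def encode(self, texts):
        lengths = [len(s) for s in texts]
        flat = [self.dict[ch] for s in texts for ch in s]
        return torch.IntTensor(flat), torch.IntTensor(lengths)

converter = LabelConverter('0123456789abcdefghijklmnopqrstuvwxyz')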
Example #17
    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost


for epoch in range(opt.niter):
    train_iter = iter(train_loader)
    i = 0
    while i < len(train_loader):
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()

        cost = trainBatch(crnn, criterion, optimizer)
        loss_avg.add(cost)
        i += 1

        if i % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.niter, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()

        if i % opt.valInterval == 0:
            val(crnn, test_dataset, criterion)

        # do checkpointing
        if i % opt.saveInterval == 0:
            torch.save(crnn.state_dict(),  # assumed save pattern (opt.experiment is a guess based on the other examples)
                       '{0}/netCRNN_{1}_{2}.pth'.format(opt.experiment, epoch, i))
Example #18
def training():
    for total_steps in range(params.niter):
        train_iter = iter(train_loader)
        i = 0
        logger.info('length of train_data: %d' % (len(train_loader)))

        eval_time = 0.0
        prog_bar = mmcv.ProgressBar(params.displayInterval)
        while i < len(train_loader):
            i += 1
            runtime_error = False
            crnn.train()
            loss_avg.reset()
            start = time.time()
            cost = trainBatch(crnn, criterion, optimizer, train_iter)
            eval_time += time.time() - start
            loss_avg.add(cost.cpu())
            prog_bar.update()
            '''
            try:
                i += 1

                #crnn.cuda()
                crnn.train()
                loss_avg.reset()
                start = time.time()
                cost = trainBatch(crnn, criterion, optimizer, train_iter)
                eval_time += time.time()-start
                loss_avg.add(cost.cpu())
                prog_bar.update()
 
                runtime_error = False
            except RuntimeError as e:
                logger.error(e)
                runtime_error = True
            except ConnectionRefusedError as e:
                logger.error(e)
                runtime_error = True
            finally:
                if runtime_error:
                    logger.error("Warning: Some error happen")
                    gc.collect()
                    torch.cuda.empty_cache()
            '''

            if i % params.tbInterval == 0 and not runtime_error:
                print("\n>>>> Tensorboard Log")
                writer.add_scalar('train/loss', loss_avg.val(),
                                  int(i + total_steps * len(train_loader)))
                # record to tb

            if i % params.displayInterval == 0 and not runtime_error:
                sys.stdout.write("\r%100s\r" % ' ')
                sys.stdout.flush()
                logger.info('[%d/%d][%d/%d] Loss: %f, Cost: %.4fs per batch' %
                            (total_steps, params.niter, i, len(train_loader),
                             loss_avg.val(), eval_time / i))
                loss_avg.reset()
                if eval_time / i < 0.2:
                    params.displayInterval = 1000
                elif eval_time / i < 0.5:
                    params.displayInterval = 400
                elif eval_time / i < 1.0:
                    params.displayInterval = 200
                prog_bar = mmcv.ProgressBar(
                    params.displayInterval)  # new interval

            # if i % params.valInterval == 0:
            #     val(crnn, test_dataset, criterion, total_steps, i)
            #     torch.cuda.empty_cache()

        torch.cuda.empty_cache()
        val(crnn, test_dataset, criterion, total_steps, i)

        if (total_steps + 1) % params.saveInterval == 0:
            string = "model save to {0}crnn_Rec_done_epoch_{1}.pth".format(
                log_dir, total_steps)
            logger.info(string)
            torch.save(
                crnn.state_dict(),
                '{0}crnn_Rec_done_epoch_{1}.pth'.format(log_dir, total_steps))