示例#1
0
def test(opt):
    """Evaluate a text-recognition model and record its accuracy.

    The label converter and ``Model`` are built from the fields of ``opt``;
    weights are restored from ``opt.saved_model`` when it is non-empty.  The
    model is then evaluated either through ``benchmark_all_eval`` or through
    one ``validation`` pass over ``opt.eval_data``.

    Args:
        opt: Option namespace.  ``opt.num_class`` is overwritten here, and
            ``opt.name`` is derived from the checkpoint path when
            ``opt.saved_model`` is set.

    Side effects:
        Creates ``./result/<opt.name>/``, copies the checkpoint into it and,
        on the single-dataset path, prints the accuracy and appends it to
        ``log_evaluation.txt`` in that directory.
    """
    import shutil  # function-scope import: the checkpoint copy is its only use

    # ---- model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)
    model = Model(opt.imgH,
                  opt.imgW,
                  opt.input_channel,
                  opt.output_channel,
                  opt.hidden_size,
                  opt.num_class,
                  opt.batch_max_length,
                  Transformation=opt.Transformation,
                  FeatureExtraction=opt.FeatureExtraction,
                  SequenceModeling=opt.SequenceModeling,
                  Prediction=opt.Prediction)
    print('model input parameters', opt.imgH, opt.imgW, opt.input_channel,
          opt.output_channel, opt.hidden_size, opt.num_class,
          opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).cuda()

    # ---- load model
    has_checkpoint = opt.saved_model != ''
    if has_checkpoint:
        print('loading pretrained model from %s' % opt.saved_model)
        model.load_state_dict(torch.load(opt.saved_model))
        # Result folder name: the checkpoint path without its first
        # component, with the remaining components joined by '_'.
        opt.name = '_'.join(opt.saved_model.split('/')[1:])

    # ---- keep evaluation model and result logs
    result_dir = f'./result/{opt.name}'
    os.makedirs(result_dir, exist_ok=True)
    if has_checkpoint:
        # shutil.copy instead of a shell `cp`: the path is never interpreted
        # by a shell (spaces / metacharacters are safe) and the copy is not
        # tied to a POSIX `cp` binary.  Nothing is copied without a checkpoint.
        shutil.copy(opt.saved_model, result_dir)

    # ---- setup loss
    if 'CTC' in opt.Prediction:
        criterion = CTCLoss(reduction='sum')
    else:
        criterion = torch.nn.CrossEntropyLoss(
            ignore_index=0).cuda()  # ignore [GO] token = ignore index 0

    # ---- evaluation
    model.eval()
    if opt.benchmark_all_eval:  # evaluation with 10 benchmark evaluation datasets
        benchmark_all_eval(model, criterion, converter, opt)
    else:
        AlignCollate_evaluation = AlignCollate(imgH=opt.imgH, imgW=opt.imgW)
        eval_data = hierarchical_dataset(root=opt.eval_data, opt=opt)
        evaluation_loader = torch.utils.data.DataLoader(
            eval_data,
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=int(opt.workers),
            collate_fn=AlignCollate_evaluation,
            pin_memory=True)
        # Only the second value returned by validation() is used here.
        _, accuracy_by_best_model, _, _, _, _, _ = validation(
            model, criterion, evaluation_loader, converter, opt)

        print(accuracy_by_best_model)
        with open('./result/{0}/log_evaluation.txt'.format(opt.name),
                  'a') as log:
            log.write(str(accuracy_by_best_model) + '\n')
示例#2
0
 def __init__(self, num_classes=72):
     """Create the network module and the CTC criterion.

     Args:
         num_classes: Stored as ``self.num_classes``; defaults to 72.
     """
     super(LPRNet, self).__init__()
     # NOTE(review): if LPRNet derives from ``torch.nn.Module``, this
     # attribute hides the inherited ``.cuda()`` method on instances - confirm.
     self.cuda = torch.cuda.is_available()
     self.num_classes = num_classes
     self.module = self.__create_module()
     # Frame-averaged, batch-mean CTC loss with index 0 as the blank label.
     ctc_options = {"average_frames": True, "reduction": "mean", "blank": 0}
     self.criterion = CTCLoss(**ctc_options)
示例#3
0
    def __init__(self, model, train_dataloader, val_dataloader, param):
        """Prepare logging, loss, optimizer and LR scheduler for CTC training.

        Args:
            model: Network whose parameters are optimised.
            train_dataloader: Stored as ``self.train_dataloader``.
            val_dataloader: Stored as ``self.val_dataloader``.
            param: Configuration object; ``saving_path``, ``num_classes`` and
                the ``train`` sub-config (``learning_rate``,
                ``l2_weight_decay``, ``lr_factor``, ``lr_reduce_patient``)
                are read here.
        """
        super(CTCTrainer, self).__init__()

        # ---- create saving dir and backup files
        self.checkpoint_dir = param.saving_path
        # exist_ok=True replaces a separate exists() check, which could race
        # with another process creating the same directory.
        os.makedirs(self.checkpoint_dir, exist_ok=True)
        self._copy_backups()
        self.log_filename = os.path.join(self.checkpoint_dir, "log.txt")

        # ---- display configurations
        pp = pprint.PrettyPrinter(indent=4)
        LOG(pp.pformat(param), self.log_filename)
        # Count only the parameters that require gradients.
        LOG(
            '=' * 50 + '\n# Params = {}'.format(
                sum(p.numel() for p in model.parameters() if p.requires_grad)),
            self.log_filename)
        self.p_tr = param.train

        # ---- construct data loaders
        self.train_dataloader = train_dataloader
        self.val_dataloader = val_dataloader

        # ---- construct model
        self.model = model

        # ---- loss function and optimizer
        self.num_classes = param.num_classes
        # The last class index is used as the CTC blank label.
        self.ctc = CTCLoss(reduction='mean', blank=self.num_classes - 1)

        self.optimizer = optim.RMSprop(self.model.parameters(),
                                       lr=self.p_tr.learning_rate,
                                       alpha=0.95,
                                       weight_decay=self.p_tr.l2_weight_decay)
        # mode='min': the scheduler expects a metric that should decrease.
        self.scheduler = ReduceLROnPlateau(
            self.optimizer,
            factor=self.p_tr.lr_factor,
            patience=self.p_tr.lr_reduce_patient,
            mode='min')
示例#4
0
def main(opts):
    """Train the OctMLT detection/recognition network and write checkpoints.

    Each iteration draws one detection batch and one OCR batch, computes the
    detection loss (plus, after step 10000, the ``process_boxes`` CTC loss)
    and the OCR CTC loss, back-propagates both and takes one Adam step.
    Averaged losses are printed every ``disp_interval`` steps and a
    checkpoint is written every ``batch_per_epoch`` steps.

    Args:
        opts: Option namespace.  Fields read here: ``base_lr``, ``model``,
            ``cuda``, ``num_readers``, ``input_size``, ``batch_size``,
            ``train_list``, ``geo_type``, ``ocr_batch_size``,
            ``ocr_feed_list``, ``max_iters``, ``debug`` and ``save_path``.
            ``opts.max_iters`` is overwritten when early stopping triggers.

    The module-level names ``weight_decay``, ``norm_height``,
    ``disp_interval`` and ``batch_per_epoch`` are expected to exist.
    """
    import sys
    import traceback

    model_name = 'OCT-E2E-MLT'
    net = OctMLT(attention=True)
    print("Using {0}".format(model_name))

    learning_rate = opts.base_lr
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=opts.base_lr,
                                 weight_decay=weight_decay)
    step_start = 0
    if os.path.exists(opts.model):
        # Use the ``opts`` parameter rather than a module-level ``args`` so
        # the function also works when it is not run from the script entry.
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(opts.model, net)

    if opts.cuda:
        net.cuda()

    net.train()

    data_generator = data_gen.get_batch(num_workers=opts.num_readers,
                                        input_size=opts.input_size,
                                        batch_size=opts.batch_size,
                                        train_list=opts.train_list,
                                        geo_type=opts.geo_type)

    dg_ocr = ocr_gen.get_batch(num_workers=2,
                               batch_size=opts.ocr_batch_size,
                               train_list=opts.ocr_feed_list,
                               in_train=True,
                               norm_height=norm_height,
                               rgb=True)

    train_loss = 0
    bbox_loss, seg_loss, angle_loss = 0., 0., 0.
    cnt = 0
    ctc_loss = CTCLoss()

    ctc_loss_val = 0
    box_loss_val = 0
    good_all = 0
    gt_all = 0

    best_step = step_start
    best_loss = 1000000
    # NOTE(review): state_dict() is stored without copying; if it returns
    # references to the live tensors, "best" tracks the current weights
    # rather than a frozen snapshot - confirm whether a copy is intended.
    best_model = net.state_dict()
    best_optimizer = optimizer.state_dict()
    best_learning_rate = learning_rate
    max_patience = 3000
    early_stop = False

    # Keeps the final save well-defined when the range below is empty.
    step = step_start
    for step in range(step_start, opts.max_iters):

        # batch
        images, image_fns, score_maps, geo_maps, training_masks, gtso, lbso, gt_idxs = next(
            data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(
            0, 3, 1, 2)
        # default_timer() reads a clock; timeit.timeit() would instead
        # benchmark a no-op statement one million times on every call.
        start = timeit.default_timer()
        try:
            seg_pred, roi_pred, angle_pred, features = net(im_data)
        except Exception:  # not bare: Ctrl+C must still stop training
            traceback.print_exc(file=sys.stdout)
            continue
        end = timeit.default_timer()

        # backward

        smaps_var = net_utils.np_to_variable(score_maps, is_cuda=opts.cuda)
        training_mask_var = net_utils.np_to_variable(training_masks,
                                                     is_cuda=opts.cuda)
        # Channel 4 of geo_maps is used as the angle target, channels 0-3
        # as the geometry target.
        angle_gt = net_utils.np_to_variable(geo_maps[:, :, :, 4],
                                            is_cuda=opts.cuda)
        geo_gt = net_utils.np_to_variable(geo_maps[:, :, :, [0, 1, 2, 3]],
                                          is_cuda=opts.cuda)

        try:
            loss = net.loss(seg_pred, smaps_var, training_mask_var, angle_pred,
                            angle_gt, roi_pred, geo_gt)
        except Exception:
            traceback.print_exc(file=sys.stdout)
            continue

        bbox_loss += net.box_loss_value.data.cpu().numpy()
        seg_loss += net.segm_loss_value.data.cpu().numpy()
        angle_loss += net.angle_loss_value.data.cpu().numpy()

        train_loss += loss.data.cpu().numpy()
        optimizer.zero_grad()

        try:

            if step > 10000:  #this is just extra augumentation step ... in early stage just slows down training
                ctcl, gt_b_good, gt_b_all = process_boxes(images,
                                                          im_data,
                                                          seg_pred[0],
                                                          roi_pred[0],
                                                          angle_pred[0],
                                                          score_maps,
                                                          gt_idxs,
                                                          gtso,
                                                          lbso,
                                                          features,
                                                          net,
                                                          ctc_loss,
                                                          opts,
                                                          debug=opts.debug)
                # NOTE(review): the [0] index assumes a one-element array
                # rather than a 0-dim scalar - confirm for the torch version
                # in use.
                ctc_loss_val += ctcl.data.cpu().numpy()[0]
                loss = loss + ctcl
                gt_all += gt_b_all
                good_all += gt_b_good

            imageso, labels, label_length = next(dg_ocr)
            im_data_ocr = net_utils.np_to_variable(imageso,
                                                   is_cuda=opts.cuda).permute(
                                                       0, 3, 1, 2)
            features = net.forward_features(im_data_ocr)
            labels_pred = net.forward_ocr(features)

            # Permute once and reuse; dim 0 / dim 1 of the permuted tensor
            # give the per-sample length and the number of samples below.
            ocr_acts = labels_pred.permute(2, 0, 1)
            probs_sizes = torch.IntTensor(
                [ocr_acts.size()[0]] * ocr_acts.size()[1])
            label_sizes = torch.IntTensor(
                torch.from_numpy(np.array(label_length)).int())
            labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int())
            loss_ocr = ctc_loss(ocr_acts, labels, probs_sizes,
                                label_sizes) / im_data_ocr.size(0) * 0.5

            loss_ocr.backward()
            loss.backward()

            optimizer.step()
        except Exception:
            # Best effort: a failing batch is reported and skipped.
            traceback.print_exc(file=sys.stdout)
        cnt += 1
        if step % disp_interval == 0:

            if opts.debug:

                segm = seg_pred[0].data.cpu()[0].numpy()
                segm = segm.squeeze(0)
                cv2.imshow('segm_map', segm)

                segm_res = cv2.resize(score_maps[0],
                                      (images.shape[2], images.shape[1]))
                mask = np.argwhere(segm_res > 0)

                x_data = im_data.data.cpu().numpy()[0]
                x_data = x_data.swapaxes(0, 2)
                x_data = x_data.swapaxes(0, 1)

                x_data += 1
                x_data *= 128
                x_data = np.asarray(x_data, dtype=np.uint8)
                x_data = x_data[:, :, ::-1]

                im_show = x_data
                try:
                    im_show[mask[:, 0], mask[:, 1], 1] = 255
                    im_show[mask[:, 0], mask[:, 1], 0] = 0
                    im_show[mask[:, 0], mask[:, 1], 2] = 0
                except Exception:
                    # The overlay is purely visual; skip it if it fails.
                    pass

                cv2.imshow('img0', im_show)
                cv2.imshow('score_maps', score_maps[0] * 255)
                cv2.imshow('train_mask', training_masks[0] * 255)
                cv2.waitKey(10)

            train_loss /= cnt
            bbox_loss /= cnt
            seg_loss /= cnt
            angle_loss /= cnt
            ctc_loss_val /= cnt
            box_loss_val /= cnt

            if train_loss < best_loss:
                best_step = step
                best_model = net.state_dict()
                best_loss = train_loss
                best_learning_rate = learning_rate
                best_optimizer = optimizer.state_dict()
            # Steps elapsed since the last improvement.  The reversed
            # difference (best_step - step) is never positive, so the
            # criterion could not trigger.
            if step - best_step > max_patience:
                print("Early stopped criteria achieved.")
                save_name = os.path.join(
                    opts.save_path,
                    'BEST_{}_{}.h5'.format(model_name, best_step))
                state = {
                    'step': best_step,
                    'learning_rate': best_learning_rate,
                    'state_dict': best_model,
                    'optimizer': best_optimizer
                }
                torch.save(state, save_name)
                print('save model: {}'.format(save_name))
                opts.max_iters = step
                early_stop = True
            try:
                print(
                    'epoch %d[%d], loss: %.3f, bbox_loss: %.3f, seg_loss: %.3f, ang_loss: %.3f, ctc_loss: %.3f, rec: %.5f in %.3f'
                    % (step / batch_per_epoch, step, train_loss, bbox_loss,
                       seg_loss, angle_loss, ctc_loss_val,
                       good_all / max(1, gt_all), end - start))
                print('max_memory_allocated {}'.format(
                    torch.cuda.max_memory_allocated()))
            except Exception:
                traceback.print_exc(file=sys.stdout)

            train_loss = 0
            bbox_loss, seg_loss, angle_loss = 0., 0., 0.
            cnt = 0
            ctc_loss_val = 0
            good_all = 0
            gt_all = 0
            box_loss_val = 0

            if early_stop:
                # Changing opts.max_iters does not shorten the range that is
                # already being iterated, so leave the loop explicitly.
                break

        #if step % valid_interval == 0:
        #  validate(opts.valid_list, net)
        if step > step_start and (step % batch_per_epoch == 0):
            save_name = os.path.join(opts.save_path,
                                     '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
                'max_memory_allocated': torch.cuda.max_memory_allocated()
            }
            torch.save(state, save_name)
            print('save model: {}\tmax memory: {}'.format(
                save_name, torch.cuda.max_memory_allocated()))
    if not early_stop:
        save_name = os.path.join(opts.save_path, '{}.h5'.format(model_name))
        state = {
            'step': step,
            'learning_rate': learning_rate,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        torch.save(state, save_name)
        print('save model: {}'.format(save_name))
示例#5
0
                          noise_levels=(args.noise_min, args.noise_max))

        rnn_type = args.rnn_type.lower()
        assert rnn_type in supported_rnns, "rnn_type should be either lstm, rnn or gru"
        model = DeepSpeech(rnn_hidden_size=args.hidden_size,
                           nb_layers=args.hidden_layers,
                           labels=labels,
                           rnn_type=supported_rnns[rnn_type],
                           audio_conf=audio_conf,
                           bidirectional=args.bidirectional)
        parameters = model.parameters()
        optimizer = torch.optim.SGD(parameters,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    nesterov=True)
    criterion = CTCLoss()
    decoder = GreedyDecoder(labels)
    train_dataset = SpectrogramDataset(audio_conf=audio_conf,
                                       manifest_filepath=args.train_manifest,
                                       labels=labels,
                                       normalize=True,
                                       augment=args.augment)
    test_dataset = SpectrogramDataset(audio_conf=audio_conf,
                                      manifest_filepath=args.val_manifest,
                                      labels=labels,
                                      normalize=True,
                                      augment=False)
    if not args.distributed:
        train_sampler = BucketingSampler(train_dataset,
                                         batch_size=args.batch_size)
    else:
示例#6
0
    def train(self, client_data, model, conf):
        """Run local training for one client and return its update.

        Training iterates over ``client_data`` repeatedly until
        ``conf.local_steps`` optimizer steps have been taken, an exception is
        raised inside the loop, or a full pass yields no batch at all.

        Args:
            client_data: Iterable of batches that also supports ``len()`` and
                exposes a ``dataset`` attribute (both are used here).
            model: Model to train; it is moved to ``conf.device``.
            conf: Configuration.  Fields read here include ``clientId``,
                ``tokenizer``, ``device``, ``local_steps``, ``batch_size``,
                ``gradient_policy``, ``task``, ``learning_rate``,
                ``loss_decay`` and, depending on the branch taken,
                ``weight_decay`` and ``proxy_mu``.

        Returns:
            dict with the keys ``clientId``, ``moving_loss``,
            ``trained_size``, ``success``, ``utility``, ``update_weight`` (one
            numpy array per model parameter) and ``wall_duration`` (always 0
            here).  Exceptions raised during training are not propagated;
            they are logged and end the training loop.
        """
        clientId = conf.clientId
        logging.info(f"Start to train (CLIENT: {clientId}) ...")
        tokenizer, device = conf.tokenizer, conf.device

        model = model.to(device=device)
        model.train()

        trained_unique_samples = min(len(client_data.dataset),
                                     conf.local_steps * conf.batch_size)
        if conf.gradient_policy == 'prox':
            # Snapshot of the incoming weights, used by the weight handler
            # after every optimizer step.
            global_model = [param.data.clone() for param in model.parameters()]

        if conf.task == "detection":
            lr = conf.learning_rate
            params = []
            for key, value in model.named_parameters():
                if not value.requires_grad:
                    continue
                if 'bias' in key:
                    # Conditional expression instead of the `a and b or 0`
                    # idiom; the selected value is numerically the same.
                    bias_decay = (cfg.TRAIN.WEIGHT_DECAY
                                  if cfg.TRAIN.BIAS_DECAY else 0)
                    params += [{
                        'params': [value],
                        'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                        'weight_decay': bias_decay
                    }]
                else:
                    params += [{
                        'params': [value],
                        'lr': lr,
                        'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                    }]
            optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)

        elif conf.task == 'nlp':
            # Parameters whose name contains one of these get no weight decay.
            no_decay = ["bias", "LayerNorm.weight"]
            optimizer_grouped_parameters = [
                {
                    "params": [
                        p for n, p in model.named_parameters()
                        if not any(nd in n for nd in no_decay)
                    ],
                    "weight_decay":
                    conf.weight_decay,
                },
                {
                    "params": [
                        p for n, p in model.named_parameters()
                        if any(nd in n for nd in no_decay)
                    ],
                    "weight_decay":
                    0.0,
                },
            ]
            optimizer = torch.optim.AdamW(optimizer_grouped_parameters,
                                          lr=conf.learning_rate)
        else:
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=conf.learning_rate,
                                        momentum=0.9,
                                        weight_decay=5e-4)

        if conf.task == 'voice':
            from torch_baidu_ctc import CTCLoss
            criterion = CTCLoss(reduction='none').to(device=device)
        else:
            criterion = torch.nn.CrossEntropyLoss(reduction='none').to(
                device=device)

        # 1e-4 doubles as the "no loss recorded yet" marker tested below.
        epoch_train_loss = 1e-4

        error_type = None
        completed_steps = 0

        if conf.task == "detection":
            # Placeholder tensors, resized and filled for every batch.
            im_data = Variable(torch.FloatTensor(1).cuda())
            im_info = Variable(torch.FloatTensor(1).cuda())
            num_boxes = Variable(torch.LongTensor(1).cuda())
            gt_boxes = Variable(torch.FloatTensor(1).cuda())

        # TODO: One may hope to run fixed number of epochs, instead of iterations
        while completed_steps < conf.local_steps:
            steps_before_pass = completed_steps
            try:
                for data_pair in client_data:

                    if conf.task == 'nlp':
                        (data, _) = data_pair
                        data, target = mask_tokens(data,
                                                   tokenizer,
                                                   conf,
                                                   device=device)
                    elif conf.task == 'voice':
                        (data, target, input_percentages,
                         target_sizes), _ = data_pair
                        input_sizes = input_percentages.mul_(int(
                            data.size(3))).int()
                    elif conf.task == 'detection':
                        temp_data = data_pair
                        target = temp_data[4]
                        data = temp_data[0:4]
                    else:
                        (data, target) = data_pair

                    if conf.task == "detection":
                        im_data.resize_(data[0].size()).copy_(data[0])
                        im_info.resize_(data[1].size()).copy_(data[1])
                        gt_boxes.resize_(data[2].size()).copy_(data[2])
                        num_boxes.resize_(data[3].size()).copy_(data[3])
                    elif conf.task == 'speech':
                        data = torch.unsqueeze(data, 1).to(device=device)
                    else:
                        data = Variable(data).to(device=device)

                    target = Variable(target).to(device=device)

                    if conf.task == 'nlp':
                        outputs = model(data, labels=target)
                        loss = outputs[0]
                    elif conf.task == 'voice':
                        outputs, output_sizes = model(data, input_sizes)
                        outputs = outputs.transpose(0, 1).float()  # TxNxH
                        loss = criterion(outputs, target, output_sizes,
                                         target_sizes)
                    elif conf.task == "detection":
                        rois, cls_prob, bbox_pred, \
                        rpn_loss_cls, rpn_loss_box, \
                        RCNN_loss_cls, RCNN_loss_bbox, \
                        rois_label = model(im_data, im_info, gt_boxes, num_boxes)

                        loss = rpn_loss_cls + rpn_loss_box \
                                + RCNN_loss_cls + RCNN_loss_bbox

                        loss_rpn_cls = rpn_loss_cls.item()
                        loss_rpn_box = rpn_loss_box.item()
                        loss_rcnn_cls = RCNN_loss_cls.item()
                        loss_rcnn_box = RCNN_loss_bbox.item()
                        print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                        % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                    else:
                        output = model(data)
                        loss = criterion(output, target)

                    # ======== collect training feedback for other decision components [e.g., kuiper selector] ======
                    if conf.task == 'nlp':
                        loss_list = [loss.item()]  #[loss.mean().data.item()]
                    elif conf.task == "detection":
                        loss_list = [loss.tolist()]
                        loss = loss.mean()
                    else:
                        loss_list = loss.tolist()
                        loss = loss.mean()

                    # Mean of the squared per-sample losses.
                    temp_loss = sum(l**2 for l in loss_list) / float(
                        len(loss_list))

                    # only measure the loss of the first epoch
                    if completed_steps < len(client_data):
                        if epoch_train_loss == 1e-4:
                            epoch_train_loss = temp_loss
                        else:
                            epoch_train_loss = (
                                1. - conf.loss_decay
                            ) * epoch_train_loss + conf.loss_decay * temp_loss

                    # ========= Define the backward loss ==============
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    # ========= Weight handler ========================
                    if conf.gradient_policy == 'prox':
                        # NOTE(review): a proximal step would normally move
                        # weights toward global_model (a `-=` here); this
                        # `+=` moves them away - confirm the intended sign.
                        for idx, param in enumerate(model.parameters()):
                            param.data += conf.learning_rate * conf.proxy_mu * (
                                param.data - global_model[idx])

                    completed_steps += 1

                    if completed_steps == conf.local_steps:
                        break

            except Exception as ex:
                error_type = ex
                break

            if completed_steps == steps_before_pass:
                # The loader yielded no batch during a whole pass; another
                # pass cannot make progress, so stop instead of spinning.
                break

        model_param = [
            param.data.cpu().numpy() for param in model.parameters()
        ]
        results = {
            'clientId': clientId,
            'moving_loss': epoch_train_loss,
            'trained_size': completed_steps * conf.batch_size,
            'success': completed_steps > 0
        }
        results['utility'] = math.sqrt(epoch_train_loss) * float(
            trained_unique_samples)

        if error_type is None:
            logging.info(
                f"Training of (CLIENT: {clientId}) completes, {results}")
        else:
            logging.info(
                f"Training of (CLIENT: {clientId}) failed as {error_type}")

        results['update_weight'] = model_param
        results['wall_duration'] = 0

        return results
示例#7
0
# NOTE(review): ``x`` and ``y`` are not defined in this excerpt; presumably
# they are created earlier in the example - confirm.
# Lengths paired with ``x`` (third argument of ``ctc_loss`` below)
xs = torch.tensor([10, 6, 9], dtype=torch.int)
# Target lengths
ys = torch.tensor([5, 3, 4], dtype=torch.int)

# By default, the costs (negative log-likelihood) of all samples are summed.
# This is equivalent to:
#   ctc_loss(x, y, xs, ys, average_frames=False, reduction="sum")
loss1 = ctc_loss(x, y, xs, ys)

# You can also average the cost of each sample over its number of frames.
# The averaged costs are then summed.
loss2 = ctc_loss(x, y, xs, ys, average_frames=True)

# Instead of summing the costs of each sample, you can perform
# other `reductions`: "none", "sum", or "mean"
#
# Return an array with the loss of each individual sample
losses = ctc_loss(x, y, xs, ys, reduction="none")
#
# Compute the mean of the individual losses
loss3 = ctc_loss(x, y, xs, ys, reduction="mean")
#
# First normalize each loss by its number of frames, then average the losses
loss4 = ctc_loss(x, y, xs, ys, average_frames=True, reduction="mean")

# Finally, there's also a nn.Module to use this loss.
ctc = CTCLoss(average_frames=True, reduction="mean", blank=0)
loss4_2 = ctc(x, y, xs, ys)

# Note: the `blank` option is also available for `ctc_loss`.
# By default it is 0.
示例#8
0
def main(args):
    """Train the localization + recognition model, checkpointing per epoch.

    Every iteration draws one localization batch and one OCR batch, computes
    the localization loss (plus, after step 10000, the ``recognizer`` loss)
    and the OCR CTC loss, back-propagates both and takes one Adam step.

    Args:
        args: Option namespace.  Fields read here: ``checkpoint`` (set to
            ``'checkpoints'`` when empty), ``lr``, ``num_workers``,
            ``input_dirs``, ``input_size``, ``batch_size``, ``debug``,
            ``ocr_input_list``, ``ocr_batch_size``, ``norm_height``,
            ``resume``, ``max_iterators`` and ``ocr_debug``.

    The module-level names ``print_interval`` and ``batch_per_epoch`` are
    expected to exist.
    """
    import traceback  # function-scope import: used only for error reporting

    if args.checkpoint == '':
        args.checkpoint = 'checkpoints'
    print(('checkpoint path: %s' % args.checkpoint))
    print(('init lr: %.8f' % args.lr))
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    start_step = 0

    train_loader = dataset.get_batch(num_workers=args.num_workers, input_dirs=args.input_dirs,
                                     input_size=args.input_size, batch_size=args.batch_size, vis=args.debug)

    # Load OCR dataset
    ocr_loader = ocr_dataset.get_batch(num_workers=2, input_list=args.ocr_input_list,
                                       batch_size=args.ocr_batch_size, norm_height=args.norm_height)

    model = resnet50(pretrained=True).cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    if args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        start_step = checkpoint['step']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
    else:
        print('Training from scratch.')

    model.train()

    ctc_loss = CTCLoss()

    # Accumulators, reset after every progress print.
    train_loss_val, counter = 0, 0
    ctc_loss_val = 0
    good_all = 0
    gt_all = 0

    for step in range(start_step, args.max_iterators):
        # Localization data loader
        images_org, score_maps, geo_maps, training_masks, gt_outputs, label_outputs = next(train_loader)
        images = np_to_variable(images_org).permute(0, 3, 1, 2)
        score_maps = np_to_variable(score_maps).permute(0, 3, 1, 2)
        training_masks = np_to_variable(training_masks).permute(0, 3, 1, 2)
        geo_maps = np_to_variable(geo_maps)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Train the network localization and recognition
        score_pred, geo_pred = model(images)

        # Calculating the Loss
        loss = losses.loss(score_maps, score_pred, geo_maps, geo_pred, training_masks)

        train_loss_val += loss.item()

        if step > 10000:
            try:
                recog_loss, gt_b_good, gt_b_all = recognizer(images_org, images, gt_outputs, label_outputs, model,
                                                             ctc_loss, args.norm_height, args.ocr_debug)
                ctc_loss_val += recog_loss.item()
                loss = loss + recog_loss
                gt_all += gt_b_all
                good_all += gt_b_good
            except Exception:
                # Best effort: training continues with the localization loss
                # only, but the failure is reported instead of being
                # silently discarded.
                traceback.print_exc()

        # Recognition data loader
        ocr_images, labels, labels_length = next(ocr_loader)
        ocr_images = np_to_variable(ocr_images).permute(0, 3, 1, 2)
        labels_pred = model.forward_ocr(ocr_images)

        # Permute once and reuse; dim 0 / dim 1 of the permuted tensor give
        # the per-sample length and the number of samples below.
        ocr_acts = labels_pred.permute(2, 0, 1)
        probs_sizes = torch.IntTensor([ocr_acts.size()[0]] * ocr_acts.size()[1])
        label_sizes = torch.IntTensor(torch.from_numpy(np.array(labels_length)).int())
        labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int())
        loss_ocr = ctc_loss(ocr_acts, labels, probs_sizes, label_sizes) / ocr_images.size(0) * 0.5

        # Calculating the Gradients
        loss_ocr.backward()
        loss.backward()

        # Update the weights
        optimizer.step()

        counter += 1

        if (step + 1) % print_interval == 0:
            train_loss_val /= counter
            ctc_loss_val /= counter

            print('\nEpoch: %d[%d] | LR: %f | Loss: %.3f | CTC_Loss: %.3f | Rec: %.5f' % (
                (step + 1) / batch_per_epoch, step + 1, optimizer.param_groups[0]['lr'], train_loss_val, ctc_loss_val,
                good_all / max(1, gt_all)))

            train_loss_val, counter = 0, 0
            ctc_loss_val = 0
            good_all = 0
            gt_all = 0

        if (step + 1) % batch_per_epoch == 0:
            checkpoint_file_name = 'LS1706203-{}.h5'.format(step + 1)
            save_checkpoint({
                'step': step + 1,
                'state_dict': model.state_dict(),
                'lr': args.lr,
                'optimizer': optimizer.state_dict(),
            }, checkpoint=args.checkpoint, filename=checkpoint_file_name)
def train(opt):
    """Train the text-recognition model configured by ``opt`` and validate periodically.

    The function builds the training dataset and a validation loader, creates
    the model, loss and optimizer from ``opt``, appends the options to
    ``./saved_models/{opt.experiment_name}/opt.txt`` and then loops over
    training batches. Whenever ``i % opt.valInterval == 0`` it runs
    ``validation``, appends the results to ``log_train.txt`` in the same
    directory and saves ``best_accuracy.pth`` / ``best_norm_ED.pth`` when the
    respective metric improves. Weights are additionally saved every 1e+5
    iterations.

    Args:
        opt: option namespace. It is mutated in place: ``select_data`` and
            ``batch_ratio`` are split on ``'-'`` into lists, ``num_class`` is
            set from the label converter, and ``input_channel`` is set to 3
            when ``opt.rgb`` is truthy.

    Note:
        The function does not return; it calls ``sys.exit()`` once the
        iteration counter equals ``opt.num_iter``.
    """
    # ---- dataset preparation ----
    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW)
    valid_dataset = hierarchical_dataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=opt.batch_size,
        shuffle=True,  # 'True' to check training progress with validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid, pin_memory=True)
    print('-' * 80)

    # ---- model configuration ----
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)
    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception:  # for batchnorm.
            # Best-effort fallback kept from the original: when the initializer
            # rejects the parameter, weights are filled with ones instead.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).cuda()
    model.train()
    if opt.continue_model != '':
        print(f'loading pretrained model from {opt.continue_model}')
        model.load_state_dict(torch.load(opt.continue_model))
    print("Model:")
    print(model)

    # ---- setup loss ----
    if 'CTC' in opt.Prediction:
        criterion = CTCLoss(reduction='sum')
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).cuda()  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # keep only the parameters that require gradients
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters, lr=opt.lr, betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr, rho=opt.rho, eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    # ---- final options: print them and append them to opt.txt ----
    # print(opt)
    with open(f'./saved_models/{opt.experiment_name}/opt.txt', 'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    # ---- start training ----
    start_iter = 0
    if opt.continue_model != '':
        # The iteration number is parsed from the checkpoint file name:
        # the text between the last '_' and the following '.'.
        start_iter = int(opt.continue_model.split('_')[-1].split('.')[0])
        print(f'continue to train, start_iter: {start_iter}')

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = 1e+6
    i = start_iter

    while(True):
        # train part
        for p in model.parameters():
            p.requires_grad = True

        cpu_images, cpu_texts = train_dataset.get_batch()
        image = cpu_images.cuda()
        text, length = converter.encode(cpu_texts)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text)
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            preds = preds.permute(1, 0, 2)  # to use CTCLoss format
            cost = criterion(preds, text, preds_size, length) / batch_size

        else:
            preds = model(image, text)
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]), target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        loss_avg.add(cost)

        # validation part
        if i % opt.valInterval == 0:
            elapsed_time = time.time() - start_time
            print(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}')
            # for log
            with open(f'./saved_models/{opt.experiment_name}/log_train.txt', 'a') as log:
                log.write(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}\n')
                loss_avg.reset()

                model.eval()
                valid_loss, current_accuracy, current_norm_ED, preds, gts, infer_time = validation(
                    model, criterion, valid_loader, converter, opt)
                model.train()

                # show the first five predictions next to their ground truth
                for pred, gt in zip(preds[:5], gts[:5]):
                    if 'CTC' not in opt.Prediction:
                        # Cut at the '[s]' token. partition() leaves the string
                        # intact when the token is absent, whereas slicing with
                        # find() == -1 would silently drop the last character.
                        pred = pred.partition('[s]')[0]
                        gt = gt.partition('[s]')[0]
                    print(f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}')
                    log.write(f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}\n')

                valid_log = f'[{i}/{opt.num_iter}] valid loss: {valid_loss:0.5f}'
                valid_log += f' accuracy: {current_accuracy:0.3f}, norm_ED: {current_norm_ED:0.2f}'
                print(valid_log)
                log.write(valid_log + '\n')

                # keep best accuracy model
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/best_accuracy.pth')
                if current_norm_ED < best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/best_norm_ED.pth')
                best_model_log = f'best_accuracy: {best_accuracy:0.3f}, best_norm_ED: {best_norm_ED:0.2f}'
                print(best_model_log)
                log.write(best_model_log + '\n')

        # save model per 1e+5 iter.
        if (i + 1) % 1e+5 == 0:
            torch.save(
                model.state_dict(), f'./saved_models/{opt.experiment_name}/iter_{i+1}.pth')

        if i == opt.num_iter:
            print('end the training')
            sys.exit()
        i += 1
示例#10
0
def main(opts):
    """Train the OCR head of an ``OctMLT`` network with CTC loss.

    Creates the network and an Adam optimizer, optionally restores a
    checkpoint from ``opts.model``, applies the requested ``freeze_*`` options
    and then runs up to 300000 steps on batches from ``ocr_gen.get_batch``.
    The running loss is printed every ``disp_interval`` steps and a checkpoint
    is written to ``opts.save_path`` every ``batch_per_epoch`` steps.

    Args:
        opts: parsed options. The attributes read here are ``cuda``, ``model``,
            ``load_ocr``, ``load_detection``, ``load_shared``,
            ``load_optimizer``, ``load_reset_step``, ``freeze_shared``,
            ``freeze_ocr``, ``freeze_detection``, ``num_readers``,
            ``batch_size``, ``train_list``, ``norm_height``, ``debug`` and
            ``save_path``.

    Note:
        ``base_lr``, ``weight_decay``, ``disp_interval``, ``batch_per_epoch``
        and ``codec`` are read from module scope.
    """
    model_name = 'OctGatedMLT'
    net = OctMLT(attention=True)
    acc = []

    if opts.cuda:
        net.cuda()

    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=base_lr,
                                 weight_decay=weight_decay)
    if os.path.exists(opts.model):
        # Use the options passed to this function rather than a global
        # ``args`` object, so the path checked and the path loaded agree.
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(
            opts.model,
            net,
            optimizer,
            load_ocr=opts.load_ocr,
            load_detection=opts.load_detection,
            load_shared=opts.load_shared,
            load_optimizer=opts.load_optimizer,
            reset_step=opts.load_reset_step)
    else:
        learning_rate = base_lr

    # NOTE(review): the step restored from the checkpoint is discarded here, so
    # training always restarts at step 0 regardless of ``load_reset_step``.
    # Kept as in the original; confirm whether this is intended.
    step_start = 0

    net.train()

    if opts.freeze_shared:
        net_utils.freeze_shared(net)

    if opts.freeze_ocr:
        net_utils.freeze_ocr(net)

    if opts.freeze_detection:
        net_utils.freeze_detection(net)

    #acc_test = test(net, codec, opts, list_file=opts.valid_list, norm_height=opts.norm_height)
    #acc.append([0, acc_test])
    ctc_loss = CTCLoss()

    data_generator = ocr_gen.get_batch(num_workers=opts.num_readers,
                                       batch_size=opts.batch_size,
                                       train_list=opts.train_list,
                                       in_train=True,
                                       norm_height=opts.norm_height,
                                       rgb=True)

    train_loss = 0
    cnt = 0

    for step in range(step_start, 300000):
        # batch
        images, labels, label_length = next(data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(
            0, 3, 1, 2)
        features = net.forward_features(im_data)
        labels_pred = net.forward_ocr(features)

        # backward
        # Arguments expected by the CTC loss (as described by the original author):
        #   acts:       Tensor of (seqLength x batch x outputDim) containing output from network
        #   labels:     1 dimensional Tensor containing all the targets of the batch in one sequence
        #   act_lens:   Tensor of size (batch) containing size of each output sequence from the network
        #   label_lens: Tensor of (batch) containing label length of each example
        acts = labels_pred.permute(2, 0, 1)
        probs_sizes = torch.IntTensor([acts.size()[0]] * acts.size()[1])
        label_sizes = torch.IntTensor(
            torch.from_numpy(np.array(label_length)).int())
        labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int())
        loss = ctc_loss(acts, labels, probs_sizes,
                        label_sizes) / im_data.size(0)  # change 1.9.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Copy the loss to the host once per step; infinite losses are left out
        # of the running average.
        loss_value = loss.data.cpu().numpy()
        if not np.isinf(loss_value):
            train_loss += loss_value
            cnt += 1

        if opts.debug:
            dbg = labels_pred.data.cpu().numpy()
            ctc_f = dbg.swapaxes(1, 2)
            labels = ctc_f.argmax(2)
            det_text, conf, dec_s = print_seq_ext(labels[0, :], codec)

            print('{0} \t'.format(det_text))

        if step % disp_interval == 0:

            # cnt is 0 when every loss since the last report was infinite;
            # guard the division so reporting cannot abort the run.
            train_loss /= max(1, cnt)
            print('epoch %d[%d], loss: %.3f, lr: %.5f ' %
                  (step / batch_per_epoch, step, train_loss, learning_rate))

            train_loss = 0
            cnt = 0

        if step > step_start and (step % batch_per_epoch == 0):
            save_name = os.path.join(opts.save_path,
                                     '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, save_name)
            print('save model: {}'.format(save_name))

            #acc_test, ted = test(net, codec, opts,  list_file=opts.valid_list, norm_height=opts.norm_height)
            #acc.append([0, acc_test, ted])
            np.savez('train_acc_{0}'.format(model_name), acc=acc)
示例#11
0
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=cfg.batchSize,
                                             shuffle=True,
                                             sampler=sampler,
                                             num_workers=int(cfg.workers),
                                             collate_fn=dataset.alignCollate(
                                                 imgH=cfg.imgH,
                                                 imgW=cfg.imgW,
                                                 keep_ratio=cfg.keep_ratio,
                                                 cuda=cfg.cuda))

    converter = utils.strLabelConverter(cfg.dic_path)

    # 2:loss
    criterion = CTCLoss()

    # 3:模型
    crnn = crnn.CRNN(cfg.imgH, cfg.nc, cfg.nclass, cfg.nh)
    if cfg.cuda:
        crnn.cuda()
        criterion = criterion.cuda()

    crnn.apply(weights_init)
    if cfg.loadCheckpoint != None:
        print('loading pretrained model from %s' % cfg.loadCheckpoint)
        crnn.load_state_dict(torch.load(cfg.loadCheckpoint))

    # 4:优化方式
    if cfg.adam:
        optimizer = optim.Adam(crnn.parameters(),
示例#12
0
文件: train.py 项目: Anmolbansal1/OCR
def main(data_path, abc, seq_proj, backend, snapshot, input_size, base_lr, step_size, max_iter, batch_size, output_dir, test_epoch, test_init, gpu):
    """Train a CRNN text recognizer with CTC loss, testing and snapshotting as it goes.

    Args:
        data_path: dataset location passed to ``TextDataset``; when ``None`` a
            ``TestDataset`` is used instead.
        abc: alphabet passed to the dataset.
        seq_proj: string of integers separated by ``'x'``; parsed into a list
            and passed to ``load_model``.
        backend: backend name passed to ``load_model`` and used in file names.
        snapshot: snapshot argument passed to ``load_model``.
        input_size: string of two integers separated by ``'x'``, used as the
            ``Resize`` target size.
        base_lr: learning rate for Adam.
        step_size: passed to the ``StepLR`` scheduler.
        max_iter: passed to the ``StepLR`` scheduler.
        batch_size: batch size for the training ``DataLoader``.
        output_dir: directory for saved weights; nothing is saved when ``None``.
        test_epoch: run the test phase every ``test_epoch`` epochs (skipped
            when ``None``).
        test_init: when truthy, run the test phase before the first epoch.
        gpu: value assigned to ``CUDA_VISIBLE_DEVICES`` (comma-separated ids);
            an empty string disables CUDA.

    Note:
        The epoch loop has no exit condition, so this function does not return
        normally.
    """
    print(abc)
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    # Compare by value: identity checks against a string literal are unreliable.
    cuda = gpu != ''
    # Loop-invariant: number of entries in the comma-separated gpu string.
    n_gpus = len(gpu.split(','))

    input_size = [int(x) for x in input_size.split('x')]
    transform = Compose([
        # Rotation(),
        Translation(),
        # Scale(),
        Resize(size=(input_size[0], input_size[1]))
    ])
    if data_path is not None:
        data = TextDataset(data_path=data_path, mode="train", transform=transform, abc=abc)
    else:
        data = TestDataset(transform=transform, abc=abc)
    seq_proj = [int(x) for x in seq_proj.split('x')]
    net = load_model(data.get_abc(), seq_proj, backend, snapshot, cuda)
    optimizer = optim.Adam(net.parameters(), lr = base_lr, weight_decay=0.0001)
    lr_scheduler = StepLR(optimizer, step_size=step_size, max_iter=max_iter)
    loss_function = CTCLoss(average_frames=True, reduction="mean", blank=0)

    acc_best = 0
    epoch_count = 0
    while True:
        if (test_epoch is not None and epoch_count != 0 and epoch_count % test_epoch == 0) or (test_init and epoch_count == 0):
            print("Test phase")
            data.set_mode("test")
            net = net.eval()
            acc, avg_ed = test(net, data, data.get_abc(), cuda, visualize=False)
            net = net.train()
            data.set_mode("train")
            if acc > acc_best:
                if output_dir is not None:
                    torch.save(net.state_dict(), os.path.join(output_dir, "crnn_" + backend + "_" + str(data.get_abc()) + "_best"))
                acc_best = acc
            print("acc: {}\tacc_best: {}; avg_ed: {}".format(acc, acc_best, avg_ed))

        data_loader = DataLoader(data, batch_size=batch_size, num_workers=1, shuffle=True, collate_fn=text_collate)
        loss_mean = []
        iterator = tqdm(data_loader)
        iter_count = 0
        save_num = 0
        for sample in iterator:
            # The last batch of an epoch can be smaller than ``batch_size``, so
            # size everything from the batch that was actually loaded.
            cur_batch_size = sample["img"].size(0)
            # for multi-gpu support
            if cur_batch_size % n_gpus != 0:
                continue
            optimizer.zero_grad()
            imgs = Variable(sample["img"])
            # img = imgs[0]
            # print(img.shape)
            # cv2.imshow('image', img.numpy().transpose(1, 2, 0))
            # cv2.waitKey(0)
            labels = Variable(sample["seq"]).view(-1)
            label_lens = Variable(sample["seq_len"].int())
            if cuda:
                imgs = imgs.cuda()
            preds = net(imgs).cpu()
            # One prediction length (preds.size(0)) per sample in this batch.
            pred_lens = Variable(Tensor([preds.size(0)] * cur_batch_size).int())
            loss = loss_function(preds, labels, pred_lens, label_lens) / cur_batch_size
            loss.backward()
            # NOTE(review): newer PyTorch releases name this clip_grad_norm_;
            # left unchanged to match the API level the rest of the block uses.
            nn.utils.clip_grad_norm(net.parameters(), 10.0)
            # Read the loss as a plain float; flattening first works for both
            # 0-dim and 1-element loss tensors.
            loss_value = float(loss.data.view(-1)[0])
            loss_mean.append(loss_value)
            status = "epoch: {}; iter: {}; lr: {}; loss_mean: {}; loss: {}".format(epoch_count, lr_scheduler.last_iter+1, lr_scheduler.get_lr(), np.mean(loss_mean), loss_value)
            iterator.set_description(status)
            optimizer.step()
            lr_scheduler.step()
            iter_count += 1
            if output_dir is not None and iter_count % 500 == 0:
                # save_num is an int and must be converted before concatenation;
                # the float loss keeps the file name free of tensor repr text.
                torch.save(net.state_dict(), os.path.join(output_dir, "crnn_" + str(save_num) + "_" + str(loss_value) + "_last"))
                save_num += 1
        if output_dir is not None:
            torch.save(net.state_dict(), os.path.join(output_dir, "crnn_" + backend + "_" + str(data.get_abc()) + "_last"))
        epoch_count += 1