def get_loader(train_path, test_path, ratio):
    train_img_paths = []
    for img_path in glob.glob(os.path.join(train_path, '*.jpg')):
        train_img_paths.append(img_path)
    test_img_paths = []
    for img_path in glob.glob(os.path.join(test_path, '*.jpg')):
        test_img_paths.append(img_path)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    train_loader = torch.utils.data.DataLoader(
        RawDataset(train_img_paths, transform, aug=True, ratio=ratio),
        shuffle=True, batch_size=1)
    test_loader = torch.utils.data.DataLoader(
        RawDataset(test_img_paths, transform, ratio=1, aug=False),
        shuffle=False, batch_size=1)
    return train_loader, test_loader
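A minimal usage sketch (the directory names are hypothetical; RawDataset and its `ratio` augmentation argument come from the surrounding project):

# hypothetical folders of .jpg images
train_loader, test_loader = get_loader('data/train', 'data/test', ratio=0.5)

Example #2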
    def predictAllImagesInFolder(self, src_path):

        opt = self.opts
        AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                         imgW=opt.imgW,
                                         keep_ratio_with_pad=opt.PAD)
        demo_data = RawDataset(root=src_path, opt=opt)  # use RawDataset
        demo_loader = torch.utils.data.DataLoader(
            demo_data,
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=int(opt.workers),
            collate_fn=AlignCollate_demo,
            pin_memory=torch.cuda.is_available())

        results = []
        for image_tensors, image_path_list in demo_loader:

            preds_str = self.predict(image_tensors)

            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' in opt.Prediction:
                    pred = pred[:pred.find('[s]')]  # prune after "end of sentence" token ([s])
                results.append(f'{os.path.basename(img_name)},{pred}')

        return results
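The method returns `basename,prediction` rows, so the result can be written straight to CSV; a hedged usage sketch (`recognizer`, the instance owning this method, and the folder name are hypothetical):

rows = recognizer.predictAllImagesInFolder('demo_images/')
with open('predictions.csv', 'w', encoding='utf-8') as f:
    f.write('\n'.join(rows) + '\n')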
Example #3
def get_loader(args):
    test_img_paths = []
    for img_path in glob.glob(os.path.join(args.test_img_dir, '*.jpg')):
        test_img_paths.append(img_path)
    # NOTE: `transform` is assumed to be defined at module scope
    test_loader = torch.utils.data.DataLoader(RawDataset(test_img_paths,
                                                         transform,
                                                         ratio=1,
                                                         aug=False),
                                              shuffle=False,
                                              batch_size=1)
    return test_loader, test_img_paths
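A minimal usage sketch (the directory is hypothetical, and `args` would normally come from argparse):

from types import SimpleNamespace  # stand-in for an argparse namespace
args = SimpleNamespace(test_img_dir='data/test')  # hypothetical folder of .jpg images
test_loader, test_img_paths = get_loader(args)
print(f'{len(test_img_paths)} test images in {len(test_loader)} batches')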
Example #4
File: demo.py Project: inouetaka/OCR_web
def original_demo(model, converter, length_for_pred, text_for_pred):
    opt = option()
    AlignCollate_demo = AlignCollate(imgH=opt['imgH'],
                                     imgW=opt['imgW'],
                                     keep_ratio_with_pad=opt['PAD'])
    demo_data = RawDataset(root=opt['image_folder'], opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt['batch_size'],
                                              shuffle=False,
                                              num_workers=int(opt['workers']),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)
    print(demo_loader)
    # predict

    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            #torch.cuda.synchronize(device)
            if 'CTC' == opt['Prediction']:
                print('kotti')  # debug print
                preds = model(image, text_for_pred).log_softmax(2)
                # Select max probability then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print('-' * 80)
            print('image_path\tpredicted_labels')
            print('-' * 80)
            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' == opt['Prediction']:
                    pred = pred[:pred.find('[s]')]  # prune after "end of sentence" token ([s])

                print(f'{img_name}\t{pred}')
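original_demo expects its converter and prediction buffers prebuilt by the caller; a hedged sketch of that setup (the Attn head, the module-level device, and an already-loaded model are assumed, and sizes must match the loader):

# Hypothetical caller setup, mirroring the loop above.
opt = option()
converter = AttnLabelConverter(opt['character'])
length_for_pred = torch.IntTensor(
    [opt['batch_max_length']] * opt['batch_size']).to(device)
text_for_pred = torch.LongTensor(
    opt['batch_size'], opt['batch_max_length'] + 1).fill_(0).to(device)
original_demo(model, converter, length_for_pred, text_for_pred)

Example #5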
    def add_data(self, model, new_data, num_everyclass, device):
        """add the new data to datapool and reduce the quatity of data stored.
        
        Args:
            model: representer.
            new_data: list, [[data,[label]], ...]
            label_list: list, ['0'...]
            num_everyclass: int.
        """
        # if the data pool isn't empty, adjust the number of data stored
        if len(self.data_pool_dict) > 0:
            remained_dict, msg = adjust_data_pool(self.data_pool_dict,
                                                  num_everyclass)
            self.data_pool_dict = remained_dict
            logging.info(msg)
            logging.info("num_everyclass = {}".format(num_everyclass))

        data_dict_tmp = {}
        feature_mean_dict = {}
        for class_label in new_data.classes:
            dataset_tmp = RawDataset(new_data.dir_data, new_data.dataname,
                                     new_data.task, [class_label])
            dataloader_tmp = DataLoader(dataset_tmp,
                                        batch_size=16,
                                        num_workers=1)
            data_feature = get_output(model, dataloader_tmp, device)
            feature_mean = np.mean(data_feature, axis=0)
            # squared Euclidean distance to the class mean (signed differences would cancel)
            dist_data = np.sum((data_feature - feature_mean)**2, axis=1)
            idx_selected = np.argsort(dist_data)[:num_everyclass]
            data_selected = get_selected_idx(new_data.ts_list, idx_selected)
            data_dict_tmp[class_label] = data_selected
            feature_mean_dict[class_label] = feature_mean

        # update the data pool
        self.data_pool_dict.update(data_dict_tmp)
        self.feature_mean.update(feature_mean_dict)
        self.classes += new_data.classes
        self.num_everyclass = num_everyclass

        self.save_datapool_to_pkl()
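The helper adjust_data_pool is defined elsewhere in the project; a minimal sketch of the behavior the call above appears to assume (truncate each class list to num_everyclass and report what was dropped) — an illustration, not the project's actual implementation:

def adjust_data_pool(data_pool_dict, num_everyclass):
    # hypothetical reimplementation: keep the first num_everyclass samples per class
    remained_dict = {label: samples[:num_everyclass]
                     for label, samples in data_pool_dict.items()}
    dropped = sum(len(v) for v in data_pool_dict.values()) \
        - sum(len(v) for v in remained_dict.values())
    msg = 'adjust_data_pool: dropped {} stored samples'.format(dropped)
    return remained_dict, msg

Example #6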
def demo(opt):
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    try:
        model = torch.nn.DataParallel(model).to(device)
    except RuntimeError as err:
        raise RuntimeError(f'failed to move the model to {device}') from err

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                # preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index, preds_size)
            else:
                preds, alphas = model(image, text_for_pred, is_train=False)
                alphas = alphas.detach().cpu().numpy()
                if opt.batch_max_length == 1:
                    # select top-k probabilities (greedy decoding) then decode indices to characters
                    k = opt.topk
                    preds = F.softmax(preds, dim=2)
                    topk_prob, topk_id = preds.topk(k)
                    topk_id = topk_id.detach().cpu()[:, 0, :].unsqueeze(
                        dim=1).numpy()  # (batch_size, topk)
                    # append the [s] (EOS, id 3) token to the end of the ids
                    topk_s = np.ones_like(topk_id) * 3
                    topk_id = np.concatenate((topk_id, topk_s), axis=1)
                    topk_chars = converter.decode(topk_id, length_for_pred)
                    topk_probs = topk_prob.detach().cpu()[:, 0, :]  # (batch_size, topk)
                else:
                    # select top-k probabilities (greedy decoding) then decode indices to characters
                    k = opt.topk
                    # _, preds_index = preds.max(dim=2)
                    # preds_str = converter.decode(preds_index, length_for_pred)
                    preds = F.softmax(preds, dim=2)
                    topk_prob, topk_id = preds.topk(k, dim=2)
                    topk_id = topk_id.detach().cpu().numpy()  # (batch_size, seq_len, topk)
                    topk_probs = topk_prob.detach().cpu()
                    topk_strs = converter.decode(topk_id, length_for_pred)

            if opt.batch_max_length == 1:
                log = open(f'./log_demo_result.csv', 'a', encoding='utf-8')
                # topk_probs = F.softmax(topk_probs, dim=-1)
                for img_name, pred, pred_max_prob in zip(
                        image_path_list, topk_chars, topk_probs):
                    if 'Attn' in opt.Prediction:
                        # prune each candidate after the "end of sentence" token ([s])
                        pred = [p[:p.find('[s]')] for p in pred]
                    print(img_name, end='')
                    log.write(img_name)
                    for pred_char, pred_prob in zip(pred, pred_max_prob):
                        print(',' + pred_char, end='')
                        print(',%.4f' % pred_prob, end='')
                        log.write(',' + pred_char)
                        log.write(',%.4f' % pred_prob)
                    print()
                    log.write('\n')
                log.close()
            else:
                log = open(f'./log_demo_result.txt', 'a', encoding='utf-8')
                dashed_line = '-' * 80
                head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

                print(f'{dashed_line}\n{head}\n{dashed_line}')
                log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                # preds_prob = F.softmax(preds, dim=2)
                # preds_max_prob, _ = preds_prob.max(dim=2)
                if 'Attn' in opt.Prediction:
                    for idx, (img_name, pred, pred_max_prob) in enumerate(
                            zip(image_path_list, topk_strs, topk_probs)):
                        pred_EOS = pred[0].find('[s]')
                        # prune every candidate after the "end of sentence" token ([s])
                        pred = [s[:pred_EOS] for s in pred]
                        pred_max_prob = pred_max_prob[:pred_EOS, :]
                        if opt.output_split:
                            alpha = alphas[idx, :, :].transpose()
                            img = Image.open(img_name).convert('RGB')
                            width, height = img.size
                            alpha = alpha[:pred_EOS]
                            if len(alpha) > 0:
                                last_alpha_line = alpha[-1]
                                # remove the effect of padding
                                seq_length = last_alpha_line.shape[0]
                                column_range = np.arange(0, seq_length)
                                ratio = height / width
                                # image taller than the model aspect ratio:
                                # compressed to fit, no padding needed
                                if ratio > opt.imgH / opt.imgW:
                                    want_height = opt.imgW * ratio
                                    compress_ratio = want_height / opt.imgH
                                    expect_last_column = seq_length
                                # otherwise the resized image was padded
                                else:
                                    compress_ratio = 1
                                    expect_height = height / width * opt.imgW
                                    expect_last_column = expect_height / opt.imgH * seq_length
                                column_range = column_range - seq_length / 2
                                column_range = column_range / 320 * (
                                    320 + (compress_ratio - 1) * 32)
                                column_range = column_range + seq_length / 2
                                # column_range = column_range - column_range[0]
                                # last_column = np.argmax(last_alpha_line)
                                last_column = np.dot(last_alpha_line,
                                                     column_range)
                                expect_linein = expect_last_column - last_column
                                split_output = os.path.join(
                                    'output',
                                    os.path.splitext(
                                        os.path.basename(img_name))[0] +
                                    '.txt')
                                with open(split_output, 'w',
                                          encoding='utf-8') as fp:
                                    draw = ImageDraw.Draw(img)
                                    for alpha_line in alpha:
                                        column = np.dot(
                                            alpha_line, column_range)
                                        line_height = int(
                                            (column - expect_linein / 2) /
                                            (last_column - expect_linein / 2) *
                                            height)
                                        # line_height = int(column / last_column * height)
                                        line = [
                                            0, line_height, width - 1,
                                            line_height
                                        ]
                                        line = list(map(str, line))
                                        fp.write(','.join(line) + '\n')
                                        draw.line(((0, line_height),
                                                   (width - 1, line_height)),
                                                  fill=(255, 0, 0),
                                                  width=2)
                                    img.save(
                                        os.path.join(
                                            'output',
                                            os.path.basename(img_name)))

                        best_pred = pred[0]
                        best_prob = pred_max_prob[:, 0]

                        # calculate confidence score (= product of pred_max_prob)
                        try:
                            confidence_score = best_prob.cumprod(dim=0)[-1]
                        except IndexError:
                            confidence_score = 0.0
                            # print(f'{img_name:25s}\t{pred:25s}\t can\'t predict')
                            # raise ValueError()
                        print(
                            f'{img_name:25s}\t{best_pred:25s}\t{confidence_score:0.4f}'
                        )
                        log.write(
                            f'{img_name:25s}\t{best_pred:25s}\t{confidence_score:0.4f}\n'
                        )
                        for i in range(k):
                            print(f'Candidate {i:1d}: ', end='')
                            for j in range(pred_EOS):
                                print(
                                    f'{pred[i][j]}, prob: {pred_max_prob[j][i]:0.4f}\t',
                                    end='')
                            print()

                else:
                    preds_prob = F.softmax(preds, dim=2)
                    preds_max_prob, _ = preds_prob.max(dim=2)
                    for img_name, pred, pred_max_prob, pred_idx in zip(
                            image_path_list, preds_str, preds_max_prob,
                            preds_index):
                        pred_EOS = len(pred)
                        pred_max_prob = pred_max_prob[:pred_EOS]
                        # calculate confidence score (= product of pred_max_prob)
                        try:
                            confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                        except IndexError:
                            confidence_score = 0.0
                            # print(f'{img_name:25s}\t{pred:25s}\t can\'t predict')
                            # raise ValueError()
                        if opt.output_split:
                            img = Image.open(img_name).convert('RGB')
                            width, height = img.size
                            pred_idx = pred_idx.detach().cpu().numpy().tolist()
                            preds_len = len(pred_idx)
                            ratio = height / width
                            # image taller than the model aspect ratio:
                            # compressed to fit, no padding needed
                            if ratio > opt.imgH / opt.imgW:
                                want_height = opt.imgW * ratio
                                compress_ratio = want_height / opt.imgH
                                expect_last_column = preds_len
                            # otherwise the resized image was padded
                            else:
                                compress_ratio = 1
                                expect_height = height / width * opt.imgW
                                expect_last_column = expect_height / opt.imgH * preds_len
                            split_output = os.path.join(
                                'output',
                                os.path.splitext(os.path.basename(img_name))[0]
                                + '.txt')

                            # hyper-parameters; suggested 6 / 0.46 / 0.21 for the 320-wide CTC model
                            # TODO: find hyper-parameters for the 480-wide CTC model
                            CTC_start = 6
                            center_ratio = 0.46
                            zoom_ratio = 0.21
                            # for CTC_start in np.arange(6.0, 7.1, 0.1):
                            #     for center_ratio in np.arange(0.37, 0.46, 0.01):
                            #         for zoom_ratio in np.arange(0.18, 0.23, 0.01):
                            img = Image.open(img_name).convert('RGB')
                            with open(split_output, 'w',
                                      encoding='utf-8') as fp:
                                cur_pos = 0
                                draw = ImageDraw.Draw(img)
                                index_group = itertools.groupby(pred_idx)
                                for key, group in index_group:
                                    group = list(group)
                                    if key != 0:
                                        nxt_pos = cur_pos - 1 + len(group)
                                        column = (cur_pos + nxt_pos) // 2
                                        column = column - CTC_start
                                        column = (column - preds_len * center_ratio) * (1 + zoom_ratio * compress_ratio) \
                                                 + (preds_len * center_ratio)
                                        line_height = int(column /
                                                          expect_last_column *
                                                          height)

                                        line = [
                                            0, line_height, width - 1,
                                            line_height
                                        ]
                                        line = list(map(str, line))
                                        fp.write(','.join(line) + '\n')
                                        draw.line(((0, line_height),
                                                   (width - 1, line_height)),
                                                  fill=(255, 0, 0),
                                                  width=2)
                                    cur_pos += len(group)
                                img.save(
                                    os.path.join('output',
                                                 os.path.basename(img_name)))
                                # img.save(os.path.join('output', '{}_{:02d}_{:03d}_{:03d}.jpg'.format(os.path.splitext(os.path.basename(img_name))[0], int(CTC_start*10), int(center_ratio*100), int(zoom_ratio*100))))

                        print(
                            f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}'
                        )
                        log.write(
                            f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n'
                        )
                log.close()
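The confidence score used throughout these demos is the product of the per-step maximum probabilities, computed via cumprod; a tiny self-contained illustration (the probabilities are invented):

import torch
step_probs = torch.tensor([0.99, 0.95, 0.90])  # hypothetical per-character max probs
confidence = step_probs.cumprod(dim=0)[-1]     # 0.99 * 0.95 * 0.90
print(f'{confidence:0.4f}')  # ~0.8464

Example #7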
    def extract_text(self):
        file_list = sorted(os.listdir(self.i_folder))
        img_to_index = {}
        count = 0
        for full_file in file_list:
            split_file = full_file.split(".")
            filename = split_file[0]
            img_to_index[count] = filename
            #print(count, filename)
            count += 1
            #print(filename)
            file_extension = "." + split_file[1]
            #print(filename, file_extension)
            image = imgproc.loadImage(self.i_folder + full_file)
            bboxes, polys, score_text = self.test_net(
                self.net, image, self.text_threshold, self.link_threshold,
                self.low_text, self.cuda, self.poly, self.refine_net)
            img = cv2.imread(self.i_folder + filename + file_extension)
            rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            points = []
            order = []
            for i in range(0, len(bboxes)):
                sample_bbox = bboxes[i]
                min_point = sample_bbox[0]
                max_point = sample_bbox[2]
                for j, p in enumerate(sample_bbox):
                    if (p[0] <= min_point[0]):
                        min_point = (p[0], min_point[1])
                    if (p[1] <= min_point[1]):
                        min_point = (min_point[0], p[1])
                    if (p[0] >= max_point[0]):
                        max_point = (p[0], max_point[1])
                    if (p[1] >= max_point[1]):
                        max_point = (max_point[0], p[1])
                min_point = (max(min(len(rgb_img[0]), min_point[0]),
                                 0), max(min(len(rgb_img), min_point[1]), 0))
                max_point = (max(min(len(rgb_img[0]), max_point[0]),
                                 0), max(min(len(rgb_img), max_point[1]), 0))
                points.append((min_point, max_point))
                order.append(0)
            num_ordered = 0
            rows_ordered = 0
            points_sorted = []
            ordered_points_index = 0
            order_sorted = []
            while (num_ordered < len(points)):
                #find lowest-y that is unordered
                min_y = len(rgb_img)
                min_y_index = -1
                for i in range(0, len(points)):
                    if (order[i] == 0):
                        if (points[i][0][1] <= min_y):
                            min_y = points[i][0][1]
                            min_y_index = i
                rows_ordered += 1
                order[min_y_index] = rows_ordered
                num_ordered += 1
                points_sorted.append(points[min_y_index])
                order_sorted.append(rows_ordered)
                ordered_points_index = len(points_sorted) - 1

                # Group bboxes that are on the same row
                max_y = points[min_y_index][1][1]
                range_y = max_y - min_y
                for i in range(0, len(points)):
                    if (order[i] == 0):
                        min_y_i = points[i][0][1]
                        max_y_i = points[i][1][1]
                        range_y_i = max_y_i - min_y_i
                        if (max_y_i >= min_y and min_y_i <= max_y):
                            overlap = (min(max_y_i, max_y) -
                                       max(min_y_i, min_y)) / (max(
                                           1, min(range_y, range_y_i)))
                            if (overlap >= 0.30):
                                order[i] = rows_ordered
                                num_ordered += 1
                                min_x_i = points[i][0][0]
                                for j in range(ordered_points_index,
                                               len(points_sorted) + 1):
                                    if (j < len(points_sorted)
                                        ):  #insert before
                                        min_x_j = points_sorted[j][0][0]
                                        if (min_x_i < min_x_j):
                                            points_sorted.insert(j, points[i])
                                            order_sorted.insert(
                                                j, rows_ordered)
                                            break
                                    else:  #insert at the end of array
                                        points_sorted.insert(j, points[i])
                                        order_sorted.insert(j, rows_ordered)
                                        break
            for i in range(0, len(points_sorted)):
                min_point = points_sorted[i][0]
                max_point = points_sorted[i][1]
                mask_file = self.result_folder + filename + "_" + str(
                    order_sorted[i]) + "_" + str(i) + file_extension
                crop_image = rgb_img[int(min_point[1]):int(max_point[1]),
                                     int(min_point[0]):int(max_point[0])]
                #print(filename, min_point, max_point, len(rgb_img), len(rgb_img[0]))
                cv2.imwrite(mask_file, crop_image)
        AlignCollate_demo = AlignCollate(imgH=self.opt.imgH,
                                         imgW=self.opt.imgW,
                                         keep_ratio_with_pad=self.opt.PAD)
        demo_data = RawDataset(root=self.result_folder,
                               opt=self.opt)  # use RawDataset
        demo_loader = torch.utils.data.DataLoader(
            demo_data,
            batch_size=self.opt.batch_size,
            shuffle=False,
            num_workers=int(self.opt.workers),
            collate_fn=AlignCollate_demo,
            pin_memory=True)
        f = open(self.extract_text_file, "w")
        count = -1
        curr_order = 1
        curr_filename = ""
        output_string = ""
        end_line = "[SEP] "
        with torch.no_grad():
            for image_tensors, image_path_list in demo_loader:
                batch_size = image_tensors.size(0)
                image = image_tensors.to(self.device)
                #image = (torch.from_numpy(crop_image).unsqueeze(0)).to(device)
                #print(image_path_list)
                #print(image.size())
                length_for_pred = torch.IntTensor([self.opt.batch_max_length] *
                                                  batch_size).to(self.device)
                text_for_pred = torch.LongTensor(batch_size,
                                                 self.opt.batch_max_length +
                                                 1).fill_(0).to(self.device)
                preds = self.model(image, text_for_pred, is_train=False)
                _, preds_index = preds.max(2)
                preds_str = self.converter.decode(preds_index, length_for_pred)
                for path, p in zip(image_path_list, preds_str):
                    #print(path)
                    if 'Attn' in self.opt.Prediction:
                        pred_EOS = p.find('[s]')
                        p = p[:pred_EOS]  # prune after "end of sentence" token ([s])
                    # assumes a 4-character file extension (.png, .jpg, etc.)
                    path_info = path[len(self.result_folder):].split(".")[0].split("_")
                    #print(curr_filename)
                    #print(path_info[0])
                    #print("PATHINFO: ",path_info[0])
                    if (not (curr_filename == path_info[0])):
                        if (not (curr_filename == "")):
                            f.write(str(count) + "\n")
                            f.write(curr_filename + "\n")
                            f.write(output_string + "\n\n")
                        count += 1
                        curr_filename = img_to_index[count]  #path_info[0]
                        #print("CURRFILE: ", curr_filename)
                        while (not (curr_filename == path_info[0])):
                            f.write(str(count) + "\n")
                            f.write(curr_filename + "\n")
                            f.write("\n\n")
                            count += 1
                            curr_filename = img_to_index[count]  #path_info[0]
                            #print("CURRFILE: ", curr_filename)
                        output_string = ""
                        curr_order = 1
                    if (int(path_info[1]) > curr_order):
                        curr_order += 1
                        output_string += end_line
                    output_string += p + " "
            f.write(str(count) + "\n")
            f.write(curr_filename + "\n")
            f.write(output_string + "\n\n")
        f.close()
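The row grouping above treats two boxes as belonging to the same text line when their vertical overlap is at least 30% of the smaller box height; a small standalone check of that criterion (the coordinates are invented):

def same_row(a, b, thresh=0.30):
    # a, b: (min_y, max_y) vertical extents of two boxes
    overlap = min(a[1], b[1]) - max(a[0], b[0])
    return overlap / max(1, min(a[1] - a[0], b[1] - b[0])) >= thresh

print(same_row((10, 40), (25, 55)))  # True: 15 px shared of a 30 px box
print(same_row((10, 40), (38, 70)))  # False: only 2 px shared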
Example #8
def demo(opt, length, db_url):
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                # preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index, preds_size)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            log = open(f'./log_demo_result.txt', 'a')
            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

            print(f'{dashed_line}\n{head}\n{dashed_line}')
            log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list,
                                                     preds_str,
                                                     preds_max_prob):
                #we are only interested in plates themselves
                if img_name.find('plate_', 0, len(img_name)) == -1:
                    continue
                if 'Attn' in opt.Prediction:
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                    pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= product of pred_max_prob)
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]

                #getting name of the current image
                img_name = img_name.replace('.jpg', '')
                img_name = img_name.replace('res_', '')
                img_name = img_name.replace('plate_', '')

                # cut off the leading `length` characters of the full path
                print(length)
                img_name = img_name[length:]
                print(img_name)

                #splitting into image name and db name
                res = re.split(r'/', img_name)
                base = res[0]
                img_name = res[1]
                print(base)

                # split into frame number and id (the first and second numbers in the name)
                result = re.split(r'_', img_name)
                print(result)
                # NOTE: creating the engine once outside the loop would be cheaper
                engine = create_engine(db_url)
                conn = engine.connect()
                # write the recognised number to the db; bound parameters avoid
                # quoting errors and SQL injection from the decoded string
                sql = text('UPDATE table_' + base +
                           ' SET plate_number = :plate'
                           ' WHERE frame = :frame AND id = :id')
                conn.execute(sql, {'plate': pred,
                                   'frame': result[0],
                                   'id': result[1]})
                #print(result[0])
                #print(result[1])
                #print(base)
                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                log.write(
                    f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')
                ##img_name =re.sub(r'\w+\/', '', img_name)
                ##result = re.split(r'_', img_name)
            log.close()
Example #9
def demo(opt):
    """ model configuration """
    converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    # print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
    #       opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
    #       opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()

    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            all_pred_strs = []
            all_confidence_scores = []
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            predss = model(image, text_for_pred, is_train=False)[0]

            for i, preds in enumerate(predss):
                confidence_score_list = []
                pred_str_list = []

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

                preds_prob = F.softmax(preds, dim=2)
                preds_max_prob, _ = preds_prob.max(dim=2)
                for pred, pred_max_prob in zip(preds_str, preds_max_prob):
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                    pred_str_list.append(pred)
                    pred_max_prob = pred_max_prob[:pred_EOS]

                    # calculate confidence score (= product of pred_max_prob)
                    try:
                        confidence_score = pred_max_prob.cumprod(
                            dim=0)[-1].cpu().numpy()
                    except IndexError:
                        # empty pred after pruning at the [s] token
                        confidence_score = 0
                    confidence_score_list.append(confidence_score)

                all_pred_strs.append(pred_str_list)
                all_confidence_scores.append(confidence_score_list)

            all_confidence_scores = np.array(all_confidence_scores)
            all_pred_strs = np.array(all_pred_strs)

            best_pred_index = np.argmax(all_confidence_scores, axis=0)
            best_pred_index = np.expand_dims(best_pred_index, axis=0)

            # Get the best prediction per image across blocks
            all_pred_strs = np.take_along_axis(all_pred_strs,
                                               best_pred_index,
                                               axis=0)[0]
            all_confidence_scores = np.take_along_axis(all_confidence_scores,
                                                       best_pred_index,
                                                       axis=0)[0]

            log = open(f'./log_demo_result.txt', 'w')
            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

            print(f'{dashed_line}\n{head}\n{dashed_line}')
            log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')
            for img_name, pred, confidence_score in zip(
                    image_path_list, all_pred_strs, all_confidence_scores):
                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                log.write(
                    f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')

            log.close()
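np.take_along_axis with an argmax over axis 0 is what picks, per image, the prediction from whichever block scored highest; a toy illustration with invented values:

import numpy as np
scores = np.array([[0.2, 0.9],   # block 0: confidence per image
                   [0.7, 0.4]])  # block 1
preds = np.array([['ab', 'cd'],
                  ['ef', 'gh']])
best = np.expand_dims(np.argmax(scores, axis=0), axis=0)  # [[1, 0]]
print(np.take_along_axis(preds, best, axis=0)[0])  # ['ef' 'cd']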
Example #10
def demo(opt):
    """ model configuration """
    if 'Transformer' in opt.SequenceModeling:
        converter = TransformerLabelConverter(opt.character)
    elif 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    # load model
    if opt.saved_model != '':
        print('loading pretrained model from %s' % opt.saved_model)
        checkpoint = torch.load(opt.saved_model)
        if type(checkpoint) == dict:
            model.load_state_dict(checkpoint['state_dict'])
        else:
            model.load_state_dict(checkpoint)
        del checkpoint
        torch.cuda.empty_cache()

    model = torch.nn.DataParallel(model)
    if torch.cuda.is_available():
        model = model.cuda()

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()
    dict_gt = {}
    with open('gt.txt', 'r') as gt_file:
        gt = gt_file.readlines()
        for line in gt:
            key = line.split(', "')[0]
            value = line.split(', "')[1].replace('"\n', '').lower()
            dict_gt[key] = value
    for image_tensors, image_path_list in demo_loader:
        batch_size = image_tensors.size(0)
        with torch.no_grad():
            image = image_tensors.cuda()
            # For max length prediction
            length_for_pred = torch.cuda.IntTensor([opt.batch_max_length] *
                                                   batch_size)
            text_for_pred = torch.cuda.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0)
        if 'Transformer' in opt.SequenceModeling:
            preds = model(image, text_for_pred, is_train=False)
            # select max probability (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)

        elif 'CTC' in opt.Prediction:
            preds = model(image, text_for_pred).log_softmax(2)

            # Select max probability (greedy decoding) then decode index to character
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            _, preds_index = preds.permute(1, 0, 2).max(2)
            preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
            preds_str = converter.decode(preds_index.data, preds_size.data)

        else:
            preds = model(image, text_for_pred, is_train=False)

            # select max probability (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)

        print('-' * 80)
        print('image_path\tpredicted_labels')
        print('-' * 80)
        for img_name, pred in zip(image_path_list, preds_str):
            if 'Transformer' in opt.SequenceModeling:
                pred = pred[:pred.find('</s>')]
            elif 'Attn' in opt.Prediction:
                # prune after "end of sentence" token ([s])
                pred = pred[:pred.find('[s]')]
            raw_img = cv2.imread(img_name)
            raw_img = cv2.resize(raw_img, (200, 64))
            tmp_img = np.zeros((128, 200, 3), np.uint8)
            tmp_img.fill(255)
            tmp_img[:64, :200] = raw_img
            raw_img = tmp_img
            font = cv2.FONT_HERSHEY_SIMPLEX
            bottomLeftCornerOfText = (5, 90)
            fontScale = 1
            lineType = 2
            if pred == dict_gt[img_name.split('/')[-1]]:
                # correct prediction: draw it in green under the image
                cv2.putText(raw_img, pred, bottomLeftCornerOfText, font,
                            fontScale, (0, 255, 0), lineType)
                raw_img = raw_img[:96, :200]
                cv2.imwrite('./trash/true/' + img_name.split('/')[-1], raw_img)
            else:
                # wrong prediction: draw it in red, with the ground truth in green below
                cv2.putText(raw_img, pred, bottomLeftCornerOfText, font,
                            fontScale, (0, 0, 255), lineType)
                cv2.putText(raw_img, dict_gt[img_name.split('/')[-1]],
                            (5, 125), font, fontScale, (0, 255, 0), lineType)
                cv2.imwrite('./trash/false/' + img_name.split('/')[-1],
                            raw_img)
            print(f'{img_name}\t{pred}')
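The gt.txt parsing above expects ICDAR-style lines of the form `filename, "label"`; a toy check with an invented line:

line = 'demo_1.png, "Available"\n'  # hypothetical gt.txt line
key = line.split(', "')[0]
value = line.split(', "')[1].replace('"\n', '').lower()
print(key, value)  # demo_1.png available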
Example #11
def demo(opt):
    inputimage = opt.input_image
    boxes_csv = opt.boxescsv
    bboxes = parse_csv(inputimage, boxes_csv)
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                # preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index, preds_size)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            log = open(f'{opt.output_folder}result.csv', 'w')

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_index, (pred, pred_max_prob) in enumerate(
                    zip(preds_str, preds_max_prob)):
                if 'Attn' in opt.Prediction:
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                    pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= product of pred_max_prob)
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                for pts in bboxes[img_index]:
                    x, y = pts
                    log.write(f'{x},{y},')
                log.write(f'{pred}\n')

            log.close()
            # copy log to local output folder
            os.system(f'cp {opt.output_folder}result.csv /input/output')
            shutil.make_archive('per_word_visual', 'zip', '/input/output')
Example #12
def demo(opt):
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    elif 'Bert' in opt.Prediction:
        converter = TransformerConverter(opt.character, opt.batch_max_length)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)
    opt.alphabet_size = len(opt.character) + 2  # +2 for [UNK]+[EOS]

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)

    model = torch.nn.DataParallel(model)
    if torch.cuda.is_available():
        model = model.cuda()

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # mkdir result
    experiment_name = os.path.join('./result', opt.image_folder.split('/')[-2])
    if not os.path.exists(experiment_name):
        os.makedirs(experiment_name)
    result = {}

    # predict
    model.eval()
    for idx, (image_tensors, image_path_list) in enumerate(demo_loader):
        batch_size = image_tensors.size(0)
        with torch.no_grad():
            image = image_tensors.cuda()
            # For max length prediction
            length_for_pred = torch.cuda.IntTensor([opt.batch_max_length] * batch_size)
            text_for_pred = torch.cuda.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text_for_pred).log_softmax(2)

            # Select max probability (greedy decoding) then decode index to character
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            _, preds_index = preds.permute(1, 0, 2).max(2)
            preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
            preds_str = converter.decode(preds_index.data, preds_size.data)

        elif 'Bert' in opt.Prediction:
            with torch.no_grad():
                pad_mask = None
                preds = model(image, pad_mask)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds[1].max(2)
                length_for_pred = torch.cuda.IntTensor([preds_index.size(-1)] * batch_size)
                preds_str = converter.decode(preds_index, length_for_pred)

        else:
            preds = model(image, text_for_pred, is_train=False)

            # select max probability (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)

        print(f'{idx}/{len(demo_loader)}')  # batch index / total batches

        for img_name, pred in zip(image_path_list, preds_str):
            if 'Attn' in opt.Prediction:
                pred = pred[:pred.find('[s]')]  # prune after "end of sentence" token ([s])

            # for show

            # write in json
            name = f'{img_name}'.split('/')[-1].replace('gt', 'res').split('.')[0]
            value = [{"transcription": f'{pred}'}]
            result[name] = value

    with open(f'{experiment_name}/result.json', 'w') as f:
        json.dump(result, f)
        print("writed finish...")
def demo(opt):
    """ model configuration """
    if opt.guide_training:
        from model_guide import Model
    else:
        from model import Model
    if opt.baiduCTC:
        converter = CTCLabelConverterForBaiduWarpctc(opt.character)
    else:
        converter = CTCLabelConverter(opt.character)
    if opt.Prediction == 'Attn':
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)
    opt.num_class_ctc = opt.num_class
    opt.num_class_attn = opt.num_class_ctc + 1

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device), strict=False)

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # predict
    model.eval()
    data = pd.DataFrame()
    with torch.no_grad():
        ind = 0
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                if opt.guide_training:
                    preds = model.module.inference(image, text_for_pred)
                else:
                    preds = model(image, text_for_pred)

                # Select max probability (greedy decoding), then decode indices to characters
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)

                if opt.baiduCTC:
                    if opt.beam_search:
                        preds_index = preds
                    else:
                        _, preds_index = preds.max(2)
                        preds_index = preds_index.view(-1)
                else:
                    _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index.data, preds_size.data, opt.beam_search)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding), then decode indices to characters
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)


            log = open('./log_demo_result.txt', 'a')
            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'
            
            print(f'{dashed_line}\n{head}\n{dashed_line}')
            log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                if 'Attn' in opt.Prediction:
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                    pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= product of pred_max_prob)
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                filename = img_name
                label = pred
                conf = round(confidence_score.item(), 3)
                img = cv2.imread(filename)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img_pil = Image.fromarray(img)
                img_buffer = io.BytesIO()
                img_pil.save(img_buffer, format="PNG")
                imgStr = base64.b64encode(img_buffer.getvalue()).decode("utf-8") 

                data.loc[ind, 'img'] = '<img src="data:image/png;base64,{0:s}">'.format(imgStr)
                data.loc[ind, 'id'] = filename
                data.loc[ind, 'label'] = label
                data.loc[ind, 'conf'] = conf
                ind += 1
                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                log.write(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')

            log.close()
        html_all = data.to_html(escape=False)
        if opt.is_save:
            with open("result.html", "w") as text_file:
                text_file.write(html_all)
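The confidence score computed above is the product of the per-step maximum softmax probabilities, obtained through cumprod(dim=0)[-1]. A standalone illustration of the same arithmetic:

import torch

step_max_probs = torch.tensor([0.99, 0.95, 0.90])  # per-character max softmax probabilities
confidence = step_max_probs.cumprod(dim=0)[-1]     # running product; last entry = 0.99 * 0.95 * 0.90
print(f'{confidence.item():0.4f}')                 # ~0.8464

Because it is a product, longer predictions tend to receive lower scores, so fixed thresholds (such as the 0.5 cutoff used in a later example) implicitly depend on text length.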
Example #14
def index():
    model, converter, length_for_pred, text_for_pred, opt = loader()
    start_time = time.time()

    AlignCollate_demo = AlignCollate(imgH=opt['imgH'],
                                     imgW=opt['imgW'],
                                     keep_ratio_with_pad=opt['PAD'])
    demo_data = RawDataset(root=opt['image_folder'], opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt['batch_size'],
                                              shuffle=False,
                                              num_workers=int(opt['workers']),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    get_data = time.time() - start_time

    # predict
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            # torch.cuda.synchronize(device)
            if 'CTC' in opt['Prediction']:
                preds = model(image, text_for_pred)
                preds = preds.log_softmax(2)
                # select max probability (greedy decoding), then decode indices to characters
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding), then decode indices to characters
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print('-' * 80)
            print('image_path\tpredicted_labels')
            print('-' * 80)
            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' in opt['Prediction']:
                    pred = pred[:pred.find('[s]')]  # prune after "end of sentence" token ([s])

                print(f'{img_name}\t{pred}')

        forward_time = time.time() - start_time
        only_infer_time = forward_time - get_data

        print('*' * 80)
        print('get_data_time:{:.5f}[sec]'.format(get_data))
        print('only_infer_time:{:.5f}[sec]'.format(only_infer_time))
        print('total_time:{:.5f}[sec]'.format(forward_time))
        print('*' * 80)

        img_name = [i[9:] for i in image_path_list]  # strips a fixed nine-character folder prefix
        items = {}
        for path, pred in zip(img_name, preds_str):
            items[path] = pred

    return render_template('index.html', images=items)
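Note that i[9:] above assumes every path carries the same nine-character directory prefix. If the image folder ever changes, os.path.basename is the prefix-independent alternative; a sketch:

import os

items = {os.path.basename(path): pred
         for path, pred in zip(image_path_list, preds_str)}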
Example #15
def runDeepTextNet(segmentedImagesList):
    opt = argparse.Namespace(FeatureExtraction='ResNet',
                             PAD=False,
                             Prediction='Attn',
                             SequenceModeling='BiLSTM',
                             Transformation='TPS',
                             batch_max_length=25,
                             batch_size=192,
                             character='0123456789abcdefghijklmnopqrstuvwxyz',
                             hidden_size=256,
                             image_folder='demo_image/',
                             imgH=32,
                             imgW=100,
                             input_channel=1,
                             num_class=38,
                             num_fiducial=20,
                             num_gpu=0,
                             output_channel=512,
                             rgb=False,
                             saved_model='TPS-ResNet-BiLSTM-Attn.pth',
                             sensitive=False,
                             workers=4)

    model = Model(opt)
    model = torch.nn.DataParallel(model).to('cpu')
    directory = "TPS-ResNet-BiLSTM-Attn.pth"
    model.load_state_dict(torch.load(directory, map_location='cpu'))

    converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)  # note: the model above was already constructed with num_class=38
    if opt.rgb:
        opt.input_channel = 3  # likewise set too late to affect the constructed model

    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=segmentedImagesList, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()

    out_preds_texts = []
    for image_tensors, image_path_list in demo_loader:
        batch_size = image_tensors.size(0)
        image = image_tensors.to(device)
        # For max length prediction
        length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                          batch_size).to(device)
        text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                         1).fill_(0).to(device)
        preds = model(image, text_for_pred, is_train=False)
        # select max probabilty (greedy decoding) then decode index to character
        _, preds_index = preds.max(2)
        preds_str = converter.decode(preds_index, length_for_pred)
        preds_prob = F.softmax(preds, dim=2)
        preds_max_prob, _ = preds_prob.max(dim=2)
        for img_name, pred, pred_max_prob in zip(image_path_list, preds_str,
                                                 preds_max_prob):
            if 'Attn' in opt.Prediction:
                pred_EOS = pred.find('[s]')
                pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                pred_max_prob = pred_max_prob[:pred_EOS]

            # calculate confidence score (= product of pred_max_prob)
            confidence_score = pred_max_prob.cumprod(dim=0)[-1]
            # print(pred)
            out_preds_texts.append(pred)
    # print(out_preds_texts)

    sentence_out = [' '.join(out_preds_texts)]
    return sentence_out
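One caveat with the pred[:pred.find('[s]')] idiom used throughout these examples: str.find returns -1 when '[s]' is absent (e.g. a prediction that runs to batch_max_length), and pred[:-1] then silently drops the last character. A small defensive helper, offered as a sketch rather than a fix from the source:

def prune_eos(pred, eos='[s]'):
    # cut an attention prediction at the end-of-sentence token, if present
    eos_pos = pred.find(eos)
    return pred[:eos_pos] if eos_pos != -1 else pred

assert prune_eos('hello[s]padding') == 'hello'
assert prune_eos('hello') == 'hello'  # plain slicing would return 'hell'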
Example #16
model.load_state_dict(pre)


# text model -------------------------
class args(object):
    # minimal set of required parameters
    def __init__(self):
        self.rgb = True
        self.imgW = 128
        self.imgH = 128
        self.path = os.path.join(os.getcwd(), 'test_imgs')
        self.batch_size = 4


opt = args()
test_dataset = RawDataset(opt.path, opt)
# length_of_data = len(test_dataset)  # number of images
test_set = torch.utils.data.DataLoader(dataset=test_dataset,
                                       batch_size=opt.batch_size,
                                       shuffle=False,
                                       pin_memory=True)

model.eval()
fig_i = 0
for batch_x, path_x in test_set:
    if len(batch_x) == 0:
        break
    fig_i += 1
    x_tensors = batch_x.to(device)
    out = model(x_tensors)
    pred = torch.max(out, 1)[1]
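As an aside, torch.max(out, 1)[1] is equivalent to out.argmax(dim=1), and mapping the indices to readable labels takes one extra lookup. A self-contained sketch with stand-in logits and hypothetical class names:

import torch

class_names = ['background', 'text']            # hypothetical labels for this classifier
out = torch.tensor([[0.2, 0.8], [0.9, 0.1]])    # stand-in model outputs
pred = out.argmax(dim=1)                        # same result as torch.max(out, 1)[1]
labels = [class_names[i] for i in pred.tolist()]  # ['text', 'background']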
Example #17
def demo(opt):
    """ model configuration """
    lists = []  # holds the text recognized from the candidate destination images

    converter = AttnLabelConverter(opt.character)  # Attn

    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3

    model = Model(opt)  # Model imported from model.py

    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling,
          opt.Prediction)  # print the model input parameters

    model = torch.nn.DataParallel(model).to(device)  # data-parallel execution on the GPU

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model,
                                     map_location=device))  # load the model parameters

    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data1 = RawDataset(root=opt.image_folder1,
                            opt=opt)  # use RawDataset: signboard detection results
    demo_data2 = RawDataset(root=opt.image_folder2,
                            opt=opt)  # use RawDataset: Google Maps text detection results

    demo_loader1 = torch.utils.data.DataLoader(demo_data1,
                                               batch_size=opt.batch_size,
                                               shuffle=False,
                                               num_workers=int(opt.workers),
                                               collate_fn=AlignCollate_demo,
                                               pin_memory=True)
    demo_loader2 = torch.utils.data.DataLoader(demo_data2,
                                               batch_size=opt.batch_size,
                                               shuffle=False,
                                               num_workers=int(opt.workers),
                                               collate_fn=AlignCollate_demo,
                                               pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader1:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)

            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            # Attn
            preds = model(image, text_for_pred, is_train=False)

            # select max probabilty (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)

            log = open('./log_demo_result.txt', 'a')  # open in append mode
            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

            print(f'{dashed_line}\n{head}\n{dashed_line}')  # print the table header
            log.write(
                f'{dashed_line}\n{head}\n{dashed_line}\n')  # write the table header to the log

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list,
                                                     preds_str,
                                                     preds_max_prob):
                pred_EOS = pred.find('[s]')
                pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= product of pred_max_prob)
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]

                lists.append(pred)
                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')  # print the values
                log.write(
                    f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')  # save the values to the log

            log.close()  # close the log file

    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader2:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            # Attn
            preds = model(image, text_for_pred, is_train=False)

            # select max probabilty (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)

            log = open('./log_demo_result.txt', 'a')  # open in append mode
            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

            print(f'{dashed_line}\n{head}\n{dashed_line}')  # print the table header
            log.write(
                f'{dashed_line}\n{head}\n{dashed_line}\n')  # write the table header to the log

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list,
                                                     preds_str,
                                                     preds_max_prob):
                pred_EOS = pred.find('[s]')
                pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate the confidence score
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]

                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')  # print the values
                log.write(
                    f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')  # save the values to the log
                if pred in lists:
                    print(pred + " is a valid destination.")
                else:
                    print(pred + " is not a valid destination.")

            log.close()  # close the log file
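The two inference loops above differ only in which loader they consume and whether predictions are appended to lists. A hedged refactor sketch (the helper name is ours; it reuses the model, converter, opt, and device already defined in this example):

def recognize_all(model, converter, loader, opt):
    # run attention-based inference over a loader and return pruned predictions
    results = []
    with torch.no_grad():
        for image_tensors, image_path_list in loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)
            preds = model(image, text_for_pred, is_train=False)
            _, preds_index = preds.max(2)
            for pred in converter.decode(preds_index, length_for_pred):
                results.append(pred[:pred.find('[s]')])
    return results

# lists = recognize_all(model, converter, demo_loader1, opt)
# candidates = recognize_all(model, converter, demo_loader2, opt)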
Example #18
def demo(args):

    """Open csv file wherein you are going to write the Predicted Words"""
    data = pd.read_csv('../data/craft_output/data.csv')

    """ model configuration """
    if 'CTC' in args.Prediction:
        converter = CTCLabelConverter(args.character)
    else:
        converter = AttnLabelConverter(args.character)
    args.num_class = len(converter.character)

    if args.rgb:
        args.input_channel = 3
    model = Model(args)
    print('model input parameters', args.imgH, args.imgW, args.num_fiducial, args.input_channel, args.output_channel,
          args.hidden_size, args.num_class, args.batch_max_length, args.Transformation, args.FeatureExtraction,
          args.SequenceModeling, args.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % args.saved_model)
    model.load_state_dict(torch.load(args.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=args.imgH, imgW=args.imgW, keep_ratio_with_pad=args.PAD)
    demo_data = RawDataset(root=args.image_folder, args=args)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=args.batch_size,
        shuffle=False,
        num_workers=int(args.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([args.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, args.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in args.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding), then decode indices to characters
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                # preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding), then decode indices to characters
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t {"predicted_labels":25s}\t confidence score'
            
            print(f'{dashed_line}\n{head}\n{dashed_line}')
            # log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                start = '../data/crop_img/'
                path = os.path.relpath(img_name, start)
                folder = os.path.dirname(path)
                image_name = os.path.basename(path)
                file_name = '_'.join(image_name.split('_')[:-8])
                txt_file = os.path.join(start, folder, file_name)

                log = open(f'{txt_file}_log_demo_result.txt', 'a')
                if 'Attn' in args.Prediction:
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                    pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= product of pred_max_prob)
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                print(f'{image_name:25s}\t {pred:25s}\t {confidence_score:0.4f}')
                log.write(f'{image_name:25s}\t {pred:25s}\t {confidence_score:0.4f}\n')

            log.close()
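As excerpted, the DataFrame read at the top of demo() is never written back, even though the docstring promises it. A sketch of the missing persistence step, with assumed column names ('image_name' and 'predicted_word' are ours, not from the source):

import pandas as pd

data = pd.read_csv('../data/craft_output/data.csv')
predictions = {'crop_0001.jpg': 'word'}  # accumulated inside the recognition loop
for name, word in predictions.items():
    data.loc[data['image_name'] == name, 'predicted_word'] = word  # hypothetical columns
data.to_csv('../data/craft_output/data.csv', index=False)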
Example #19
def demoToTxt1(image_folder, saved_model, txtFile):  # sensitive
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_folder',
                        default=image_folder,
                        help='path to image_folder which contains text images')
    parser.add_argument('--workers',
                        type=int,
                        help='number of data loading workers',
                        default=4)
    parser.add_argument('--batch_size',
                        type=int,
                        default=100,
                        help='input batch size')
    parser.add_argument('--saved_model',
                        default=saved_model,
                        help="path to saved_model to evaluation")
    """ Data processing """
    parser.add_argument('--batch_max_length',
                        type=int,
                        default=20,
                        help='maximum-label-length')
    parser.add_argument('--imgH',
                        type=int,
                        default=32,
                        help='the height of the input image')
    parser.add_argument('--imgW',
                        type=int,
                        default=100,
                        help='the width of the input image')
    parser.add_argument('--rgb', action='store_true', help='use rgb input')
    parser.add_argument('--character',
                        type=str,
                        default='0123456789',
                        help='character label')
    parser.add_argument('--sensitive',
                        default=True,
                        help='for sensitive character mode')
    parser.add_argument('--PAD',
                        default=False,
                        action='store_true',
                        help='whether to keep ratio then pad for image resize')
    """ Model Architecture """
    parser.add_argument('--Transformation',
                        default='TPS',
                        type=str,
                        help='Transformation stage. None|TPS')
    parser.add_argument('--FeatureExtraction',
                        default='ResNet',
                        type=str,
                        help='FeatureExtraction stage. VGG|RCNN|ResNet')
    parser.add_argument('--SequenceModeling',
                        default='BiLSTM',
                        type=str,
                        help='SequenceModeling stage. None|BiLSTM')
    parser.add_argument('--Prediction',
                        default='CTC',
                        type=str,
                        help='Prediction stage. CTC|Attn')
    parser.add_argument('--num_fiducial',
                        type=int,
                        default=20,
                        help='number of fiducial points of TPS-STN')
    parser.add_argument(
        '--input_channel',
        type=int,
        default=1,
        help='the number of input channel of Feature extractor')
    parser.add_argument(
        '--output_channel',
        type=int,
        default=512,
        help='the number of output channel of Feature extractor')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=256,
                        help='the size of the LSTM hidden state')

    opt = parser.parse_args()
    """ vocab / character number configuration """
    if opt.sensitive:
        opt.character += 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        # opt.character = string.printable[:-6]  # same with ASTER setting (use 94 char).

    cudnn.benchmark = True
    cudnn.deterministic = True
    opt.num_gpu = torch.cuda.device_count()
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    model = torch.nn.DataParallel(model)
    if torch.cuda.is_available():
        model = model.cuda()

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()
    saved_file = open(txtFile, 'w')
    for image_tensors, image_path_list in demo_loader:
        batch_size = image_tensors.size(0)
        with torch.no_grad():
            image = image_tensors.cuda()
            # For max length prediction
            length_for_pred = torch.cuda.IntTensor([opt.batch_max_length] *
                                                   batch_size)
            text_for_pred = torch.cuda.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0)

            # run the forward pass without gradient tracking as well
            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding), then decode indices to characters
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding), then decode indices to characters
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

        print('-' * 80)
        print('image_path\tpredicted_labels')
        print('-' * 80)

        for img_name, pred in zip(image_path_list, preds_str):
            if 'Attn' in opt.Prediction:
                pred = pred[:pred.find('[s]')]  # prune after "end of sentence" token ([s])
            print(f'{img_name}\t{pred}')
            saved_file.write(f'{img_name}\t{pred}\n')
    saved_file.close()
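A usage sketch for demoToTxt1 (the paths are placeholders). Note that parser.parse_args() inside the function reads sys.argv, so unrelated command-line flags passed to the calling script will interfere; parse_args([]) would isolate it:

# writes one "<image_path>\t<prediction>" line per image to the text file
demoToTxt1('demo_image/', 'TPS-ResNet-BiLSTM-CTC.pth', 'predictions.txt')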
Example #20
def load_data_pool(self):
    """Load data from the data pool; returns a Dataset."""
    logging.info("load data from data pool.")
    return RawDataset(self.dir_data, self.dataname, 'data_pool',
                      self.classes)
Example #21
def _textRecognition(opt):
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    char_list = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

    csv_filename = os.path.join(opt.output_dirpath, "text_information.csv")
    Header = ["bookID", "prediction"]

    with open(csv_filename, 'w') as f:
        writer = csv.DictWriter(f, fieldnames=Header)
        writer.writeheader()

        model.eval()
        with torch.no_grad():
            for image_tensors, image_path_list in demo_loader:
                batch_size = image_tensors.size(0)
                image = image_tensors.to(device)
                # For max length prediction
                length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                                  batch_size).to(device)
                text_for_pred = torch.LongTensor(
                    batch_size, opt.batch_max_length + 1).fill_(0).to(device)

                if 'CTC' in opt.Prediction:
                    preds = model(image, text_for_pred)

                    # Select max probability (greedy decoding), then decode indices to characters
                    preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                    _, preds_index = preds.max(2)
                    preds_index = preds_index.view(-1)
                    preds_str = converter.decode(preds_index.data,
                                                 preds_size.data)

                else:
                    preds = model(image, text_for_pred, is_train=False)

                    # select max probability (greedy decoding), then decode indices to characters
                    _, preds_index = preds.max(2)
                    preds_str = converter.decode(preds_index, length_for_pred)

                #log = open(f'./text_information.csv', 'a')
                dashed_line = '-' * 80
                head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

                print(f'{dashed_line}\n{head}\n{dashed_line}')

                preds_prob = F.softmax(preds, dim=2)
                preds_max_prob, _ = preds_prob.max(dim=2)
                for img_name, pred, pred_max_prob in zip(
                        image_path_list, preds_str, preds_max_prob):
                    if 'Attn' in opt.Prediction:
                        pred_EOS = pred.find('[s]')
                        pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                        pred_max_prob = pred_max_prob[:pred_EOS]

                    # calculate confidence score (= product of pred_max_prob)
                    # an empty pred_max_prob would make the [-1] index fail
                    try:
                        confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                    except IndexError:
                        deleteImageAndText(opt.book_img_dirpath, img_name)
                        continue
                    pred = pred[0]  # keep only the first predicted character

                    if confidence_score < 0.5:
                        pred = "Unreadable"
                        deleteImageAndText(opt.book_img_dirpath, img_name)
                        continue

                    elif not (pred in char_list):
                        pred = "Undefined"

                    # extract the name part of the image
                    filename = os.path.basename(img_name)

                    print(
                        f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')

                    writer.writerow({"bookID": filename, "prediction": pred})
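A short consumer for the CSV written above, reusing the same 'bookID' and 'prediction' field names (the relative path assumes the script runs from opt.output_dirpath):

import csv

with open('text_information.csv', newline='') as f:
    for row in csv.DictReader(f):
        print(row['bookID'], row['prediction'])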
Example #22
def demo(opt):
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding), then decode indices to characters
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding), then decode indices to characters
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print('-' * 80)
            print('image_path\tpredicted_labels')
            print('-' * 80)
            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' in opt.Prediction:
                    pred = pred[:pred.find('[s]')]  # prune after "end of sentence" token ([s])

                print(f'{img_name}\t{pred}')
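A side note on the .log_softmax(2) call in the CTC branch above: log-softmax is strictly monotonic per time step, so it never changes the greedy argmax; it only matters when the scores themselves are consumed (e.g. by a CTC loss or a confidence estimate). A quick check:

import torch

logits = torch.randn(2, 26, 37)                    # (batch, time steps, classes)
greedy_raw = logits.max(2)[1]
greedy_logsoftmax = logits.log_softmax(2).max(2)[1]
assert torch.equal(greedy_raw, greedy_logsoftmax)  # identical greedy decoding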
Example #23
def demo(opt):
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)

    #print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
    #	  opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
    #	  opt.SequenceModeling, opt.Prediction)

    model = torch.nn.DataParallel(model).to(device)

    # load model
    #print('loading pretrained model from %s' % opt.saved_model)

    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()

    # list of transcribed values
    predList = list()
    retList = dict()

    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding), then decode indices to characters
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding), then decode indices to characters
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list,
                                                     preds_str,
                                                     preds_max_prob):
                if 'Attn' in opt.Prediction:
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                    pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= product of pred_max_prob)
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]

                # transcriptions
                result = {
                    "img_name": img_name,
                    "pred": str(pred),
                    "confidence_score": f'{confidence_score:0.4f}'
                }
                predList.append(result)

            # localization and source images live under /location/ and /images/
            file_list_1 = os.listdir("../process/location")
            file_list_2 = os.listdir("../process/images")

            retList["pred"] = predList
            retList["localizacion_url"] = "/location/" + file_list_1[0]
            retList["image_url"] = "/images/" + file_list_2[0]

            # json_mylist = json.dumps(retList)

            print(retList)
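The commented-out json.dumps line above shows the intended direction; a minimal sketch of returning the result as JSON instead of printing it (how it is delivered depends on the surrounding web framework, which the excerpt does not show):

import json

payload = json.dumps(retList, ensure_ascii=False)
print(payload)  # or hand it to the web framework's response object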
def demo(opt):
    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding), then decode indices to characters
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                # preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index, preds_size)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding), then decode indices to characters
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            log = open('./log_demo_result.txt', 'a')
            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

            print(f'{dashed_line}\n{head}\n{dashed_line}')
            log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list,
                                                     preds_str,
                                                     preds_max_prob):
                if 'Attn' in opt.Prediction:
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                    pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= product of pred_max_prob)
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]

                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                log.write(
                    f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')
                custom_output.write(
                    f'{img_name}\t{pred}\t{confidence_score:0.4f}\n')
            log.close()
        #print("Cropped image")
        mask_file = result_folder + filename + "_" + str(
            order_sorted[i]) + "_" + str(i) + '.jpg'
        #print(mask_file)
        crop_image = rgb_img[int(min_point[1]):int(max_point[1]),
                             int(min_point[0]):int(max_point[0])]
        #plt.imshow(crop_image)
        #plt.show()
        cv2.imwrite(mask_file, crop_image)

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    #result_folder = './intermediate_result/'
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=result_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)
    print("Starting text classification")
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            #image = (torch.from_numpy(crop_image).unsqueeze(0)).to(device)
            #print(image_path_list)
            #print(image.size())