Python RawDataset примеры использования

Язык программирования: Python

Пространство имен/Пакет: dataset

Класс/Тип: RawDataset

Примеров на hotexamples.com: 25

Python RawDataset - 25 примеров найдено. Это лучшие примеры Python кода для dataset.RawDataset, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

RawDataset(25)

Основные методы

RawDataset (25)

Пример #1

Показать файл

Файл: main.py Проект: rongliangzi/Dense-Scale-Network-for-Crowd-Counting

def get_loader(train_path, test_path, ratio):
    """Build the training and test data loaders over ``*.jpg`` images.

    Args:
        train_path: Directory that is globbed for training ``*.jpg`` files.
        test_path: Directory that is globbed for test ``*.jpg`` files.
        ratio: Forwarded to the training ``RawDataset`` as ``ratio``; the
            test dataset always uses ``ratio=1``.

    NOTE(review): as written, nothing is returned — both loaders are built
    and then dropped. The snippet looks truncated; confirm the intended
    return value against the original project before relying on it.
    """
    train_img_paths = list(glob.glob(os.path.join(train_path, '*.jpg')))
    test_img_paths = list(glob.glob(os.path.join(test_path, '*.jpg')))

    # Same preprocessing for both splits: tensor conversion + normalisation.
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([to_tensor, normalize])

    # Training data is augmented and shuffled.
    train_set = RawDataset(train_img_paths, transform, aug=True, ratio=ratio)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               shuffle=True,
                                               batch_size=1)

    # Test data is neither augmented nor shuffled.
    test_set = RawDataset(test_img_paths, transform, ratio=1, aug=False)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              shuffle=False,
                                              batch_size=1)

Пример #2

Показать файл

Файл: text_reader.py Проект: maheshmadhusudanan/deep-text-recognition-benchmark

    def predictAllImagesInFolder(self, src_path):
        """Run text recognition on every image served from ``src_path``.

        Args:
            src_path: Folder handed to ``RawDataset`` as its ``root``.

        Returns:
            list[str]: One ``"<image basename>,<prediction>"`` entry per
            image, in the order the (unshuffled) loader yields them.
        """
        opt = self.opts
        AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                         imgW=opt.imgW,
                                         keep_ratio_with_pad=opt.PAD)
        demo_data = RawDataset(root=src_path, opt=opt)  # use RawDataset
        demo_loader = torch.utils.data.DataLoader(
            demo_data,
            batch_size=opt.batch_size,
            shuffle=False,
            num_workers=int(opt.workers),
            collate_fn=AlignCollate_demo,
            pin_memory=torch.cuda.is_available())

        # Loop-invariant: only attention decoders emit the "[s]" token.
        is_attn = 'Attn' in opt.Prediction

        results = []
        for image_tensors, image_path_list in demo_loader:
            preds_str = self.predict(image_tensors)

            for img_name, pred in zip(image_path_list, preds_str):
                if is_attn:
                    # Prune after the "end of sentence" token ([s]).
                    # partition() keeps the whole string when the token is
                    # missing, whereas slicing with find() == -1 would
                    # silently drop the last character.
                    pred = pred.partition('[s]')[0]
                results.append(f'{os.path.basename(img_name)},{pred}')

        return results

Пример #3

Показать файл

def get_loader(args):
    """Build the test data loader over the ``*.jpg`` files of a directory.

    ``transform`` is not defined here; it is resolved from the enclosing
    (module) scope at call time.

    Args:
        args: Object whose ``test_img_dir`` attribute names the directory to
            glob for ``*.jpg`` files.

    Returns:
        tuple: ``(test_loader, test_img_paths)`` — the unshuffled,
        batch-size-1 loader and the list of globbed image paths.
    """
    jpg_pattern = os.path.join(args.test_img_dir, '*.jpg')
    test_img_paths = list(glob.glob(jpg_pattern))

    # Evaluation data: no augmentation, fixed ratio, original order.
    test_set = RawDataset(test_img_paths, transform, ratio=1, aug=False)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              shuffle=False,
                                              batch_size=1)
    return test_loader, test_img_paths

Пример #4

Показать файл

Файл: demo.py Проект: inouetaka/OCR_web

def original_demo(model, converter, length_for_pred, text_for_pred):
    """Recognise the text of every image in the configured folder and print it.

    The options come from ``option()``; images are read from
    ``opt['image_folder']`` through ``RawDataset``. For each batch a table of
    ``image_path<TAB>predicted_labels`` rows is printed to stdout.

    Args:
        model: Recognition model, called as ``model(image, text_for_pred)``
            for CTC and ``model(image, text_for_pred, is_train=False)``
            otherwise.
        converter: Label converter whose ``decode`` turns predicted indices
            into strings.
        length_for_pred: Passed to ``converter.decode`` on the non-CTC path.
        text_for_pred: Passed to the model as its text input.
    """
    opt = option()
    AlignCollate_demo = AlignCollate(imgH=opt['imgH'],
                                     imgW=opt['imgW'],
                                     keep_ratio_with_pad=opt['PAD'])
    demo_data = RawDataset(root=opt['image_folder'], opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt['batch_size'],
                                              shuffle=False,
                                              num_workers=int(opt['workers']),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)
    print(demo_loader)

    # Loop-invariant decoder checks (exact match, as in the original options).
    is_ctc = 'CTC' == opt['Prediction']
    is_attn = 'Attn' == opt['Prediction']
    separator = '-' * 80

    # predict
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            #torch.cuda.synchronize(device)
            if is_ctc:
                # Debug marker; kept so the printed output stays unchanged.
                print('kotti')
                preds = model(image, text_for_pred).log_softmax(2)
                # Select the max probability, then decode indices to characters
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select the max probability, then decode indices to characters
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print(separator)
            print('image_path\tpredicted_labels')
            print(separator)
            for img_name, pred in zip(image_path_list, preds_str):
                if is_attn:
                    # Prune after the end-of-sentence token ([s]).
                    # partition() leaves the string intact when the token is
                    # absent; slicing with find() == -1 would cut off the
                    # last character.
                    pred = pred.partition('[s]')[0]

                print(f'{img_name}\t{pred}')

Пример #5

Показать файл

Файл: data_pool.py Проект: haohy/End_to_End_Incremental_Learning

    def add_data(self, model, new_data, num_everyclass, device):
        """Add the new data to the data pool and reduce the quantity of data stored.

        For every class of ``new_data`` the features are extracted with
        ``model``, their mean is computed, and the ``num_everyclass`` samples
        closest to that mean are kept. The pool, the per-class feature means
        and the class list are then updated and the pool is saved to disk via
        ``save_datapool_to_pkl``.

        Args:
            model: representer, passed to ``get_output`` to extract features.
            new_data: dataset object; its ``classes``, ``dir_data``,
                ``dataname``, ``task`` and ``ts_list`` attributes are read.
            num_everyclass: int, number of samples to keep per class.
            device: device forwarded to ``get_output``.
        """
        # Only shrink the pool when it already holds data. The previous
        # ``len(...) >= 0`` test was always true.
        if self.data_pool_dict:
            remained_dict, msg = adjust_data_pool(self.data_pool_dict,
                                                  num_everyclass)
            self.data_pool_dict = remained_dict
            logging.info(msg)
        logging.info("num_everyclass = {}".format(num_everyclass))

        data_dict_tmp = {}
        feature_mean_dict = {}
        for class_label in new_data.classes:
            dataset_tmp = RawDataset(new_data.dir_data, new_data.dataname,
                                     new_data.task, [class_label])
            dataloader_tmp = DataLoader(dataset_tmp,
                                        batch_size=16,
                                        num_workers=1)
            data_feature = get_output(model, dataloader_tmp, device)
            feature_mean = np.mean(data_feature, axis=0)
            # Euclidean distance of every sample to the class mean. A plain
            # signed sum of the differences lets positive and negative
            # components cancel, so it does not rank samples by closeness.
            # assumes data_feature is (num_samples, feature_dim) — TODO confirm
            dist_data = np.linalg.norm(data_feature - feature_mean, axis=1)
            idx_selected = np.argsort(dist_data)[:num_everyclass]
            # NOTE(review): idx_selected indexes the per-class dataset, while
            # ts_list belongs to the whole of new_data — verify they line up.
            data_selected = get_selected_idx(new_data.ts_list, idx_selected)
            data_dict_tmp[class_label] = data_selected
            feature_mean_dict[class_label] = feature_mean

        # update the data pool
        self.data_pool_dict.update(data_dict_tmp)
        self.feature_mean.update(feature_mean_dict)
        self.classes += new_data.classes
        self.num_everyclass = num_everyclass

        self.save_datapool_to_pkl()

Пример #6

Показать файл

Файл: demo.py Проект: linbeyoung/deep-text-recognition-benchmark

def demo(opt):
    """Run text recognition on ``opt.image_folder`` and log the predictions.

    Builds the model described by ``opt``, loads the weights from
    ``opt.saved_model`` and iterates over the images served by ``RawDataset``.
    Results are printed and appended to ``./log_demo_result.csv`` when
    ``opt.batch_max_length == 1`` (top-k characters with probabilities) or to
    ``./log_demo_result.txt`` otherwise (prediction plus confidence score).
    In the latter mode, when ``opt.output_split`` is truthy, the estimated
    split lines are written to ``output/<image stem>.txt`` and drawn onto the
    image, which is saved as ``output/<image name>``.

    Args:
        opt: Options namespace. It is mutated: ``num_class`` is always set
            and ``input_channel`` is forced to 3 when ``opt.rgb`` is truthy.

    Raises:
        RuntimeError: If moving the model to ``device`` fails. The message is
            the device; the original error is chained as the cause.
    """
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    try:
        model = torch.nn.DataParallel(model).to(device)
    except RuntimeError as err:
        # Report the offending device but keep the original traceback.
        raise RuntimeError(device) from err

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_max_length and opt.batch_size or opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # The log target only depends on opt, so it is chosen once. The file is
    # opened once in append mode for the whole run and closed by the context
    # manager even when a batch raises.
    if opt.batch_max_length == 1:
        log_path = './log_demo_result.csv'
    else:
        log_path = './log_demo_result.txt'

    # predict
    model.eval()
    with torch.no_grad(), open(log_path, 'a', encoding='utf-8') as log:
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probabilty (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                # preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index, preds_size)
            else:
                preds, alphas = model(image, text_for_pred, is_train=False)
                alphas = alphas.detach().cpu().numpy()
                if opt.batch_max_length == 1:
                    # select top_k probabilty (greedy decoding) then decode index to character
                    k = opt.topk
                    preds = F.softmax(preds, dim=2)
                    topk_prob, topk_id = preds.topk(k)
                    topk_id = topk_id.detach().cpu()[:, 0, :].unsqueeze(
                        dim=1).numpy()  # (batch_size, topk)
                    # concat 3(['s']) to the end of ids
                    topk_s = np.ones_like(topk_id) * 3
                    topk_id = np.concatenate((topk_id, topk_s), axis=1)
                    topk_chars = converter.decode(topk_id, length_for_pred)
                    topk_probs = topk_prob.detach().cpu(
                    )[:, 0, :]  # (batch_size, topk)
                else:
                    # select max probabilty (greedy decoding) then decode index to character
                    k = opt.topk
                    # _, preds_index = preds.max(dim=2)
                    # preds_str = converter.decode(preds_index, length_for_pred)
                    preds = F.softmax(preds, dim=2)
                    topk_prob, topk_id = preds.topk(k, dim=2)
                    topk_id = topk_id.detach().cpu().numpy(
                    )  # (batch_size, topk)
                    topk_probs = topk_prob.detach().cpu()
                    topk_strs = converter.decode(topk_id, length_for_pred)

            if opt.batch_max_length == 1:
                # NOTE(review): topk_chars / topk_probs are only assigned on
                # the non-CTC path above — confirm CTC is never combined with
                # batch_max_length == 1.
                # topk_probs = F.softmax(topk_probs, dim=-1)
                for img_name, pred, pred_max_prob in zip(
                        image_path_list, topk_chars, topk_probs):
                    if 'Attn' in opt.Prediction:
                        # Prune after the "end of sentence" token ([s]).
                        # partition() keeps the full string when the token is
                        # missing; slicing with find() == -1 would drop the
                        # last character.
                        pred = [p.partition('[s]')[0] for p in pred]
                    print(img_name, end='')
                    log.write(img_name)
                    for pred_char, pred_prob in zip(pred, pred_max_prob):
                        print(',' + pred_char, end='')
                        print(',%.4f' % pred_prob, end='')
                        log.write(',' + pred_char)
                        log.write(',%.4f' % pred_prob)
                    print()
                    log.write('\n')
            else:
                dashed_line = '-' * 80
                head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

                print(f'{dashed_line}\n{head}\n{dashed_line}')
                log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                # preds_prob = F.softmax(preds, dim=2)
                # preds_max_prob, _ = preds_prob.max(dim=2)
                if 'Attn' in opt.Prediction:
                    for idx, (img_name, pred, pred_max_prob) in enumerate(
                            zip(image_path_list, topk_strs, topk_probs)):
                        pred_EOS = pred[0].find('[s]')
                        if pred_EOS == -1:
                            # No end-of-sentence token in the best candidate:
                            # keep every predicted step instead of letting -1
                            # slice off the last one.
                            pred_EOS = len(pred_max_prob)
                        pred = [s[:pred_EOS] for s in pred
                                ]  # prune after "end of sentence" token ([s])
                        pred_max_prob = pred_max_prob[:pred_EOS, :]
                        if opt.output_split:
                            alpha = alphas[idx, :, :].transpose()
                            img = Image.open(img_name).convert('RGB')
                            width, height = img.size
                            alpha = alpha[:pred_EOS]
                            if len(alpha) > 0:
                                last_alpha_line = alpha[-1]
                                # eliminate the influence of padding
                                seq_length = last_alpha_line.shape[0]
                                column_range = np.arange(0, seq_length)
                                ratio = height / width
                                # too long, compress into opt shape, don't need pad
                                if ratio > opt.imgH / opt.imgW:
                                    want_height = opt.imgW * ratio
                                    compress_ratio = want_height / opt.imgH
                                    expect_last_column = seq_length
                                # need pad
                                else:
                                    compress_ratio = 1
                                    expect_height = height / width * opt.imgW
                                    expect_last_column = expect_height / opt.imgH * seq_length
                                # Rescale the column axis around its centre.
                                # NOTE(review): 320 and 32 look like a
                                # hard-coded imgW / imgH — confirm.
                                column_range = column_range - seq_length / 2
                                column_range = column_range / 320 * (
                                    320 + (compress_ratio - 1) * 32)
                                column_range = column_range + seq_length / 2
                                # column_range = column_range - column_range[0]
                                # last_column = np.argmax(last_alpha_line)
                                # Attention-weighted mean column of the last step.
                                last_column = np.dot(last_alpha_line,
                                                     column_range)
                                expect_linein = expect_last_column - last_column
                                split_output = os.path.join(
                                    'output',
                                    os.path.splitext(
                                        os.path.basename(img_name))[0] +
                                    '.txt')
                                with open(split_output, 'w',
                                          encoding='utf-8') as fp:
                                    draw = ImageDraw.Draw(img)
                                    for alpha_line in alpha:
                                        column = np.dot(
                                            alpha_line, column_range)
                                        line_height = int(
                                            (column - expect_linein / 2) /
                                            (last_column - expect_linein / 2) *
                                            height)
                                        # line_height = int(column / last_column * height)
                                        line = [
                                            0, line_height, width - 1,
                                            line_height
                                        ]
                                        line = list(map(str, line))
                                        fp.write(','.join(line) + '\n')
                                        draw.line(((0, line_height),
                                                   (width - 1, line_height)),
                                                  fill=(255, 0, 0),
                                                  width=2)
                                    img.save(
                                        os.path.join(
                                            'output',
                                            os.path.basename(img_name)))

                        best_pred = pred[0]
                        best_prob = pred_max_prob[:, 0]

                        # calculate confidence score (= multiply of pred_max_prob)
                        try:
                            confidence_score = best_prob.cumprod(dim=0)[-1]
                        except IndexError:
                            # Empty prediction: nothing to multiply.
                            confidence_score = 0.0
                            # print(f'{img_name:25s}\t{pred:25s}\t can\'t predict')
                            # raise ValueError()
                        print(
                            f'{img_name:25s}\t{best_pred:25s}\t{confidence_score:0.4f}'
                        )
                        log.write(
                            f'{img_name:25s}\t{best_pred:25s}\t{confidence_score:0.4f}\n'
                        )
                        for i in range(k):
                            print(f'Candidatae {i:1d}: ', end='')
                            for j in range(pred_EOS):
                                print(
                                    f'{pred[i][j]}, prob: {pred_max_prob[j][i]:0.4f}\t',
                                    end='')
                            print()

                else:
                    preds_prob = F.softmax(preds, dim=2)
                    preds_max_prob, _ = preds_prob.max(dim=2)
                    for img_name, pred, pred_max_prob, pred_idx in zip(
                            image_path_list, preds_str, preds_max_prob,
                            preds_index):
                        pred_EOS = len(pred)
                        pred_max_prob = pred_max_prob[:pred_EOS]
                        # calculate confidence score (= multiply of pred_max_prob)
                        try:
                            confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                        except IndexError:
                            # Empty prediction: nothing to multiply.
                            confidence_score = 0.0
                            # print(f'{img_name:25s}\t{pred:25s}\t can\'t predict')
                            # raise ValueError()
                        if opt.output_split:
                            # Opened once; the same image is measured here and
                            # drawn on below.
                            img = Image.open(img_name).convert('RGB')
                            width, height = img.size
                            pred_idx = pred_idx.detach().cpu().numpy().tolist()
                            preds_len = len(pred_idx)
                            ratio = height / width
                            # too long, compress into opt shape, don't need pad
                            if ratio > opt.imgH / opt.imgW:
                                want_height = opt.imgW * ratio
                                compress_ratio = want_height / opt.imgH
                                expect_last_column = preds_len
                            # need pad
                            else:
                                compress_ratio = 1
                                expect_height = height / width * opt.imgW
                                expect_last_column = expect_height / opt.imgH * preds_len
                            split_output = os.path.join(
                                'output',
                                os.path.splitext(os.path.basename(img_name))[0]
                                + '.txt')

                            # hyper-parameter, suggestion 6-0.46-0.21 for 320CTC
                            # TODO find hyper-parameter for 480CTC
                            CTC_start = 6
                            center_ratio = 0.46
                            zoom_ratio = 0.21
                            # for CTC_start in np.arange(6.0, 7.1, 0.1):
                            #     for center_ratio in np.arange(0.37, 0.46, 0.01):
                            #         for zoom_ratio in np.arange(0.18, 0.23, 0.01):
                            with open(split_output, 'w',
                                      encoding='utf-8') as fp:
                                cur_pos = 0
                                draw = ImageDraw.Draw(img)
                                # Runs of identical consecutive indices; runs
                                # with key 0 are skipped as non-characters.
                                index_group = itertools.groupby(pred_idx)
                                for key, group in index_group:
                                    group = list(group)
                                    if key != 0:
                                        nxt_pos = cur_pos - 1 + len(group)
                                        # Middle column of the run.
                                        column = (cur_pos + nxt_pos) // 2
                                        column = column - CTC_start
                                        column = (column - preds_len * center_ratio) * (1 + zoom_ratio * compress_ratio) \
                                                 + (preds_len * center_ratio)
                                        line_height = int(column /
                                                          expect_last_column *
                                                          height)

                                        line = [
                                            0, line_height, width - 1,
                                            line_height
                                        ]
                                        line = list(map(str, line))
                                        fp.write(','.join(line) + '\n')
                                        draw.line(((0, line_height),
                                                   (width - 1, line_height)),
                                                  fill=(255, 0, 0),
                                                  width=2)
                                    cur_pos += len(group)
                                img.save(
                                    os.path.join('output',
                                                 os.path.basename(img_name)))
                                # img.save(os.path.join('output', '{}_{:02d}_{:03d}_{:03d}.jpg'.format(os.path.splitext(os.path.basename(img_name))[0], int(CTC_start*10), int(center_ratio*100), int(zoom_ratio*100))))

                        print(
                            f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}'
                        )
                        log.write(
                            f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n'
                        )

Пример #7

Показать файл

Файл: text_extractor.py Проект: rohanrajnair/MultiModal-Movie-Genre-Analysis

    def extract_text(self):
        """Detect, crop and recognise the text of every image in ``self.i_folder``.

        Stage 1 runs ``self.test_net`` on each image, turns every detected
        box into an axis-aligned rectangle clamped to the image, groups the
        rectangles into rows (vertical overlap of at least 30%), orders each
        row left to right and writes the crops to ``self.result_folder`` as
        ``<stem>_<row>_<index><ext>``.

        Stage 2 feeds those crops to ``self.model`` and writes, for every
        source image, three lines to ``self.extract_text_file``: its index,
        its stem and the recognised words (rows separated by ``"[SEP] "``),
        followed by a blank line.

        Folder attributes are joined to file names by plain string
        concatenation, so they must already end with a path separator.
        """
        l = sorted(os.listdir(self.i_folder))
        # Maps the position of an image in the sorted listing to its stem.
        img_to_index = {}
        count = 0
        for full_file in l:
            # NOTE(review): the name is split at every "."; stems containing
            # dots and names without an extension are not handled.
            split_file = full_file.split(".")
            filename = split_file[0]
            img_to_index[count] = filename
            #print(count, filename)
            count += 1
            #print(filename)
            file_extension = "." + split_file[1]
            #print(filename, file_extension)
            image = imgproc.loadImage(self.i_folder + full_file)
            bboxes, polys, score_text = self.test_net(
                self.net, image, self.text_threshold, self.link_threshold,
                self.low_text, self.cuda, self.poly, self.refine_net)
            img = cv2.imread(self.i_folder + filename + file_extension)
            rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            points = []
            # order[i] == 0 means box i has not been assigned to a row yet.
            order = []
            for i in range(0, len(bboxes)):
                sample_bbox = bboxes[i]
                min_point = sample_bbox[0]
                max_point = sample_bbox[2]
                # Axis-aligned bounding rectangle of the box corners.
                for j, p in enumerate(sample_bbox):
                    if (p[0] <= min_point[0]):
                        min_point = (p[0], min_point[1])
                    if (p[1] <= min_point[1]):
                        min_point = (min_point[0], p[1])
                    if (p[0] >= max_point[0]):
                        max_point = (p[0], max_point[1])
                    if (p[1] >= max_point[1]):
                        max_point = (max_point[0], p[1])
                # Clamp both corners to the image bounds.
                min_point = (max(min(len(rgb_img[0]), min_point[0]),
                                 0), max(min(len(rgb_img), min_point[1]), 0))
                max_point = (max(min(len(rgb_img[0]), max_point[0]),
                                 0), max(min(len(rgb_img), max_point[1]), 0))
                points.append((min_point, max_point))
                order.append(0)
            num_ordered = 0
            rows_ordered = 0
            points_sorted = []
            ordered_points_index = 0
            order_sorted = []
            while (num_ordered < len(points)):
                #find lowest-y that is unordered
                min_y = len(rgb_img)
                min_y_index = -1
                for i in range(0, len(points)):
                    if (order[i] == 0):
                        if (points[i][0][1] <= min_y):
                            min_y = points[i][0][1]
                            min_y_index = i
                # That box starts a new row.
                rows_ordered += 1
                order[min_y_index] = rows_ordered
                num_ordered += 1
                points_sorted.append(points[min_y_index])
                order_sorted.append(rows_ordered)
                # Index in points_sorted where the current row begins.
                ordered_points_index = len(points_sorted) - 1

                # Group bboxes that are on the same row
                max_y = points[min_y_index][1][1]
                range_y = max_y - min_y
                for i in range(0, len(points)):
                    if (order[i] == 0):
                        min_y_i = points[i][0][1]
                        max_y_i = points[i][1][1]
                        range_y_i = max_y_i - min_y_i
                        if (max_y_i >= min_y and min_y_i <= max_y):
                            # Vertical overlap relative to the shorter box.
                            overlap = (min(max_y_i, max_y) -
                                       max(min_y_i, min_y)) / (max(
                                           1, min(range_y, range_y_i)))
                            if (overlap >= 0.30):
                                order[i] = rows_ordered
                                num_ordered += 1
                                min_x_i = points[i][0][0]
                                # Insert into the current row, keeping it
                                # sorted by left edge.
                                for j in range(ordered_points_index,
                                               len(points_sorted) + 1):
                                    if (j < len(points_sorted)
                                        ):  #insert before
                                        min_x_j = points_sorted[j][0][0]
                                        if (min_x_i < min_x_j):
                                            points_sorted.insert(j, points[i])
                                            order_sorted.insert(
                                                j, rows_ordered)
                                            break
                                    else:  #insert at the end of array
                                        points_sorted.insert(j, points[i])
                                        order_sorted.insert(j, rows_ordered)
                                        break
            # Write one crop per box: <stem>_<row>_<index><ext>.
            for i in range(0, len(points_sorted)):
                min_point = points_sorted[i][0]
                max_point = points_sorted[i][1]
                mask_file = self.result_folder + filename + "_" + str(
                    order_sorted[i]) + "_" + str(i) + file_extension
                crop_image = rgb_img[int(min_point[1]):int(max_point[1]),
                                     int(min_point[0]):int(max_point[0])]
                #print(filename, min_point, max_point, len(rgb_img), len(rgb_img[0]))
                cv2.imwrite(mask_file, crop_image)
        AlignCollate_demo = AlignCollate(imgH=self.opt.imgH,
                                         imgW=self.opt.imgW,
                                         keep_ratio_with_pad=self.opt.PAD)
        demo_data = RawDataset(root=self.result_folder,
                               opt=self.opt)  # use RawDataset
        demo_loader = torch.utils.data.DataLoader(
            demo_data,
            batch_size=self.opt.batch_size,
            shuffle=False,
            num_workers=int(self.opt.workers),
            collate_fn=AlignCollate_demo,
            pin_memory=True)
        count = -1
        curr_order = 1
        curr_filename = ""
        output_string = ""
        end_line = "[SEP] "
        # The context manager closes the output file even if recognition
        # raises part-way through.
        with open(self.extract_text_file, "w") as f, torch.no_grad():
            for image_tensors, image_path_list in demo_loader:
                batch_size = image_tensors.size(0)
                image = image_tensors.to(self.device)
                #image = (torch.from_numpy(crop_image).unsqueeze(0)).to(device)
                #print(image_path_list)
                #print(image.size())
                length_for_pred = torch.IntTensor([self.opt.batch_max_length] *
                                                  batch_size).to(self.device)
                text_for_pred = torch.LongTensor(batch_size,
                                                 self.opt.batch_max_length +
                                                 1).fill_(0).to(self.device)
                preds = self.model(image, text_for_pred, is_train=False)
                _, preds_index = preds.max(2)
                preds_str = self.converter.decode(preds_index, length_for_pred)
                for path, p in zip(image_path_list, preds_str):
                    #print(path)
                    if 'Attn' in self.opt.Prediction:
                        # Prune after the "end of sentence" token ([s]).
                        # partition() keeps the whole word when the token is
                        # missing; slicing with find() == -1 would drop the
                        # last character.
                        p = p.partition('[s]')[0]
                    # Crop name without the folder prefix, cut at the first
                    # "." and split on "_" -> [stem, row, index]. This relies
                    # on the stem containing neither "." nor "_".
                    path_info = path[len(self.result_folder):].split(
                        ".")[0].split("_")
                    #print(curr_filename)
                    #print(path_info[0])
                    #print("PATHINFO: ",path_info[0])
                    if (not (curr_filename == path_info[0])):
                        # First crop of a new source image: flush the text
                        # collected for the previous one.
                        if (not (curr_filename == "")):
                            f.write(str(count) + "\n")
                            f.write(curr_filename + "\n")
                            f.write(output_string + "\n\n")
                        count += 1
                        curr_filename = img_to_index[count]  #path_info[0]
                        #print("CURRFILE: ", curr_filename)
                        # Source images that produced no crops still get an
                        # (empty) entry so the indices stay aligned.
                        while (not (curr_filename == path_info[0])):
                            f.write(str(count) + "\n")
                            f.write(curr_filename + "\n")
                            f.write("\n\n")
                            count += 1
                            curr_filename = img_to_index[count]  #path_info[0]
                            #print("CURRFILE: ", curr_filename)
                        output_string = ""
                        curr_order = 1
                    # A higher row number marks the start of the next row.
                    if (int(path_info[1]) > curr_order):
                        curr_order += 1
                        output_string += end_line
                    output_string += p + " "
            # Flush the entry of the last source image.
            f.write(str(count) + "\n")
            f.write(curr_filename + "\n")
            f.write(output_string + "\n\n")

Пример #8

Показать файл

def demo(opt, length, db_url):
    """Recognise plate crops and store each prediction in the database.

    Builds the recognition model described by ``opt``, runs it over every
    image found in ``opt.image_folder`` and, for each image whose path
    contains ``plate_``, updates the matching row of ``table_<base>`` with
    the predicted text. Results are also printed and appended to
    ``./log_demo_result.txt``.

    Args:
        opt: Options namespace. Read here: the model hyper-parameters,
            ``character``, ``rgb``, ``saved_model``, ``image_folder``,
            ``batch_size``, ``workers`` and ``PAD``. ``num_class`` (and
            ``input_channel`` when ``rgb`` is set) are overwritten.
        length: Number of leading characters stripped from every image path
            so that ``<base>/<frame>_<id>`` remains.
        db_url: Database URL passed to ``create_engine``.

    Raises:
        ValueError: If ``<base>`` is not a plain identifier, or if the frame
            or id part of the file name is not an integer.
    """
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # One engine (and its connection pool) for the whole run instead of a new
    # engine plus a leaked connection for every image.
    engine = create_engine(db_url)

    dashed_line = '-' * 80
    head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, preds_size)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)

            # The context manager closes the log even if a DB update fails.
            with open('./log_demo_result.txt', 'a') as log:
                print(f'{dashed_line}\n{head}\n{dashed_line}')
                log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                for img_name, pred, pred_max_prob in zip(image_path_list,
                                                         preds_str,
                                                         preds_max_prob):
                    # we are only interested in plates themselves
                    if 'plate_' not in img_name:
                        continue
                    if 'Attn' in opt.Prediction:
                        pred_EOS = pred.find('[s]')
                        # prune after "end of sentence" token ([s])
                        pred = pred[:pred_EOS]
                        pred_max_prob = pred_max_prob[:pred_EOS]

                    # calculate confidence score (= multiply of pred_max_prob)
                    confidence_score = pred_max_prob.cumprod(dim=0)[-1]

                    # getting name of the current image
                    img_name = img_name.replace('.jpg', '')
                    img_name = img_name.replace('res_', '')
                    img_name = img_name.replace('plate_', '')

                    # cutting piece of full path which equals length
                    print(length)
                    img_name = img_name[length:]
                    print(img_name)

                    # splitting into image name and db name
                    res = re.split(r'/', img_name)
                    base = res[0]
                    img_name = res[1]
                    print(base)

                    # splitting into first number of image and second
                    # (which are frame num and id respectively)
                    result = re.split(r'_', img_name)
                    print(result)

                    # A table name cannot be a bound parameter, so make sure
                    # it is a plain identifier before splicing it in.
                    if not re.fullmatch(r'\w+', base):
                        raise ValueError(
                            'invalid table suffix derived from path: %r' % base)

                    # writing recognised numbers to db; values are bound
                    # parameters so the prediction is quoted correctly and
                    # cannot alter the statement.
                    sql = text('UPDATE table_' + base +
                               ' SET plate_number = :plate_number'
                               ' WHERE frame = :frame AND id = :id')
                    with engine.begin() as conn:
                        conn.execute(
                            sql, {
                                'plate_number': pred,
                                'frame': int(result[0]),
                                'id': int(result[1]),
                            })

                    print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                    log.write(
                        f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')

Пример #9

Показать файл

def demo(opt):
    """Run attention-based recognition and keep the most confident output.

    The model call returns a sequence whose first element is iterated as
    several prediction tensors for the same batch (presumably one per
    prediction block -- confirm against ``Model``). For every image the
    string with the highest confidence across those tensors is printed and
    written to ``./log_demo_result.txt``.

    Args:
        opt: Options namespace. Read here: the model hyper-parameters,
            ``character``, ``rgb``, ``saved_model``, ``image_folder``,
            ``batch_size``, ``workers`` and ``PAD``. ``num_class`` (and
            ``input_channel`` when ``rgb`` is set) are overwritten.
    """
    # model configuration
    converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    dashed_line = '-' * 80
    head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

    # predict
    model.eval()

    # The log is opened once for the whole run: reopening it with 'w' for
    # every batch truncated the file, so only the last batch was kept.
    with torch.no_grad(), open('./log_demo_result.txt', 'w') as log:
        for image_tensors, image_path_list in demo_loader:
            all_pred_strs = []
            all_confidence_scores = []
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            predss = model(image, text_for_pred, is_train=False)[0]

            for preds in predss:
                confidence_score_list = []
                pred_str_list = []

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

                preds_prob = F.softmax(preds, dim=2)
                preds_max_prob, _ = preds_prob.max(dim=2)
                for pred, pred_max_prob in zip(preds_str, preds_max_prob):
                    pred_EOS = pred.find('[s]')
                    # prune after "end of sentence" token ([s])
                    pred = pred[:pred_EOS]
                    pred_str_list.append(pred)
                    pred_max_prob = pred_max_prob[:pred_EOS]

                    # calculate confidence score (= multiply of pred_max_prob)
                    try:
                        confidence_score = pred_max_prob.cumprod(
                            dim=0)[-1].cpu().numpy()
                    except IndexError:
                        # Empty prediction: nothing is left after pruning at
                        # the "end of sentence" token, so [-1] has no element.
                        confidence_score = 0
                    confidence_score_list.append(confidence_score)

                all_pred_strs.append(pred_str_list)
                all_confidence_scores.append(confidence_score_list)

            all_confidence_scores = np.array(all_confidence_scores)
            all_pred_strs = np.array(all_pred_strs)

            best_pred_index = np.argmax(all_confidence_scores, axis=0)
            best_pred_index = np.expand_dims(best_pred_index, axis=0)

            # Get max prediction per image through blocks
            all_pred_strs = np.take_along_axis(all_pred_strs,
                                               best_pred_index,
                                               axis=0)[0]
            all_confidence_scores = np.take_along_axis(all_confidence_scores,
                                                       best_pred_index,
                                                       axis=0)[0]

            print(f'{dashed_line}\n{head}\n{dashed_line}')
            log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')
            for img_name, pred, confidence_score in zip(
                    image_path_list, all_pred_strs, all_confidence_scores):
                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                log.write(
                    f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')

Пример #10

Показать файл

def demo(opt):
    """Recognise images and save annotated copies sorted by correctness.

    Every image in ``opt.image_folder`` is recognised and compared with the
    ground truth read from ``gt.txt`` (lines of the form
    ``<file name>, "<label>"``). An annotated copy of the image is written
    to ``./trash/true/`` when the prediction matches and to
    ``./trash/false/`` (with the expected label added) otherwise.

    Args:
        opt: Options namespace. Read here: the model hyper-parameters,
            ``character``, ``rgb``, ``saved_model``, ``image_folder``,
            ``batch_size``, ``workers`` and ``PAD``. ``num_class`` (and
            ``input_channel`` when ``rgb`` is set) are overwritten.

    Raises:
        KeyError: If an image file name has no entry in ``gt.txt``.
    """
    # model configuration
    if 'Transformer' in opt.SequenceModeling:
        converter = TransformerLabelConverter(opt.character)
    elif 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    # Inputs must live on the same device as the model; the model is only
    # moved to the GPU when one is available, so the inputs follow suit.
    run_device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')

    # load model
    if opt.saved_model != '':
        print('loading pretrained model from %s' % opt.saved_model)
        # The model is still on the CPU here, so load the weights there too;
        # this also lets GPU-saved checkpoints load on CPU-only machines.
        checkpoint = torch.load(opt.saved_model, map_location='cpu')
        # Exact type check on purpose: a bare state_dict is an OrderedDict
        # (a dict subclass) and must take the else branch.
        if type(checkpoint) is dict:
            model.load_state_dict(checkpoint['state_dict'])
        else:
            model.load_state_dict(checkpoint)
        del checkpoint
        torch.cuda.empty_cache()

    model = torch.nn.DataParallel(model)
    if torch.cuda.is_available():
        model = model.cuda()

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()
    dict_gt = {}
    with open('gt.txt', 'r') as gt_file:
        for line in gt_file:
            fields = line.split(', "')
            dict_gt[fields[0]] = fields[1].replace('"\n', '').lower()

    # Drawing parameters shared by both outcomes.
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    line_type = 2
    green = (0, 255, 0)
    red = (0, 0, 255)

    for image_tensors, image_path_list in demo_loader:
        batch_size = image_tensors.size(0)
        # The forward pass is inside no_grad as well, so inference does not
        # build an autograd graph.
        with torch.no_grad():
            image = image_tensors.to(run_device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(run_device)
            text_for_pred = torch.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0).to(run_device)

            if 'Transformer' in opt.SequenceModeling:
                preds = model(image, text_for_pred, is_train=False)
                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            elif 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

        print('-' * 80)
        print('image_path\tpredicted_labels')
        print('-' * 80)
        for img_name, pred in zip(image_path_list, preds_str):
            if 'Transformer' in opt.SequenceModeling:
                pred = pred[:pred.find('</s>')]
            elif 'Attn' in opt.Prediction:
                # prune after "end of sentence" token ([s])
                pred = pred[:pred.find('[s]')]

            file_name = img_name.split('/')[-1]
            expected = dict_gt[file_name]

            # White 200x128 canvas: resized image on top, text underneath.
            raw_img = cv2.imread(img_name)
            raw_img = cv2.resize(raw_img, (200, 64))
            canvas = np.zeros((128, 200, 3), np.uint8)
            canvas.fill(255)
            canvas[:64, :200] = raw_img
            raw_img = canvas

            if pred == expected:
                cv2.putText(raw_img, pred, (5, 90), font, font_scale,
                            green, line_type)
                # Only one text row is needed, so crop the spare space.
                raw_img = raw_img[:96, :200]
                cv2.imwrite('./trash/true/' + file_name, raw_img)
            else:
                cv2.putText(raw_img, pred, (5, 90), font, font_scale,
                            red, line_type)
                cv2.putText(raw_img, expected, (5, 125), font, font_scale,
                            green, line_type)
                cv2.imwrite('./trash/false/' + file_name, raw_img)
            print(f'{img_name}\t{pred}')

Пример #11

Показать файл

Файл: demo.py Проект: Jumpst3r/wordimage2text

def demo(opt):
    """Recognise word images and write ``<box points>,<text>`` rows to CSV.

    The bounding boxes come from ``parse_csv(opt.input_image,
    opt.boxescsv)``; the n-th recognised image is paired with the n-th
    entry of that result. Rows are written to
    ``<opt.output_folder>result.csv``. The file is then copied to
    ``/input/output`` and that directory is archived as
    ``per_word_visual.zip``.

    Args:
        opt: Options namespace. Read here: ``input_image``, ``boxescsv``,
            ``output_folder``, the model hyper-parameters, ``character``,
            ``rgb``, ``saved_model``, ``image_folder``, ``batch_size``,
            ``workers`` and ``PAD``. ``num_class`` (and ``input_channel``
            when ``rgb`` is set) are overwritten.
    """
    inputimage = opt.input_image
    boxesscv = opt.boxescsv
    bboxes = parse_csv(inputimage, boxesscv)

    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    result_path = f'{opt.output_folder}result.csv'

    # predict
    model.eval()
    # The CSV is opened once and the image counter runs across batches.
    # Reopening with 'w' and restarting the index for every batch dropped
    # earlier batches and paired later ones with the wrong boxes.
    with torch.no_grad(), open(result_path, 'w') as log:
        img_index = 0
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, preds_size)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            for pred in preds_str:
                if 'Attn' in opt.Prediction:
                    # prune after "end of sentence" token ([s])
                    pred = pred[:pred.find('[s]')]

                for pts in bboxes[img_index]:
                    x, y = pts
                    log.write(f'{x},{y},')
                log.write(f'{pred}\n')
                img_index += 1

    # copy log to local output folder (no shell involved, so unusual
    # characters in output_folder are harmless), then archive it
    shutil.copy(result_path, '/input/output')
    shutil.make_archive('per_word_visual', 'zip', '/input/output')

Пример #12

Показать файл

Файл: icdar_demo.py Проект: zhitao654321/Bert_OCR.pytorch

def demo(opt):
    """Recognise images and dump the transcriptions to a JSON file.

    Results are written to ``./result/<folder>/result.json`` where
    ``<folder>`` is the second-to-last ``/``-separated component of
    ``opt.image_folder`` (so the path is expected to end with ``/``). Each
    key is the image file name without extension and with ``gt`` replaced
    by ``res``; each value is ``[{"transcription": <prediction>}]``.

    Args:
        opt: Options namespace. Read here: the model hyper-parameters,
            ``character``, ``rgb``, ``saved_model``, ``image_folder``,
            ``batch_size``, ``workers`` and ``PAD``. ``num_class``,
            ``alphabet_size`` (and ``input_channel`` when ``rgb`` is set)
            are overwritten.
    """
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    elif 'Bert' in opt.Prediction:
        converter = TransformerConverter(opt.character, opt.batch_max_length)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)
    opt.alphabet_size = len(opt.character) + 2  # +2 for [UNK]+[EOS]

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)

    # Inputs must live on the same device as the model; the model is only
    # moved to the GPU when one is available, so the inputs follow suit.
    run_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = torch.nn.DataParallel(model)
    if torch.cuda.is_available():
        model = model.cuda()

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=run_device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # mkdir result
    experiment_name = os.path.join('./result', opt.image_folder.split('/')[-2])
    os.makedirs(experiment_name, exist_ok=True)
    result = {}

    # predict
    model.eval()
    for idx, (image_tensors, image_path_list) in enumerate(demo_loader):
        batch_size = image_tensors.size(0)
        # Every branch's forward pass is inside no_grad, so inference never
        # builds an autograd graph.
        with torch.no_grad():
            image = image_tensors.to(run_device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(run_device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(run_device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            elif 'Bert' in opt.Prediction:
                pad_mask = None
                preds = model(image, pad_mask)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds[1].max(2)
                length_for_pred = torch.IntTensor([preds_index.size(-1)] * batch_size).to(run_device)
                preds_str = converter.decode(preds_index, length_for_pred)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

        print(f'{idx}/{len(demo_data) / opt.batch_size}')

        for img_name, pred in zip(image_path_list, preds_str):
            if 'Attn' in opt.Prediction:
                pred = pred[:pred.find('[s]')]  # prune after "end of sentence" token ([s])

            # write in json
            name = f'{img_name}'.split('/')[-1].replace('gt', 'res').split('.')[0]
            value = [{"transcription": f'{pred}'}]
            result[name] = value

    with open(f'{experiment_name}/result.json', 'w') as f:
        json.dump(result, f)
        print("writed finish...")

Пример #13

Показать файл

Файл: demo.py Проект: nightfuryyy/deep-text-recognition-benchmark

def demo(opt):
    """Recognise images, log the results and optionally build an HTML report.

    Each prediction is printed and appended to ``./log_demo_result.txt``.
    A table with the image embedded as base64 PNG, its path, the predicted
    label and the rounded confidence is collected, and written to
    ``result.html`` when ``opt.is_save`` is set.

    Args:
        opt: Options namespace. Read here: ``guide_training``, ``baiduCTC``,
            ``beam_search``, ``is_save``, the model hyper-parameters,
            ``character``, ``rgb``, ``saved_model``, ``image_folder``,
            ``batch_size``, ``workers`` and ``PAD``. ``num_class``,
            ``num_class_ctc``, ``num_class_attn`` (and ``input_channel``
            when ``rgb`` is set) are overwritten.
    """
    # model configuration
    if opt.guide_training :
      from model_guide import Model
    else :
      from model import Model
    if opt.baiduCTC:
        converter = CTCLabelConverterForBaiduWarpctc(opt.character)
    else :
        converter = CTCLabelConverter(opt.character)
    if opt.Prediction == 'Attn' :
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)
    opt.num_class_ctc = opt.num_class
    opt.num_class_attn = opt.num_class_ctc + 1

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device), strict = False)

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    dashed_line = '-' * 80
    head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

    # predict
    model.eval()
    data = pd.DataFrame()
    with torch.no_grad():
        ind = 0
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                if opt.guide_training :
                    preds = model.module.inference(image, text_for_pred)
                else :
                    preds = model(image, text_for_pred)

                preds_size = torch.IntTensor([preds.size(1)] * batch_size)

                # Select max probability (greedy decoding) then decode index to character
                if opt.baiduCTC:
                    if (opt.beam_search):
                      preds_index = preds
                    else :
                      _, preds_index = preds.max(2)
                      preds_index = preds_index.view(-1)
                else:
                    _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index.data, preds_size.data,opt.beam_search)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)

            # The context manager closes the log even when reading or
            # encoding one of the images fails part-way through the batch.
            with open('./log_demo_result.txt', 'a') as log:
                print(f'{dashed_line}\n{head}\n{dashed_line}')
                log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                    if 'Attn' in opt.Prediction:
                        pred_EOS = pred.find('[s]')
                        pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                        pred_max_prob = pred_max_prob[:pred_EOS]

                    # calculate confidence score (= multiply of pred_max_prob)
                    confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                    conf = round(confidence_score.item(),3)

                    # Re-encode the image as PNG and embed it as a data URI.
                    img = cv2.imread(img_name)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img_pil = Image.fromarray(img)
                    img_buffer = io.BytesIO()
                    img_pil.save(img_buffer, format="PNG")
                    imgStr = base64.b64encode(img_buffer.getvalue()).decode("utf-8")

                    data.loc[ind, 'img'] = '<img src="data:image/png;base64,{0:s}">'.format(imgStr)
                    data.loc[ind, 'id'] = img_name
                    data.loc[ind, 'label'] = pred
                    data.loc[ind, 'conf'] = conf
                    ind+=1
                    print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                    log.write(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')

        # escape=False keeps the <img> tags as real HTML.
        html_all = data.to_html(escape=False)
        if opt.is_save :
            with open("result.html", "w") as text_file:
                text_file.write(html_all)

Пример #14

Показать файл

def index():
    """Recognise every image in the configured folder and render the results.

    The model, converter, prediction buffers and options all come from
    ``loader()``. Each prediction is printed, simple timing figures are
    reported, and ``index.html`` is rendered with ``images`` set to a
    mapping of image path (minus its first 9 characters) to predicted text.

    Returns:
        The value returned by ``render_template('index.html', images=...)``.
    """
    model, converter, length_for_pred, text_for_pred, opt = loader()
    start_time = time.time()

    AlignCollate_demo = AlignCollate(imgH=opt['imgH'],
                                     imgW=opt['imgW'],
                                     keep_ratio_with_pad=opt['PAD'])
    demo_data = RawDataset(root=opt['image_folder'], opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt['batch_size'],
                                              shuffle=False,
                                              num_workers=int(opt['workers']),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    get_data = time.time() - start_time

    # Filled while iterating so that every batch is included (not just the
    # last one) and an empty folder yields an empty mapping, not a NameError.
    items = {}

    # predict
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            if 'CTC' in opt['Prediction']:
                preds = model(image, text_for_pred)
                preds = preds.log_softmax(2)
                # Select max probability, then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability, then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print('-' * 80)
            print('image_path\tpredicted_labels')
            print('-' * 80)
            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' in opt['Prediction']:
                    # prune after "end of sentence" token ([s])
                    pred = pred[:pred.find('[s]')]

                print(f'{img_name}\t{pred}')
                # Store the pruned text so the page does not show the raw
                # "[s]" tokens; the first 9 path characters are dropped
                # (presumably the image-folder prefix -- confirm).
                items[img_name[9:]] = pred

        forward_time = time.time() - start_time
        only_infer_time = forward_time - get_data

        print('*' * 80)
        print('get_dta_time:{:.5f}[sec]'.format(get_data))
        print('only_infer_time:{:.5f}[sec]'.format(only_infer_time))
        print('total_time:{:.5f}[sec]'.format(forward_time))
        print('*' * 80)

    return render_template('index.html', images=items)

Пример #15

Показать файл

def runDeepTextNet(segmentedImagesList):
    """Recognise text with a TPS-ResNet-BiLSTM-Attn model loaded on the CPU.

    Args:
        segmentedImagesList: Forwarded to ``RawDataset`` as its ``root``
            argument.

    Returns:
        A one-element list whose single string is every prediction, in
        loader order, joined by single spaces.
    """
    opt = argparse.Namespace(FeatureExtraction='ResNet',
                             PAD=False,
                             Prediction='Attn',
                             SequenceModeling='BiLSTM',
                             Transformation='TPS',
                             batch_max_length=25,
                             batch_size=192,
                             character='0123456789abcdefghijklmnopqrstuvwxyz',
                             hidden_size=256,
                             image_folder='demo_image/',
                             imgH=32,
                             imgW=100,
                             input_channel=1,
                             num_class=38,
                             num_fiducial=20,
                             num_gpu=0,
                             output_channel=512,
                             rgb=False,
                             saved_model='TPS-ResNet-BiLSTM-Attn.pth',
                             sensitive=False,
                             workers=4)

    # The model is built from the hard-coded options above and kept on the CPU.
    model = Model(opt)
    model = torch.nn.DataParallel(model).to('cpu')
    directory = "TPS-ResNet-BiLSTM-Attn.pth"
    model.load_state_dict(torch.load(directory, map_location='cpu'))

    converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)
    if opt.rgb:
        opt.input_channel = 3

    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=segmentedImagesList, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()

    out_preds_texts = []
    # Inference only: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            # NOTE(review): `device` is a module-level name defined outside
            # this function; it must match the CPU placement of the model.
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size,
                                             opt.batch_max_length +
                                             1).fill_(0).to(device)
            preds = model(image, text_for_pred, is_train=False)
            # select max probability (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)
            for _, pred in zip(image_path_list, preds_str):
                if 'Attn' in opt.Prediction:
                    # Keep only the text before the end-of-sentence token.
                    # partition() leaves the string intact when "[s]" is
                    # missing, whereas slicing with find() == -1 would
                    # silently drop the last character.
                    pred = pred.partition('[s]')[0]
                # The per-character confidence was computed but never used
                # (and crashed on empty predictions), so it is not computed.
                out_preds_texts.append(pred)

    return [' '.join(out_preds_texts)]

Пример #16

Показать файл

# Load the state dict `pre` into `model`; both names are defined earlier in
# the script, outside this excerpt.
model.load_state_dict(pre)


#text model-------------------------
class args:
    """Plain option holder carrying the settings needed by the test dataset."""

    def __init__(self):
        # RGB flag on; width and height are both 128.
        self.rgb = True
        self.imgW = self.imgH = 128
        # Images are looked up in "test_imgs" under the current working directory.
        self.path = os.path.join(os.getcwd(), 'test_imgs')
        self.batch_size = 4


# Build the options object and the dataset it configures.
# NOTE: despite its name, `test_loader` is the dataset; the DataLoader is
# `test_set` below.
opt = args()
test_loader = RawDataset(opt.path, opt)
#length_of_data = len(test_loader)  # number of images
# Wrap the dataset in a DataLoader that keeps the original order.
test_set = torch.utils.data.DataLoader(dataset=test_loader,
                                       batch_size=opt.batch_size,
                                       shuffle=False,
                                       pin_memory=True)

# Switch the model to evaluation mode before running it over the batches.
model.eval()
fig_i = 0  # incremented once per non-empty batch
for batch_x, path_x in test_set:
    # Stop at the first empty batch.
    if len(batch_x) == 0:
        break
    fig_i += 1
    x_tensors = batch_x.to(device)
    out = model(x_tensors)
    # Index of the maximum along dim 1 of the model output.
    pred = torch.max(out, 1)[1]

Пример #17

Показать файл

Файл: demo.py Проект: ChangWoo95/Inter-Korean-summit

def _recognise_and_log(model, converter, loader, opt, on_prediction):
    """Run attention decoding over ``loader``, printing and logging every row.

    For each batch a table header is printed and appended to
    ``./log_demo_result.txt``, followed by one row per image.
    ``on_prediction(pred)`` is called once per image after its row is written.
    """
    dashed_line = '-' * 80
    head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

    with torch.no_grad():
        for image_tensors, image_path_list in loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)

            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size,
                                             opt.batch_max_length +
                                             1).fill_(0).to(device)

            # Attn prediction
            preds = model(image, text_for_pred, is_train=False)

            # select max probability (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)

            # Append mode so earlier results are kept; the context manager
            # closes the file even if writing a row raises.
            with open('./log_demo_result.txt', 'a') as log:
                print(f'{dashed_line}\n{head}\n{dashed_line}')
                log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                for img_name, pred, pred_max_prob in zip(image_path_list,
                                                         preds_str,
                                                         preds_max_prob):
                    # Prune after the "end of sentence" token ([s]). When the
                    # token is absent find() returns -1, and slicing with it
                    # would drop the last character, so leave pred untouched.
                    pred_EOS = pred.find('[s]')
                    if pred_EOS != -1:
                        pred = pred[:pred_EOS]
                        pred_max_prob = pred_max_prob[:pred_EOS]

                    # Confidence score = product of the per-step max
                    # probabilities. An empty prediction has no steps;
                    # report 0 instead of raising IndexError.
                    if pred_max_prob.numel() > 0:
                        confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                    else:
                        confidence_score = 0.0

                    row = f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}'
                    print(row)
                    log.write(row + '\n')
                    on_prediction(pred)


def demo(opt):
    """Recognise text in two image folders and report which results match.

    Text recognised in ``opt.image_folder1`` is collected first. Every text
    recognised in ``opt.image_folder2`` is then checked against that
    collection and a match / no-match message is printed. All rows are also
    appended to ``./log_demo_result.txt``.

    Args:
        opt: Options namespace; ``num_class`` (and ``input_channel`` when
            ``opt.rgb`` is set) are overwritten in place.
    """
    # Texts recognised in the pictures considered to be the destination.
    lists = []

    converter = AttnLabelConverter(opt.character)  # Attn

    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3

    model = Model(opt)  # Model imported from model.py

    # Print the parameter values.
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling,
          opt.Prediction)

    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model,
                                     map_location=device))

    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    # use RawDataset: signboard detection results
    demo_data1 = RawDataset(root=opt.image_folder1, opt=opt)
    # use RawDataset: Google Maps text detection results
    demo_data2 = RawDataset(root=opt.image_folder2, opt=opt)

    demo_loader1 = torch.utils.data.DataLoader(demo_data1,
                                               batch_size=opt.batch_size,
                                               shuffle=False,
                                               num_workers=int(opt.workers),
                                               collate_fn=AlignCollate_demo,
                                               pin_memory=True)
    demo_loader2 = torch.utils.data.DataLoader(demo_data2,
                                               batch_size=opt.batch_size,
                                               shuffle=False,
                                               num_workers=int(opt.workers),
                                               collate_fn=AlignCollate_demo,
                                               pin_memory=True)

    # predict
    model.eval()

    # First pass: remember every recognised text.
    _recognise_and_log(model, converter, demo_loader1, opt, lists.append)

    def report_match(pred):
        # Tell the user whether this text was also seen in the first pass.
        if pred in lists:
            print(pred + "은(는) 알맞은 목적지입니다.")
        else:
            print(pred + "은(는) 알맞은 목적지가 아닙니다.")

    # Second pass: compare each recognised text with the first pass.
    _recognise_and_log(model, converter, demo_loader2, opt, report_match)

Пример #18

Показать файл

def demo(args):
    """Recognise text in ``args.image_folder`` and append results to log files.

    For every image a row ``image_name / prediction / confidence`` is printed
    and appended to ``<prefix>_log_demo_result.txt``, where ``<prefix>`` is
    derived from the image path relative to ``../data/crop_img/``.

    Args:
        args: Options namespace; ``num_class`` (and ``input_channel`` when
            ``args.rgb`` is set) are overwritten in place.
    """
    # NOTE(review): this CSV is loaded but never used below. The read is kept
    # so that a missing file still fails up front exactly as before.
    data = pd.read_csv('../data/craft_output/data.csv')

    # model configuration
    if 'CTC' in args.Prediction:
        converter = CTCLabelConverter(args.character)
    else:
        converter = AttnLabelConverter(args.character)
    args.num_class = len(converter.character)

    if args.rgb:
        args.input_channel = 3
    model = Model(args)
    print('model input parameters', args.imgH, args.imgW, args.num_fiducial, args.input_channel, args.output_channel,
          args.hidden_size, args.num_class, args.batch_max_length, args.Transformation, args.FeatureExtraction,
          args.SequenceModeling, args.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % args.saved_model)
    model.load_state_dict(torch.load(args.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=args.imgH, imgW=args.imgW, keep_ratio_with_pad=args.PAD)
    demo_data = RawDataset(root=args.image_folder, args=args)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=args.batch_size,
        shuffle=False,
        num_workers=int(args.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # Loop invariants: table header and the root the image paths are relative to.
    dashed_line = '-' * 80
    head = f'{"image_path":25s}\t {"predicted_labels":25s}\t confidence score'
    start = '../data/crop_img/'

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([args.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, args.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in args.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                # preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print(f'{dashed_line}\n{head}\n{dashed_line}')

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                # Derive the log-file prefix from the image path: drop the
                # last eight "_"-separated fields of the file name.
                path = os.path.relpath(img_name, start)
                folder = os.path.dirname(path)
                image_name = os.path.basename(path)
                file_name = '_'.join(image_name.split('_')[:-8])
                txt_file = os.path.join(start, folder, file_name)

                if 'Attn' in args.Prediction:
                    # Prune after the "end of sentence" token ([s]); when it
                    # is absent find() returns -1 and slicing would drop the
                    # last character, so leave the prediction untouched.
                    pred_EOS = pred.find('[s]')
                    if pred_EOS != -1:
                        pred = pred[:pred_EOS]
                        pred_max_prob = pred_max_prob[:pred_EOS]

                # Confidence score = product of pred_max_prob. An empty
                # prediction has no entries; report 0 instead of IndexError.
                if pred_max_prob.numel() > 0:
                    confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                else:
                    confidence_score = 0.0
                print(f'{image_name:25s}\t {pred:25s}\t {confidence_score:0.4f}')

                # One handle per image, closed right away. Previously a file
                # was opened per image but only the last one was closed.
                with open(f'{txt_file}_log_demo_result.txt', 'a') as log:
                    log.write(f'{image_name:25s}\t {pred:25s}\t {confidence_score:0.4f}\n')

Пример #19

Показать файл

def demoToTxt1(image_folder, saved_model, txtFile):  # sensitive
    """Recognise text in ``image_folder`` and write the results to ``txtFile``.

    Args:
        image_folder: Default for ``--image_folder`` (images to recognise).
        saved_model: Default for ``--saved_model`` (checkpoint to load).
        txtFile: Output path; overwritten with one ``image_path<TAB>prediction``
            line per image.

    Note:
        Options are parsed with ``parser.parse_args()``, so flags on the
        process command line override the defaults given here.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_folder',
                        default=image_folder,
                        help='path to image_folder which contains text images')
    parser.add_argument('--workers',
                        type=int,
                        help='number of data loading workers',
                        default=4)
    parser.add_argument('--batch_size',
                        type=int,
                        default=100,
                        help='input batch size')
    parser.add_argument('--saved_model',
                        default=saved_model,
                        help="path to saved_model to evaluation")
    # Data processing
    parser.add_argument('--batch_max_length',
                        type=int,
                        default=20,
                        help='maximum-label-length')
    parser.add_argument('--imgH',
                        type=int,
                        default=32,
                        help='the height of the input image')
    parser.add_argument('--imgW',
                        type=int,
                        default=100,
                        help='the width of the input image')
    parser.add_argument('--rgb', action='store_true', help='use rgb input')
    parser.add_argument('--character',
                        type=str,
                        default='0123456789',
                        help='character label')
    parser.add_argument('--sensitive',
                        default=True,
                        help='for sensitive character mode')
    parser.add_argument('--PAD',
                        default=False,
                        action='store_true',
                        help='whether to keep ratio then pad for image resize')
    # Model Architecture
    parser.add_argument('--Transformation',
                        default='TPS',
                        type=str,
                        help='Transformation stage. None|TPS')
    parser.add_argument('--FeatureExtraction',
                        default='ResNet',
                        type=str,
                        help='FeatureExtraction stage. VGG|RCNN|ResNet')
    parser.add_argument('--SequenceModeling',
                        default='BiLSTM',
                        type=str,
                        help='SequenceModeling stage. None|BiLSTM')
    parser.add_argument('--Prediction',
                        default='CTC',
                        type=str,
                        help='Prediction stage. CTC|Attn')
    parser.add_argument('--num_fiducial',
                        type=int,
                        default=20,
                        help='number of fiducial points of TPS-STN')
    parser.add_argument(
        '--input_channel',
        type=int,
        default=1,
        help='the number of input channel of Feature extractor')
    parser.add_argument(
        '--output_channel',
        type=int,
        default=512,
        help='the number of output channel of Feature extractor')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=256,
                        help='the size of the LSTM hidden state')

    opt = parser.parse_args()
    # vocab / character number configuration
    if opt.sensitive:
        opt.character += 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        # opt.character = string.printable[:-6]  # same with ASTER setting (use 94 char).

    cudnn.benchmark = True
    cudnn.deterministic = True
    opt.num_gpu = torch.cuda.device_count()
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    # One device for model, checkpoint and inputs. Previously the model went
    # to CUDA only when available while the inputs were always sent to CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()
    # The context manager flushes and closes the output file (it was never
    # closed before); no_grad now also covers the forward pass.
    with open(txtFile, 'w') as saved_file, torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print('-' * 80)
            print('image_path\tpredicted_labels')
            print('-' * 80)

            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' in opt.Prediction:
                    # Prune after the "end of sentence" token ([s]).
                    # partition() keeps the whole string when the token is
                    # absent; slicing with find() == -1 dropped a character.
                    pred = pred.partition('[s]')[0]
                print(f'{img_name}\t{pred}')
                saved_file.write(f'{img_name}\t{pred}\n')

Пример #20

Показать файл

Файл: data_pool.py Проект: haohy/End_to_End_Incremental_Learning

 def load_data_pool(self):
     """Build and return a ``RawDataset`` tagged 'data_pool' for this object's data."""
     logging.info("load data from data pool.")
     pool_dataset = RawDataset(
         self.dir_data,
         self.dataname,
         'data_pool',
         self.classes,
     )
     return pool_dataset

Пример #21

Показать файл

def _textRecognition(opt):
    """Recognise one character per image and record it in a CSV file.

    Writes ``text_information.csv`` (columns ``bookID``, ``prediction``) into
    ``opt.output_dirpath``. Images with an empty prediction or a confidence
    below 0.5 are passed to ``deleteImageAndText`` and get no CSV row. A
    first character outside ``[a-zA-Z0-9]`` is recorded as ``"Undefined"``.

    Args:
        opt: Options namespace; ``num_class`` (and ``input_channel`` when
            ``opt.rgb`` is set) are overwritten in place.
    """
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    char_list = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

    csv_filename = os.path.join(opt.output_dirpath, "text_information.csv")
    Header = ["bookID", "prediction"]

    dashed_line = '-' * 80
    head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

    # newline='' is what the csv module expects; without it extra blank
    # rows can appear on platforms that translate line endings.
    with open(csv_filename, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=Header)
        writer.writeheader()

        model.eval()
        with torch.no_grad():
            for image_tensors, image_path_list in demo_loader:
                batch_size = image_tensors.size(0)
                image = image_tensors.to(device)
                # For max length prediction
                length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                                  batch_size).to(device)
                text_for_pred = torch.LongTensor(
                    batch_size, opt.batch_max_length + 1).fill_(0).to(device)

                if 'CTC' in opt.Prediction:
                    preds = model(image, text_for_pred)

                    # Select max probability (greedy decoding) then decode index to character
                    preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                    _, preds_index = preds.max(2)
                    preds_index = preds_index.view(-1)
                    preds_str = converter.decode(preds_index.data,
                                                 preds_size.data)

                else:
                    preds = model(image, text_for_pred, is_train=False)

                    # select max probability (greedy decoding) then decode index to character
                    _, preds_index = preds.max(2)
                    preds_str = converter.decode(preds_index, length_for_pred)

                print(f'{dashed_line}\n{head}\n{dashed_line}')

                preds_prob = F.softmax(preds, dim=2)
                preds_max_prob, _ = preds_prob.max(dim=2)
                for img_name, pred, pred_max_prob in zip(
                        image_path_list, preds_str, preds_max_prob):
                    if 'Attn' in opt.Prediction:
                        # Prune after the "end of sentence" token ([s]). If
                        # it is absent find() returns -1 and slicing would
                        # drop the last character, so leave pred untouched.
                        pred_EOS = pred.find('[s]')
                        if pred_EOS != -1:
                            pred = pred[:pred_EOS]
                            pred_max_prob = pred_max_prob[:pred_EOS]

                    # Nothing recognised: discard the image. This replaces a
                    # bare `except:` around the confidence computation and
                    # also guards the `pred[0]` lookup below.
                    if not pred or pred_max_prob.numel() == 0:
                        deleteImageAndText(opt.book_img_dirpath, img_name)
                        continue

                    # calculate confidence score (= multiply of pred_max_prob)
                    confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                    # Only the first recognised character is kept.
                    pred = pred[0]

                    # Low-confidence results are treated as unreadable.
                    if confidence_score < 0.5:
                        deleteImageAndText(opt.book_img_dirpath, img_name)
                        continue

                    if pred not in char_list:
                        pred = "Undefined"

                    # extract the name part of the image
                    filename = os.path.basename(img_name)

                    print(
                        f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')

                    writer.writerow({"bookID": filename, "prediction": pred})

Пример #22

Показать файл

def demo(opt):
    """Recognise text in ``opt.image_folder`` and print one row per image.

    Args:
        opt: Options namespace; ``num_class`` (and ``input_channel`` when
            ``opt.rgb`` is set) are overwritten in place.
    """
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print('-' * 80)
            print('image_path\tpredicted_labels')
            print('-' * 80)
            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' in opt.Prediction:
                    # Prune after the "end of sentence" token ([s]).
                    # partition() keeps the whole string when the token is
                    # absent; slicing with find() == -1 dropped a character.
                    pred = pred.partition('[s]')[0]

                print(f'{img_name}\t{pred}')

Пример #23

Показать файл

def demo(opt):
    """Recognize text in every image of ``opt.image_folder`` and print the results.

    Builds the model described by ``opt``, loads the weights stored at
    ``opt.saved_model`` and runs greedy decoding batch by batch. After each
    batch a summary dict is printed; it holds all predictions collected so
    far plus the first file name found in ``../process/location`` and in
    ``../process/images``.

    Args:
        opt: Options object. Attributes read here: ``Prediction``,
            ``character``, ``rgb``, ``saved_model``, ``imgH``, ``imgW``,
            ``PAD``, ``image_folder``, ``batch_size``, ``workers`` and
            ``batch_max_length``. ``opt.num_class`` is overwritten, and
            ``opt.input_channel`` is set to 3 when ``opt.rgb`` is truthy.
    """
    # Model configuration: the label converter depends on the prediction head.
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    model = torch.nn.DataParallel(model).to(device)

    # Load the pretrained weights.
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # Prepare data.
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # Switch to inference mode. The method must actually be CALLED: a bare
    # ``model.eval`` is a no-op attribute lookup that leaves the model in
    # training mode.
    model.eval()

    # Running list of per-image results and the summary dict printed per batch.
    pred_list = []
    ret_list = {}

    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list,
                                                     preds_str,
                                                     preds_max_prob):
                if 'Attn' in opt.Prediction:
                    # Prune after the "end of sentence" token ([s]).
                    # NOTE(review): str.find returns -1 when '[s]' is absent,
                    # which drops the last character instead - confirm the
                    # converter always emits '[s]'.
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]
                    pred_max_prob = pred_max_prob[:pred_EOS]

                # Confidence score = product of the per-step max probabilities.
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]

                # Transcription entry for this image.
                result = {
                    "img_name": img_name,
                    "pred": str(pred),
                    "confidence_score": f'{confidence_score:0.4f}'
                }
                pred_list.append(result)

            # Only the first entry of each directory listing is reported.
            file_list_1 = os.listdir("../process/location")
            file_list_2 = os.listdir("../process/images")

            ret_list["pred"] = pred_list
            ret_list["localizacion_url"] = "/location/" + file_list_1[0]
            ret_list["image_url"] = "/images/" + file_list_2[0]

            print(ret_list)

Пример #24

Показать файл

Файл: pred.py Проект: bavo96/deep-text-recognition-benchmark

def demo(opt):
    """Recognize text in every image of ``opt.image_folder`` and log the results.

    Builds the model described by ``opt``, loads the weights stored at
    ``opt.saved_model`` and runs greedy decoding batch by batch. For each
    batch a header and one line per image (path, predicted label, confidence
    score) are printed and appended to ``./log_demo_result.txt``; each image
    line is also written to ``custom_output``.

    Args:
        opt: Options object. Attributes read here: ``Prediction``,
            ``character``, ``rgb``, ``saved_model``, ``imgH``, ``imgW``,
            ``PAD``, ``image_folder``, ``batch_size``, ``workers``,
            ``batch_max_length`` plus the fields echoed in the parameter
            printout. ``opt.num_class`` is overwritten, and
            ``opt.input_channel`` is set to 3 when ``opt.rgb`` is truthy.
    """
    # Model configuration: the label converter depends on the prediction head.
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # Load the pretrained weights.
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length +
                                             1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                # The indices are passed to the converter without flattening.
                preds_str = converter.decode(preds_index, preds_size)

            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

            # The context manager guarantees the log handle is closed even
            # when formatting or decoding raises part-way through the batch.
            with open('./log_demo_result.txt', 'a') as log:
                print(f'{dashed_line}\n{head}\n{dashed_line}')
                log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                preds_prob = F.softmax(preds, dim=2)
                preds_max_prob, _ = preds_prob.max(dim=2)
                for img_name, pred, pred_max_prob in zip(image_path_list,
                                                         preds_str,
                                                         preds_max_prob):
                    if 'Attn' in opt.Prediction:
                        # Prune after the "end of sentence" token ([s]).
                        # NOTE(review): str.find returns -1 when '[s]' is
                        # absent, which drops the last character instead -
                        # confirm the converter always emits '[s]'.
                        pred_EOS = pred.find('[s]')
                        pred = pred[:pred_EOS]
                        pred_max_prob = pred_max_prob[:pred_EOS]

                    # Confidence score = product of the per-step max probabilities.
                    confidence_score = pred_max_prob.cumprod(dim=0)[-1]

                    print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                    log.write(
                        f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')
                    # NOTE(review): custom_output is not defined in this
                    # function; presumably a module-level writable file
                    # object - verify.
                    custom_output.write(
                        f'{img_name}\t{pred}\t{confidence_score:0.4f}\n')

Пример #25

Показать файл

Файл: test_craft.py Проект: rohanrajnair/MultiModal-Movie-Genre-Analysis

        #print("Cropped image")
        mask_file = result_folder + filename + "_" + str(
            order_sorted[i]) + "_" + str(i) + '.jpg'
        #print(mask_file)
        crop_image = rgb_img[int(min_point[1]):int(max_point[1]),
                             int(min_point[0]):int(max_point[0])]
        #plt.imshow(crop_image)
        #plt.show()
        cv2.imwrite(mask_file, crop_image)

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    #result_folder = './intermediate_result/'
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=result_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(demo_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=int(opt.workers),
                                              collate_fn=AlignCollate_demo,
                                              pin_memory=True)
    print("Starting text classification")
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            #image = (torch.from_numpy(crop_image).unsqueeze(0)).to(device)
            #print(image_path_list)
            #print(image.size())