def get_loader(train_path, test_path, ratio):
    """Build the training and test data loaders for two image folders.

    Args:
        train_path: Directory searched (non-recursively) for ``*.jpg``
            training images.
        test_path: Directory searched (non-recursively) for ``*.jpg`` test
            images.
        ratio: Forwarded to ``RawDataset`` for the training split; the test
            split always uses ``ratio=1``.

    Returns:
        ``(train_loader, test_loader)``. The training loader shuffles and
        enables augmentation; the test loader does neither. Both use
        ``batch_size=1``.
    """
    train_img_paths = glob.glob(os.path.join(train_path, '*.jpg'))
    test_img_paths = glob.glob(os.path.join(test_path, '*.jpg'))

    # Per-channel normalisation constants (these are the widely used
    # ImageNet mean/std values).
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    train_loader = torch.utils.data.DataLoader(
        RawDataset(train_img_paths, transform, aug=True, ratio=ratio),
        shuffle=True,
        batch_size=1)
    test_loader = torch.utils.data.DataLoader(
        RawDataset(test_img_paths, transform, ratio=1, aug=False),
        shuffle=False,
        batch_size=1)

    # The loaders used to be built and then dropped (implicit ``None``
    # return), which made the function useless to callers.
    # NOTE(review): confirm the tuple layout expected by existing callers.
    return train_loader, test_loader
def predictAllImagesInFolder(self, src_path):
    """Run text recognition on every image that ``RawDataset`` finds in a folder.

    Args:
        src_path: Root folder handed to ``RawDataset``.

    Returns:
        A list of ``"<image basename>,<prediction>"`` strings, in loader
        order (the loader does not shuffle).
    """
    opt = self.opts
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=src_path, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo,
        pin_memory=torch.cuda.is_available())

    prune_eos = 'Attn' in opt.Prediction
    results = []
    for image_tensors, image_path_list in demo_loader:
        preds_str = self.predict(image_tensors)
        for img_name, pred in zip(image_path_list, preds_str):
            if prune_eos:
                # Prune after the "end of sentence" token ([s]). split()
                # keeps the whole string when the token is absent, whereas
                # slicing with find() == -1 silently dropped the last char.
                pred = pred.split('[s]', 1)[0]
            results.append(f'{os.path.basename(img_name)},{pred}')
    return results
def get_loader(args):
    """Create the evaluation loader for the ``*.jpg`` files in ``args.test_img_dir``.

    Args:
        args: Object exposing ``test_img_dir``, the folder to scan
            (non-recursively).

    Returns:
        ``(test_loader, test_img_paths)``: a non-shuffling loader with
        ``batch_size=1`` and the list of image paths it was built from.
    """
    jpg_pattern = os.path.join(args.test_img_dir, '*.jpg')
    test_img_paths = [found for found in glob.glob(jpg_pattern)]

    # ``transform`` is not defined in this function; it is presumably a
    # module-level object -- verify it exists where this function lives.
    eval_dataset = RawDataset(test_img_paths, transform, ratio=1, aug=False)
    test_loader = torch.utils.data.DataLoader(eval_dataset,
                                              shuffle=False,
                                              batch_size=1)
    return test_loader, test_img_paths
def original_demo(model, converter, length_for_pred, text_for_pred):
    """Recognise every image in the configured folder and print the predictions.

    Configuration comes from ``option()``, which is indexed like a mapping
    (``imgH``, ``imgW``, ``PAD``, ``image_folder``, ``batch_size``,
    ``workers``, ``Prediction``).

    Args:
        model: Recognition model; called as ``model(image, text_for_pred)``
            for CTC and ``model(image, text_for_pred, is_train=False)``
            otherwise.
        converter: Label converter exposing ``decode``.
        length_for_pred: Passed to ``converter.decode`` on the non-CTC path.
        text_for_pred: Passed to the model on both paths.

    Returns:
        None. Results are printed as ``<image path>\\t<prediction>`` lines.
    """
    opt = option()
    AlignCollate_demo = AlignCollate(imgH=opt['imgH'],
                                     imgW=opt['imgW'],
                                     keep_ratio_with_pad=opt['PAD'])
    demo_data = RawDataset(root=opt['image_folder'], opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data,
        batch_size=opt['batch_size'],
        shuffle=False,
        num_workers=int(opt['workers']),
        collate_fn=AlignCollate_demo,
        pin_memory=True)
    print(demo_loader)  # debug trace kept from the original

    # predict
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            # ``device`` is resolved from the enclosing module scope.
            image = image_tensors.to(device)

            if 'CTC' == opt['Prediction']:
                print('kotti')  # debug trace kept from the original
                preds = model(image, text_for_pred).log_softmax(2)

                # Select the most probable index per step, then decode the
                # indices into characters.
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select the most probable index per step, then decode the
                # indices into characters.
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print('-' * 80)
            print('image_path\tpredicted_labels')
            print('-' * 80)
            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' == opt['Prediction']:
                    # Prune after the end-of-sentence token ([s]). split()
                    # leaves the text intact when the token is missing;
                    # slicing with find() == -1 used to drop the last char.
                    pred = pred.split('[s]', 1)[0]

                print(f'{img_name}\t{pred}')
def add_data(self, model, new_data, num_everyclass, device):
    """Add new data to the data pool and shrink what is already stored.

    For every class in ``new_data.classes`` the features of that class are
    computed with ``get_output``, and the ``num_everyclass`` samples whose
    features are closest to the class mean are kept.

    Side effects: updates ``self.data_pool_dict``, ``self.feature_mean``,
    ``self.classes`` and ``self.num_everyclass``, then persists the pool via
    ``self.save_datapool_to_pkl()``.

    Args:
        model: Representer handed to ``get_output``.
        new_data: Dataset-like object; this method reads its ``classes``,
            ``dir_data``, ``dataname``, ``task`` and ``ts_list`` attributes.
        num_everyclass: int, number of samples to keep per class.
        device: Forwarded unchanged to ``get_output``.
    """
    # Only shrink the pool when it actually holds data. The previous test
    # (``len(...) >= 0``) was always true.
    if self.data_pool_dict:
        remained_dict, msg = adjust_data_pool(self.data_pool_dict,
                                              num_everyclass)
        self.data_pool_dict = remained_dict
        logging.info(msg)
    logging.info("num_everyclass = {}".format(num_everyclass))

    data_dict_tmp = {}
    feature_mean_dict = {}
    for class_label in new_data.classes:
        dataset_tmp = RawDataset(new_data.dir_data, new_data.dataname,
                                 new_data.task, [class_label])
        dataloader_tmp = DataLoader(dataset_tmp, batch_size=16, num_workers=1)
        data_feature = get_output(model, dataloader_tmp, device)
        feature_mean = np.mean(data_feature, axis=0)

        # Euclidean distance of every sample to the class mean. The old code
        # summed the signed differences, where positive and negative
        # deviations cancel, so it did not rank samples by closeness.
        dist_data = np.linalg.norm(data_feature - feature_mean, axis=1)
        idx_selected = np.argsort(dist_data)[:num_everyclass]

        # NOTE(review): idx_selected indexes the per-class dataset_tmp while
        # ts_list belongs to new_data as a whole -- confirm the two line up.
        data_selected = get_selected_idx(new_data.ts_list, idx_selected)
        data_dict_tmp[class_label] = data_selected
        feature_mean_dict[class_label] = feature_mean

    # update the data pool
    self.data_pool_dict.update(data_dict_tmp)
    self.feature_mean.update(feature_mean_dict)
    self.classes += new_data.classes
    self.num_everyclass = num_everyclass
    self.save_datapool_to_pkl()
def demo(opt):
    """Run recognition on ``opt.image_folder`` and log top-k / confidence results.

    Behaviour by configuration:

    * Non-CTC prediction with ``opt.batch_max_length == 1``: the ``opt.topk``
      candidates and their probabilities are appended to
      ``./log_demo_result.csv`` (one row per image).
    * Otherwise: the best prediction and its confidence score are appended
      to ``./log_demo_result.txt``. With ``opt.output_split`` set, split
      lines are also drawn on each image and written under ``output/``.

    Args:
        opt: Option namespace (model hyper-parameters, ``saved_model``,
            ``image_folder``, ``batch_size``, ``workers``, ``topk``,
            ``output_split``, ...). ``opt.num_class`` is set here, and
            ``opt.input_channel`` is set to 3 when ``opt.rgb`` is truthy.

    Returns:
        None.
    """
    is_ctc = 'CTC' in opt.Prediction
    is_attn = 'Attn' in opt.Prediction

    # model configuration
    if is_ctc:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    try:
        # ``device`` is resolved from the enclosing module scope.
        model = torch.nn.DataParallel(model).to(device)
    except RuntimeError as err:
        raise RuntimeError(device) from err

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo,
        pin_memory=True)

    # The per-character top-k CSV report needs the top-k outputs, which only
    # the non-CTC path produces. CTC with batch_max_length == 1 used to hit
    # an undefined ``topk_chars``; it now uses the regular text report.
    topk_char_report = opt.batch_max_length == 1 and not is_ctc

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if is_ctc:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index
                # to character.
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, preds_size)
            else:
                preds, alphas = model(image, text_for_pred, is_train=False)
                alphas = alphas.detach().cpu().numpy()
                k = opt.topk
                preds = F.softmax(preds, dim=2)

                if opt.batch_max_length == 1:
                    # Top-k candidates of the first decoding step.
                    topk_prob, topk_id = preds.topk(k)
                    topk_id = topk_id.detach().cpu()[:, 0, :].unsqueeze(
                        dim=1).numpy()
                    # Append id 3 after the candidates (the original comment
                    # describes it as the '[s]' id) before decoding.
                    topk_s = np.ones_like(topk_id) * 3
                    topk_id = np.concatenate((topk_id, topk_s), axis=1)
                    topk_chars = converter.decode(topk_id, length_for_pred)
                    topk_probs = topk_prob.detach().cpu()[:, 0, :]
                else:
                    # Top-k candidates at every decoding step.
                    topk_prob, topk_id = preds.topk(k, dim=2)
                    topk_id = topk_id.detach().cpu().numpy()
                    topk_probs = topk_prob.detach().cpu()
                    topk_strs = converter.decode(topk_id, length_for_pred)

            if topk_char_report:
                # ``with`` guarantees the log is closed even if a row fails.
                with open('./log_demo_result.csv', 'a',
                          encoding='utf-8') as log:
                    for img_name, pred, pred_max_prob in zip(
                            image_path_list, topk_chars, topk_probs):
                        if is_attn:
                            # prune after "end of sentence" token ([s]);
                            # split() keeps the text when the token is absent
                            pred = [p.split('[s]', 1)[0] for p in pred]
                        fields = [img_name]
                        for pred_char, pred_prob in zip(pred, pred_max_prob):
                            fields.append(pred_char)
                            fields.append('%.4f' % pred_prob)
                        row = ','.join(fields)
                        print(row)
                        log.write(row + '\n')
                continue

            with open('./log_demo_result.txt', 'a', encoding='utf-8') as log:
                dashed_line = '-' * 80
                head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'
                print(f'{dashed_line}\n{head}\n{dashed_line}')
                log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                if is_attn:
                    for idx, (img_name, pred, pred_max_prob) in enumerate(
                            zip(image_path_list, topk_strs, topk_probs)):
                        pred_EOS = pred[0].find('[s]')
                        if pred_EOS == -1:
                            # No end token: keep everything instead of
                            # letting the -1 slice drop the last step.
                            pred_EOS = min(len(pred[0]), len(pred_max_prob))
                        # prune after "end of sentence" token ([s])
                        pred = [s[:pred_EOS] for s in pred]
                        pred_max_prob = pred_max_prob[:pred_EOS, :]

                        if opt.output_split:
                            alpha = alphas[idx, :, :].transpose()[:pred_EOS]
                            _draw_attention_splits(opt, img_name, alpha)

                        best_pred = pred[0]
                        best_prob = pred_max_prob[:, 0]
                        # calculate confidence score (= multiply of pred_max_prob)
                        try:
                            confidence_score = best_prob.cumprod(dim=0)[-1]
                        except IndexError:
                            # empty prediction
                            confidence_score = 0.0
                        print(
                            f'{img_name:25s}\t{best_pred:25s}\t{confidence_score:0.4f}'
                        )
                        log.write(
                            f'{img_name:25s}\t{best_pred:25s}\t{confidence_score:0.4f}\n'
                        )
                        for i in range(k):
                            print(f'Candidatae {i:1d}: ', end='')
                            for j in range(pred_EOS):
                                print(
                                    f'{pred[i][j]}, prob: {pred_max_prob[j][i]:0.4f}\t',
                                    end='')
                            print()
                else:
                    preds_prob = F.softmax(preds, dim=2)
                    preds_max_prob, _ = preds_prob.max(dim=2)
                    for img_name, pred, pred_max_prob, pred_idx in zip(
                            image_path_list, preds_str, preds_max_prob,
                            preds_index):
                        pred_max_prob = pred_max_prob[:len(pred)]
                        # calculate confidence score (= multiply of pred_max_prob)
                        try:
                            confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                        except IndexError:
                            # empty prediction
                            confidence_score = 0.0

                        if opt.output_split:
                            _draw_ctc_splits(
                                opt, img_name,
                                pred_idx.detach().cpu().numpy().tolist())

                        print(
                            f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}'
                        )
                        log.write(
                            f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n'
                        )


def _draw_attention_splits(opt, img_name, alpha):
    """Write/draw one horizontal split line per attention row of one image.

    ``alpha`` is indexed as one row of attention weights per decoded step.
    The line coordinates go to ``output/<stem>.txt`` and the annotated image
    to ``output/<basename>``. Nothing is written when ``alpha`` is empty.
    """
    if len(alpha) == 0:
        return
    img = Image.open(img_name).convert('RGB')
    width, height = img.size

    last_alpha_line = alpha[-1]
    # remove the influence of padding
    seq_length = last_alpha_line.shape[0]
    column_range = np.arange(0, seq_length)
    ratio = height / width
    if ratio > opt.imgH / opt.imgW:
        # too long, compress into opt shape, don't need pad
        compress_ratio = opt.imgW * ratio / opt.imgH
        expect_last_column = seq_length
    else:
        # need pad
        compress_ratio = 1
        expect_height = height / width * opt.imgW
        expect_last_column = expect_height / opt.imgH * seq_length

    # Rescale the column positions around the sequence centre.
    # NOTE(review): 320 and 32 are hard-coded; presumably tuned for one
    # particular input size -- confirm before changing imgH/imgW.
    column_range = column_range - seq_length / 2
    column_range = column_range / 320 * (320 + (compress_ratio - 1) * 32)
    column_range = column_range + seq_length / 2

    # Attention-weighted column of the last step and its offset from the
    # expected last column.
    last_column = np.dot(last_alpha_line, column_range)
    expect_linein = expect_last_column - last_column

    # Opening the files below failed when the folder was missing.
    os.makedirs('output', exist_ok=True)
    split_output = os.path.join(
        'output',
        os.path.splitext(os.path.basename(img_name))[0] + '.txt')
    with open(split_output, 'w', encoding='utf-8') as fp:
        draw = ImageDraw.Draw(img)
        for alpha_line in alpha:
            column = np.dot(alpha_line, column_range)
            line_height = int((column - expect_linein / 2) /
                              (last_column - expect_linein / 2) * height)
            line = [0, line_height, width - 1, line_height]
            fp.write(','.join(map(str, line)) + '\n')
            draw.line(((0, line_height), (width - 1, line_height)),
                      fill=(255, 0, 0),
                      width=2)
    img.save(os.path.join('output', os.path.basename(img_name)))


def _draw_ctc_splits(opt, img_name, pred_idx):
    """Write/draw one split line per run of equal non-zero CTC indices.

    ``pred_idx`` is the per-step list of predicted indices for one image.
    Runs whose index is 0 are skipped (presumably the CTC blank -- confirm
    against the converter). Output files match ``_draw_attention_splits``.
    """
    img = Image.open(img_name).convert('RGB')
    width, height = img.size
    preds_len = len(pred_idx)
    ratio = height / width
    if ratio > opt.imgH / opt.imgW:
        # too long, compress into opt shape, don't need pad
        compress_ratio = opt.imgW * ratio / opt.imgH
        expect_last_column = preds_len
    else:
        # need pad
        compress_ratio = 1
        expect_height = height / width * opt.imgW
        expect_last_column = expect_height / opt.imgH * preds_len

    # hyper-parameter, suggestion 6-0.46-0.21 for 320CTC
    # TODO find hyper-parameter for 480CTC
    CTC_start = 6
    center_ratio = 0.46
    zoom_ratio = 0.21

    os.makedirs('output', exist_ok=True)
    split_output = os.path.join(
        'output',
        os.path.splitext(os.path.basename(img_name))[0] + '.txt')
    with open(split_output, 'w', encoding='utf-8') as fp:
        cur_pos = 0
        draw = ImageDraw.Draw(img)
        for key, group in itertools.groupby(pred_idx):
            run_length = len(list(group))
            if key != 0:
                # Middle step of the run, shifted and zoomed around a centre.
                nxt_pos = cur_pos - 1 + run_length
                column = (cur_pos + nxt_pos) // 2
                column = column - CTC_start
                column = (column - preds_len * center_ratio) * (
                    1 + zoom_ratio * compress_ratio) + (preds_len *
                                                        center_ratio)
                line_height = int(column / expect_last_column * height)
                line = [0, line_height, width - 1, line_height]
                fp.write(','.join(map(str, line)) + '\n')
                draw.line(((0, line_height), (width - 1, line_height)),
                          fill=(255, 0, 0),
                          width=2)
            cur_pos += run_length
    img.save(os.path.join('output', os.path.basename(img_name)))
def extract_text(self):
    """Detect, crop and recognise the text of every image in ``self.i_folder``.

    Stage 1: for each file, ``self.test_net`` yields bounding boxes. These
    are converted to clamped axis-aligned rectangles, grouped into rows,
    ordered left-to-right inside a row, and saved as crops named
    ``<name>_<row>_<index><ext>`` under ``self.result_folder``.

    Stage 2: the crops are recognised with ``self.model`` and the text of
    each source image is written to ``self.extract_text_file`` as
    ``<image index>\\n<image name>\\n<text>\\n\\n``, with ``[SEP] `` inserted
    between rows.

    Both folder attributes are used as string prefixes, so they must end
    with a path separator.

    Returns:
        None.
    """
    img_to_index = {}
    for count, full_file in enumerate(sorted(os.listdir(self.i_folder))):
        # splitext keeps dots inside the name (e.g. "scan.v2.png"), which
        # the former split(".") parsing mangled.
        filename, file_extension = os.path.splitext(full_file)
        img_to_index[count] = filename

        image = imgproc.loadImage(self.i_folder + full_file)
        bboxes, polys, score_text = self.test_net(
            self.net, image, self.text_threshold, self.link_threshold,
            self.low_text, self.cuda, self.poly, self.refine_net)
        img = cv2.imread(self.i_folder + full_file)
        rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_height = len(rgb_img)
        img_width = len(rgb_img[0])

        # Axis-aligned (min_point, max_point) rectangle per box, clamped to
        # the image.
        points = []
        order = []  # row number assigned to each box, 0 = not yet ordered
        for sample_bbox in bboxes:
            xs = [p[0] for p in sample_bbox]
            ys = [p[1] for p in sample_bbox]
            min_point = (max(min(img_width, min(xs)), 0),
                         max(min(img_height, min(ys)), 0))
            max_point = (max(min(img_width, max(xs)), 0),
                         max(min(img_height, max(ys)), 0))
            points.append((min_point, max_point))
            order.append(0)

        num_ordered = 0
        rows_ordered = 0
        points_sorted = []
        order_sorted = []
        while num_ordered < len(points):
            # find lowest-y that is unordered; it seeds a new row
            min_y = img_height
            min_y_index = -1
            for i in range(len(points)):
                if order[i] == 0 and points[i][0][1] <= min_y:
                    min_y = points[i][0][1]
                    min_y_index = i
            rows_ordered += 1
            order[min_y_index] = rows_ordered
            num_ordered += 1
            points_sorted.append(points[min_y_index])
            order_sorted.append(rows_ordered)
            ordered_points_index = len(points_sorted) - 1

            # Group bboxes that are on the same row: vertical overlap of at
            # least 30% of the shorter box.
            max_y = points[min_y_index][1][1]
            range_y = max_y - min_y
            for i in range(len(points)):
                if order[i] != 0:
                    continue
                min_y_i = points[i][0][1]
                max_y_i = points[i][1][1]
                range_y_i = max_y_i - min_y_i
                if not (max_y_i >= min_y and min_y_i <= max_y):
                    continue
                overlap = (min(max_y_i, max_y) - max(min_y_i, min_y)) / (max(
                    1, min(range_y, range_y_i)))
                if overlap < 0.30:
                    continue
                order[i] = rows_ordered
                num_ordered += 1
                # Insert into the current row, keeping it sorted by left x.
                min_x_i = points[i][0][0]
                for j in range(ordered_points_index, len(points_sorted) + 1):
                    if j < len(points_sorted):  # insert before
                        if min_x_i < points_sorted[j][0][0]:
                            points_sorted.insert(j, points[i])
                            order_sorted.insert(j, rows_ordered)
                            break
                    else:  # insert at the end of array
                        points_sorted.insert(j, points[i])
                        order_sorted.insert(j, rows_ordered)
                        break

        for i, (min_point, max_point) in enumerate(points_sorted):
            mask_file = self.result_folder + filename + "_" + str(
                order_sorted[i]) + "_" + str(i) + file_extension
            crop_image = rgb_img[int(min_point[1]):int(max_point[1]),
                                 int(min_point[0]):int(max_point[0])]
            if crop_image.size == 0:
                # Degenerate box after clamping; cv2.imwrite rejects empty
                # images.
                continue
            # NOTE(review): the crop is taken from the RGB-converted image
            # while cv2.imwrite expects BGR, so saved crops have red/blue
            # swapped. Left as-is because the recogniser was presumably
            # evaluated on these crops -- confirm before changing.
            cv2.imwrite(mask_file, crop_image)

    AlignCollate_demo = AlignCollate(imgH=self.opt.imgH,
                                     imgW=self.opt.imgW,
                                     keep_ratio_with_pad=self.opt.PAD)
    demo_data = RawDataset(root=self.result_folder,
                           opt=self.opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data,
        batch_size=self.opt.batch_size,
        shuffle=False,
        num_workers=int(self.opt.workers),
        collate_fn=AlignCollate_demo,
        pin_memory=True)

    count = -1
    curr_order = 1
    curr_filename = ""
    output_string = ""
    end_line = "[SEP] "
    # ``with`` closes the output file even if recognition raises.
    with open(self.extract_text_file, "w") as f, torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(self.device)
            length_for_pred = torch.IntTensor([self.opt.batch_max_length] *
                                              batch_size).to(self.device)
            text_for_pred = torch.LongTensor(
                batch_size,
                self.opt.batch_max_length + 1).fill_(0).to(self.device)

            preds = self.model(image, text_for_pred, is_train=False)
            _, preds_index = preds.max(2)
            preds_str = self.converter.decode(preds_index, length_for_pred)
            for path, p in zip(image_path_list, preds_str):
                if 'Attn' in self.opt.Prediction:
                    # prune after "end of sentence" token ([s]); split()
                    # keeps the text intact when the token is absent
                    p = p.split('[s]', 1)[0]

                # Crop names are "<name>_<row>_<index><ext>". Splitting from
                # the right keeps underscores/dots in <name> intact.
                crop_stem = os.path.splitext(path[len(self.result_folder):])[0]
                path_info = crop_stem.rsplit("_", 2)

                if curr_filename != path_info[0]:
                    # A new source image starts: flush the previous one.
                    if curr_filename != "":
                        f.write(str(count) + "\n")
                        f.write(curr_filename + "\n")
                        f.write(output_string + "\n\n")
                    count += 1
                    curr_filename = img_to_index[count]
                    # Emit empty records for images that produced no crops.
                    while curr_filename != path_info[0]:
                        f.write(str(count) + "\n")
                        f.write(curr_filename + "\n")
                        f.write("\n\n")
                        count += 1
                        curr_filename = img_to_index[count]
                    output_string = ""
                    curr_order = 1
                if int(path_info[1]) > curr_order:
                    # The crop belongs to the next row.
                    curr_order += 1
                    output_string += end_line
                output_string += p + " "

        # Flush the record of the last image.
        f.write(str(count) + "\n")
        f.write(curr_filename + "\n")
        f.write(output_string + "\n\n")
def demo(opt, length, db_url):
    """Recognise licence-plate crops and store the text in a database.

    Only images whose path contains ``plate_`` are processed. For each one,
    the path (minus ``.jpg``, ``res_``, ``plate_`` and the first ``length``
    characters) is read as ``<base>/<frame>_<id>``. The prediction is then
    written to ``plate_number`` of the matching row in ``table_<base>`` and
    appended to ``./log_demo_result.txt``.

    Args:
        opt: Option namespace (model hyper-parameters, ``saved_model``,
            ``image_folder``, ``batch_size``, ``workers``, ...).
            ``opt.num_class`` is set here, and ``opt.input_channel`` is set
            to 3 when ``opt.rgb`` is truthy.
        length: Number of leading characters to strip from each image path.
        db_url: Database URL passed to ``create_engine``.

    Raises:
        ValueError: If the table suffix parsed from a path is not a plain
            identifier, or frame/id are not integers.
    """
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    # ``device`` is resolved from the enclosing module scope.
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo,
        pin_memory=True)

    # One engine for the whole run (it used to be re-created, together with
    # a never-closed connection, for every image).
    engine = create_engine(db_url)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index
                # to character.
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, preds_size)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index
                # to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)

            # ``with`` closes the log even if a DB update raises.
            with open('./log_demo_result.txt', 'a') as log:
                dashed_line = '-' * 80
                head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'
                print(f'{dashed_line}\n{head}\n{dashed_line}')
                log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                for img_name, pred, pred_max_prob in zip(
                        image_path_list, preds_str, preds_max_prob):
                    # we are only interested in plates themselves
                    if 'plate_' not in img_name:
                        continue

                    if 'Attn' in opt.Prediction:
                        pred_EOS = pred.find('[s]')
                        if pred_EOS == -1:
                            # No end token: keep the full prediction instead
                            # of letting the -1 slice drop the last char.
                            pred_EOS = len(pred)
                        # prune after "end of sentence" token ([s])
                        pred = pred[:pred_EOS]
                        pred_max_prob = pred_max_prob[:pred_EOS]

                    # calculate confidence score (= multiply of pred_max_prob);
                    # an empty prediction has no probabilities to multiply
                    if len(pred_max_prob) > 0:
                        confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                    else:
                        confidence_score = 0.0

                    # getting name of the current image
                    img_name = img_name.replace('.jpg', '')
                    img_name = img_name.replace('res_', '')
                    img_name = img_name.replace('plate_', '')
                    # cutting piece of full path which equals length
                    print(length)
                    img_name = img_name[length:]
                    print(img_name)
                    # splitting into db name and image name
                    res = re.split(r'/', img_name)
                    base = res[0]
                    img_name = res[1]
                    print(base)
                    # splitting into frame number and id
                    result = re.split(r'_', img_name)
                    print(result)

                    # A table name cannot be a bound parameter, so restrict
                    # it to identifier characters before interpolating it.
                    if not re.fullmatch(r'\w+', base):
                        raise ValueError(
                            f'unexpected table suffix in path: {base!r}')
                    # Values are bound parameters: no SQL injection, and the
                    # plate text no longer ends up unquoted in the statement.
                    sql = text(f'UPDATE table_{base} '
                               'SET plate_number = :plate '
                               'WHERE frame = :frame AND id = :id')
                    # begin() commits on success and always releases the
                    # connection.
                    with engine.begin() as conn:
                        conn.execute(
                            sql, {
                                'plate': pred,
                                'frame': int(result[0]),
                                'id': int(result[1]),
                            })

                    print(
                        f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                    log.write(
                        f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n'
                    )
def demo(opt):
    """Recognise images with a multi-output model, keeping the most confident output.

    The first element of the model's return value is iterated as a sequence
    of prediction tensors ("blocks"). Each block is decoded and, per image,
    the block with the highest confidence score wins. Results are printed
    and written to ``./log_demo_result.txt``; the file is overwritten once
    per run.

    Args:
        opt: Option namespace (model hyper-parameters, ``saved_model``,
            ``image_folder``, ``batch_size``, ``workers``, ...).
            ``opt.num_class`` is set here, and ``opt.input_channel`` is set
            to 3 when ``opt.rgb`` is truthy.

    Returns:
        None.
    """
    # model configuration
    converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    # ``device`` is resolved from the enclosing module scope.
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo,
        pin_memory=True)

    # predict
    model.eval()
    # The log is opened once for the whole run. It used to be reopened in
    # 'w' mode for every batch, so only the last batch survived.
    with torch.no_grad(), open('./log_demo_result.txt', 'w') as log:
        for image_tensors, image_path_list in demo_loader:
            all_pred_strs = []
            all_confidence_scores = []
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            predss = model(image, text_for_pred, is_train=False)[0]
            for preds in predss:
                confidence_score_list = []
                pred_str_list = []

                # select max probability (greedy decoding) then decode index
                # to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

                preds_prob = F.softmax(preds, dim=2)
                preds_max_prob, _ = preds_prob.max(dim=2)
                for pred, pred_max_prob in zip(preds_str, preds_max_prob):
                    pred_EOS = pred.find('[s]')
                    if pred_EOS == -1:
                        # No end token: keep the full prediction instead of
                        # letting the -1 slice drop the last character.
                        pred_EOS = len(pred)
                    # prune after "end of sentence" token ([s])
                    pred = pred[:pred_EOS]
                    pred_str_list.append(pred)
                    pred_max_prob = pred_max_prob[:pred_EOS]

                    # calculate confidence score (= multiply of pred_max_prob).
                    # An empty prediction scores 0; this replaces a bare
                    # ``except`` that hid every other error as well.
                    if len(pred_max_prob) > 0:
                        confidence_score = pred_max_prob.cumprod(
                            dim=0)[-1].item()
                    else:
                        confidence_score = 0.0
                    confidence_score_list.append(confidence_score)

                all_pred_strs.append(pred_str_list)
                all_confidence_scores.append(confidence_score_list)

            # Both arrays are indexed [block, image].
            all_confidence_scores = np.array(all_confidence_scores)
            all_pred_strs = np.array(all_pred_strs)

            # Get max prediction per image through blocks
            best_pred_index = np.argmax(all_confidence_scores, axis=0)
            best_pred_index = np.expand_dims(best_pred_index, axis=0)
            all_pred_strs = np.take_along_axis(all_pred_strs,
                                               best_pred_index,
                                               axis=0)[0]
            all_confidence_scores = np.take_along_axis(all_confidence_scores,
                                                       best_pred_index,
                                                       axis=0)[0]

            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'
            print(f'{dashed_line}\n{head}\n{dashed_line}')
            log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

            for img_name, pred, confidence_score in zip(
                    image_path_list, all_pred_strs, all_confidence_scores):
                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                log.write(
                    f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')
def demo(opt):
    """Recognise images, compare against ``gt.txt`` and save annotated previews.

    ``gt.txt`` is read from the working directory; each line has the form
    ``<file name>, "<label>"`` and labels are lower-cased. For every image a
    200-pixel-wide preview with the prediction drawn below it is written to
    ``./trash/true/`` when the prediction equals the ground truth. Otherwise
    it goes to ``./trash/false/``, with the ground truth drawn as a second
    line.

    Args:
        opt: Option namespace (model hyper-parameters, ``saved_model``,
            ``image_folder``, ``batch_size``, ``workers``, ...).
            ``opt.num_class`` is set here, and ``opt.input_channel`` is set
            to 3 when ``opt.rgb`` is truthy.

    Raises:
        KeyError: If an image has no entry in ``gt.txt``.
    """
    import os  # local import: this block did not previously use ``os``

    # model configuration
    if 'Transformer' in opt.SequenceModeling:
        converter = TransformerLabelConverter(opt.character)
    elif 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    use_cuda = torch.cuda.is_available()
    run_device = torch.device('cuda' if use_cuda else 'cpu')

    # load model
    if opt.saved_model != '':
        print('loading pretrained model from %s' % opt.saved_model)
        # Map GPU-saved checkpoints to CPU only when CUDA is unavailable;
        # on CUDA machines loading is unchanged.
        checkpoint = torch.load(opt.saved_model,
                                map_location=None if use_cuda else 'cpu')
        # A bare state_dict is itself a dict subclass, so test for the
        # wrapper key rather than just the type.
        if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
            model.load_state_dict(checkpoint['state_dict'])
        else:
            model.load_state_dict(checkpoint)
        del checkpoint
        torch.cuda.empty_cache()

    # The weights are loaded before wrapping, so checkpoint keys are
    # expected without the DataParallel prefix.
    model = torch.nn.DataParallel(model)
    if use_cuda:
        model = model.cuda()

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo,
        pin_memory=True)

    # predict
    model.eval()

    dict_gt = {}
    with open('gt.txt', 'r') as gt_file:
        for line in gt_file:
            key, sep, label = line.partition(', "')
            if not sep:
                # Blank or malformed line (used to raise IndexError).
                continue
            label = label.rstrip('\r\n')
            if label.endswith('"'):
                # Closing quote; also handles a last line with no newline.
                label = label[:-1]
            dict_gt[key] = label.lower()

    # cv2.imwrite returns False without raising when the folder is missing.
    true_dir = './trash/true/'
    false_dir = './trash/false/'
    os.makedirs(true_dir, exist_ok=True)
    os.makedirs(false_dir, exist_ok=True)

    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 1
    lineType = 2
    green = (0, 255, 0)
    red = (0, 0, 255)

    for image_tensors, image_path_list in demo_loader:
        batch_size = image_tensors.size(0)
        with torch.no_grad():
            # Follow the model's device instead of assuming CUDA (the
            # unconditional .cuda() calls crashed on CPU-only machines).
            image = image_tensors.to(run_device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(run_device)
            text_for_pred = torch.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0).to(run_device)

            if 'Transformer' in opt.SequenceModeling:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index
                # to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)
            elif 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding) then decode index
                # to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index
                # to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

        print('-' * 80)
        print('image_path\tpredicted_labels')
        print('-' * 80)
        for img_name, pred in zip(image_path_list, preds_str):
            # Prune after the end token. split() keeps the text intact when
            # the token is absent; slicing with find() == -1 used to drop
            # the last character.
            if 'Transformer' in opt.SequenceModeling:
                pred = pred.split('</s>', 1)[0]
            elif 'Attn' in opt.Prediction:
                pred = pred.split('[s]', 1)[0]

            file_name = os.path.basename(img_name)
            gt_label = dict_gt[file_name]

            # 200x128 white canvas: image in the top 64 rows, text below.
            raw_img = cv2.imread(img_name)
            raw_img = cv2.resize(raw_img, (200, 64))
            canvas = np.zeros((128, 200, 3), np.uint8)
            canvas.fill(255)
            canvas[:64, :200] = raw_img

            if pred == gt_label:
                cv2.putText(canvas, pred, (5, 90), font, fontScale, green,
                            lineType)
                # Only one text line is needed, so crop the canvas.
                canvas = canvas[:96, :200]
                cv2.imwrite(true_dir + file_name, canvas)
            else:
                cv2.putText(canvas, pred, (5, 90), font, fontScale, red,
                            lineType)
                cv2.putText(canvas, gt_label, (5, 125), font, fontScale,
                            green, lineType)
                cv2.imwrite(false_dir + file_name, canvas)

            print(f'{img_name}\t{pred}')
def demo(opt):
    """Recognise word crops and write ``<box coordinates>,<text>`` rows to a CSV.

    Boxes come from ``parse_csv(opt.input_image, opt.boxescsv)`` and are
    matched to predictions by position: the n-th image produced by the
    loader uses ``bboxes[n]``. The rows go to
    ``<opt.output_folder>result.csv``. That file is then copied to
    ``/input/output``, which is zipped as ``per_word_visual.zip``.

    Args:
        opt: Option namespace (model hyper-parameters, ``saved_model``,
            ``image_folder``, ``batch_size``, ``workers``, ``input_image``,
            ``boxescsv``, ``output_folder``, ...). ``opt.output_folder`` is
            used as a string prefix, so it must end with a path separator.

    Returns:
        None.
    """
    inputimage = opt.input_image
    boxesscv = opt.boxescsv
    bboxes = parse_csv(inputimage, boxesscv)

    # model configuration
    is_attn = 'Attn' in opt.Prediction
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    # ``device`` is resolved from the enclosing module scope.
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo,
        pin_memory=True)

    result_path = f'{opt.output_folder}result.csv'

    # predict
    model.eval()
    # Running index over the whole dataset. The old per-batch index
    # restarted at 0 each batch and paired later batches with wrong boxes.
    img_index = 0
    # The CSV is opened once. Reopening it in 'w' mode for every batch
    # discarded all but the last batch.
    with torch.no_grad(), open(result_path, 'w') as log:
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index
                # to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, preds_size)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding) then decode index
                # to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            for pred in preds_str:
                if is_attn:
                    # prune after "end of sentence" token ([s]); split()
                    # keeps the text intact when the token is absent
                    pred = pred.split('[s]', 1)[0]

                for pts in bboxes[img_index]:
                    x, y = pts
                    log.write(f'{x},{y},')
                log.write(f'{pred}\n')
                img_index += 1

    # copy log to local output folder (shutil instead of a shell ``cp``
    # built from an interpolated string)
    shutil.copy(result_path, '/input/output')
    shutil.make_archive('per_word_visual', 'zip', '/input/output')
def demo(opt):
    """Recognise all images in ``opt.image_folder`` and dump the result as JSON.

    Builds the model selected by ``opt.Prediction`` (CTC, Bert or attention),
    loads ``opt.saved_model``, runs greedy decoding over every image and
    writes ``./result/<folder>/result.json``, mapping each image's base name
    (with ``'gt'`` replaced by ``'res'``, extension stripped) to
    ``[{"transcription": <text>}]``.  ``<folder>`` is the second-to-last
    ``'/'``-separated component of ``opt.image_folder``.

    Args:
        opt: options namespace.  Reads the model/data settings printed below
            plus ``saved_model``, ``image_folder``, ``batch_size``,
            ``workers`` and ``PAD``; sets ``num_class``, ``alphabet_size``
            and, for RGB input, ``input_channel``.
    """
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    elif 'Bert' in opt.Prediction:
        converter = TransformerConverter(opt.character, opt.batch_max_length)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)
    opt.alphabet_size = len(opt.character) + 2  # +2 for [UNK]+[EOS]

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)

    # One device for the model, the checkpoint and the inputs, so the
    # function also works on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # result directory
    experiment_name = os.path.join('./result', opt.image_folder.split('/')[-2])
    os.makedirs(experiment_name, exist_ok=True)
    result = {}

    # predict; no_grad covers every branch so no autograd graph is kept
    model.eval()
    with torch.no_grad():
        for idx, (image_tensors, image_path_list) in enumerate(demo_loader):
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)
            elif 'Bert' in opt.Prediction:
                pad_mask = None
                preds = model(image, pad_mask)

                # Greedy decoding on the second model output
                _, preds_index = preds[1].max(2)
                length_for_pred = torch.IntTensor([preds_index.size(-1)] * batch_size).to(device)
                preds_str = converter.decode(preds_index, length_for_pred)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print(f'{idx}/{len(demo_data) / opt.batch_size}')
            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' in opt.Prediction:
                    # Prune after the "end of sentence" token ([s]); keep the
                    # full string when no [s] was emitted.
                    pred = pred.partition('[s]')[0]

                # key: file name without directory/extension, 'gt' -> 'res'
                name = f'{img_name}'.split('/')[-1].replace('gt', 'res').split('.')[0]
                value = [{"transcription": f'{pred}'}]
                result[name] = value

    with open(f'{experiment_name}/result.json', 'w') as f:
        json.dump(result, f)
    print("writed finish...")
def demo(opt):
    """Recognise all images in ``opt.image_folder`` and report the results.

    For every image the predicted text and its confidence score are printed,
    appended to ``./log_demo_result.txt`` and collected, together with a
    base64-encoded PNG of the image, in a table.  When ``opt.is_save`` is
    true the table is written to ``result.html``.

    Args:
        opt: options namespace.  Besides the model/data settings printed
            below it reads ``guide_training`` (selects the ``Model`` module),
            ``baiduCTC``, ``beam_search``, ``saved_model``, ``image_folder``,
            ``batch_size``, ``workers``, ``PAD`` and ``is_save``; sets
            ``num_class``, ``num_class_ctc``, ``num_class_attn`` and, for RGB
            input, ``input_channel``.
    """
    # model configuration
    if opt.guide_training:
        from model_guide import Model
    else:
        from model import Model

    if opt.baiduCTC:
        converter = CTCLabelConverterForBaiduWarpctc(opt.character)
    else:
        converter = CTCLabelConverter(opt.character)
    if opt.Prediction == 'Attn':
        converter = AttnLabelConverter(opt.character)

    opt.num_class = len(converter.character)
    opt.num_class_ctc = opt.num_class
    opt.num_class_attn = opt.num_class_ctc + 1

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model (strict=False: tolerate missing/unexpected checkpoint keys)
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device), strict=False)

    # prepare data
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # predict
    model.eval()
    data = pd.DataFrame()
    dashed_line = '-' * 80
    head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'
    with torch.no_grad():
        ind = 0
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                if opt.guide_training:
                    preds = model.module.inference(image, text_for_pred)
                else:
                    preds = model(image, text_for_pred)

                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                # Greedy decoding, except that with baiduCTC + beam search
                # the raw scores are handed to the converter.
                if opt.baiduCTC:
                    if opt.beam_search:
                        preds_index = preds
                    else:
                        _, preds_index = preds.max(2)
                        preds_index = preds_index.view(-1)
                else:
                    _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index.data, preds_size.data, opt.beam_search)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)

            # The context manager closes the log even if a row fails.
            with open('./log_demo_result.txt', 'a') as log:
                print(f'{dashed_line}\n{head}\n{dashed_line}')
                log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                    if 'Attn' in opt.Prediction:
                        pred_EOS = pred.find('[s]')
                        # Prune after the "end of sentence" token ([s]); when
                        # it is absent (-1) keep everything instead of
                        # silently dropping the last character.
                        if pred_EOS != -1:
                            pred = pred[:pred_EOS]
                            pred_max_prob = pred_max_prob[:pred_EOS]

                    # Confidence score = product of the per-step max
                    # probabilities.  An empty prediction has no steps, which
                    # would make ``[-1]`` raise; report 0 confidence instead.
                    if pred_max_prob.numel():
                        confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                    else:
                        confidence_score = pred_max_prob.new_zeros(())

                    filename = img_name
                    label = pred
                    conf = round(confidence_score.item(), 3)

                    # Embed the image in the table as an inline PNG.
                    img = cv2.imread(filename)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img_pil = Image.fromarray(img)
                    img_buffer = io.BytesIO()
                    img_pil.save(img_buffer, format="PNG")
                    imgStr = base64.b64encode(img_buffer.getvalue()).decode("utf-8")

                    data.loc[ind, 'img'] = '<img src="data:image/png;base64,{0:s}">'.format(imgStr)
                    data.loc[ind, 'id'] = filename
                    data.loc[ind, 'label'] = label
                    data.loc[ind, 'conf'] = conf
                    ind += 1

                    print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                    log.write(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')

    # escape=False keeps the <img> tags as markup in the generated table
    html_all = data.to_html(escape=False)
    if opt.is_save:
        with open("result.html", "w") as text_file:
            text_file.write(html_all)
def index():
    """Run recognition over the configured image folder and render the result.

    Obtains the model, converter, decoding buffers and options from
    ``loader()``, recognises every image in ``opt['image_folder']``, prints
    the predictions and timing information, and renders ``index.html`` with
    ``images`` mapping each image path (first 9 characters removed) to its
    predicted text.

    Returns:
        Whatever ``render_template('index.html', images=...)`` returns.
    """
    model, converter, length_for_pred, text_for_pred, opt = loader()
    start_time = time.time()
    AlignCollate_demo = AlignCollate(imgH=opt['imgH'], imgW=opt['imgW'], keep_ratio_with_pad=opt['PAD'])
    demo_data = RawDataset(root=opt['image_folder'], opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt['batch_size'],
        shuffle=False,
        num_workers=int(opt['workers']),
        collate_fn=AlignCollate_demo, pin_memory=True)
    get_data = time.time() - start_time

    # Collected across ALL batches.  Building the mapping after the loop
    # only saw the last batch (and failed on an empty folder).
    items = {}

    # predict
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)

            if 'CTC' in opt['Prediction']:
                preds = model(image, text_for_pred)
                preds = preds.log_softmax(2)

                # Select max probability and decode the indices to characters
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability and decode the indices to characters
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print('-' * 80)
            print('image_path\tpredicted_labels')
            print('-' * 80)
            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' in opt['Prediction']:
                    # Prune after the "end of sentence" token ([s]); keep the
                    # full string when no [s] was emitted.
                    pred = pred.partition('[s]')[0]

                print(f'{img_name}\t{pred}')
                # Drop the first 9 characters of the path for the template.
                # NOTE(review): presumably a fixed directory prefix -- confirm.
                # The pruned text is stored, matching what is printed.
                items[img_name[9:]] = pred

    forward_time = time.time() - start_time
    only_infer_time = forward_time - get_data
    print('*' * 80)
    print('get_dta_time:{:.5f}[sec]'.format(get_data))
    print('only_infer_time:{:.5f}[sec]'.format(only_infer_time))
    print('total_time:{:.5f}[sec]'.format(forward_time))
    print('*' * 80)

    return render_template('index.html', images=items)
def runDeepTextNet(segmentedImagesList):
    """Recognise the text in the given segmented images on the CPU.

    Uses a fixed TPS-ResNet-BiLSTM-Attn configuration and the checkpoint
    ``TPS-ResNet-BiLSTM-Attn.pth`` from the current working directory.

    Args:
        segmentedImagesList: passed as ``root`` to ``RawDataset``.

    Returns:
        A one-element list holding all predicted words joined by single
        spaces, in loader order.
    """
    opt = argparse.Namespace(FeatureExtraction='ResNet', PAD=False, Prediction='Attn', SequenceModeling='BiLSTM',
                             Transformation='TPS', batch_max_length=25, batch_size=192,
                             character='0123456789abcdefghijklmnopqrstuvwxyz', hidden_size=256,
                             image_folder='demo_image/', imgH=32, imgW=100, input_channel=1, num_class=38,
                             num_fiducial=20, num_gpu=0, output_channel=512, rgb=False,
                             saved_model='TPS-ResNet-BiLSTM-Attn.pth', sensitive=False, workers=4)

    # The model is explicitly placed on the CPU, so the inputs must go there
    # too rather than to whatever the module-level device happens to be.
    cpu = torch.device('cpu')
    model = Model(opt)
    model = torch.nn.DataParallel(model).to(cpu)
    model.load_state_dict(torch.load(opt.saved_model, map_location=cpu))

    converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3

    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=segmentedImagesList, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # predict
    model.eval()
    out_preds_texts = []
    # Inference only: do not record an autograd graph.
    with torch.no_grad():
        for image_tensors, _ in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(cpu)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(cpu)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(cpu)

            preds = model(image, text_for_pred, is_train=False)

            # Select max probability (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)

            for pred in preds_str:
                if 'Attn' in opt.Prediction:
                    # Prune after the "end of sentence" token ([s]); keep the
                    # full string when no [s] was emitted.
                    pred = pred.partition('[s]')[0]
                # The confidence score was computed but never used, and it
                # raised on empty predictions, so it is no longer computed.
                out_preds_texts.append(pred)

    sentence_out = [' '.join(out_preds_texts)]
    return sentence_out
model.load_state_dict(pre) #text model------------------------- class args(object): #必要的一些参数设置 def __init__(self): self.rgb = True self.imgW = 128 self.imgH = 128 self.path = os.path.join(os.getcwd(), 'test_imgs') self.batch_size = 4 opt = args() test_loader = RawDataset(opt.path, opt) #length_of_data = len(test_loader)#图片数量 test_set = torch.utils.data.DataLoader(dataset=test_loader, batch_size=opt.batch_size, shuffle=False, pin_memory=True) model.eval() fig_i = 0 for batch_x, path_x in test_set: if len(batch_x) == 0: break fig_i += 1 x_tensors = batch_x.to(device) out = model(x_tensors) pred = torch.max(out, 1)[1]
def _recognize_and_log(model, converter, loader, opt, on_prediction=None):
    """Run attention decoding over ``loader``, printing and logging each row.

    For every batch a table header and one row per image (path, predicted
    text, confidence score) is printed and appended to
    ``./log_demo_result.txt``.

    Args:
        model: recognition model, already in eval mode.
        converter: label converter providing ``decode``.
        loader: iterable of ``(image_tensors, image_path_list)`` batches.
        opt: options namespace; ``batch_max_length`` is read.
        on_prediction: optional callable invoked with each pruned prediction
            right after its row has been printed and logged.

    Returns:
        The pruned prediction strings, in loader order.
    """
    predictions = []
    dashed_line = '-' * 80
    head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

    with torch.no_grad():
        for image_tensors, image_path_list in loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            preds = model(image, text_for_pred, is_train=False)

            # Select max probability (greedy decoding) then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)

            # Append mode so earlier results are kept; the context manager
            # closes the file even if a row fails.
            with open('./log_demo_result.txt', 'a') as log:
                print(f'{dashed_line}\n{head}\n{dashed_line}')
                log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                    pred_EOS = pred.find('[s]')
                    # Prune after the "end of sentence" token ([s]); when it
                    # is absent (-1) keep everything instead of dropping the
                    # last character.
                    if pred_EOS != -1:
                        pred = pred[:pred_EOS]
                        pred_max_prob = pred_max_prob[:pred_EOS]

                    # Confidence score = product of the per-step max
                    # probabilities; an empty prediction would make ``[-1]``
                    # raise, so it is reported with 0 confidence.
                    if pred_max_prob.numel():
                        confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                    else:
                        confidence_score = pred_max_prob.new_zeros(())

                    predictions.append(pred)
                    print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                    log.write(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')
                    if on_prediction is not None:
                        on_prediction(pred)
    return predictions


def demo(opt):
    """Check whether texts recognised in one image set also occur in another.

    Recognises the images in ``opt.image_folder1`` (signboard detection
    results) and remembers every predicted text, then recognises the images
    in ``opt.image_folder2`` (text detected on Google Maps) and prints, for
    each prediction, whether it was among the remembered texts.  All rows
    are also appended to ``./log_demo_result.txt``.

    Args:
        opt: options namespace.  Reads the model/data settings printed below
            plus ``saved_model``, ``image_folder1``, ``image_folder2``,
            ``batch_size``, ``workers`` and ``PAD``; sets ``num_class`` and,
            for RGB input, ``input_channel``.
    """
    # model configuration (attention decoder only)
    converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    # print the parameter values
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load the model parameters
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data1 = RawDataset(root=opt.image_folder1, opt=opt)  # signboard detection results
    demo_data2 = RawDataset(root=opt.image_folder2, opt=opt)  # Google Maps text detection results
    demo_loader1 = torch.utils.data.DataLoader(
        demo_data1, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)
    demo_loader2 = torch.utils.data.DataLoader(
        demo_data2, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # predict
    model.eval()

    # Texts recognised in the first image set.
    lists = _recognize_and_log(model, converter, demo_loader1, opt)
    known_texts = set(lists)  # O(1) membership tests for the second pass

    def report_match(pred):
        # Tell the user whether this text was also seen in the first set.
        if pred in known_texts:
            print(pred + "은(는) 알맞은 목적지입니다.")
        else:
            print(pred + "은(는) 알맞은 목적지가 아닙니다.")

    _recognize_and_log(model, converter, demo_loader2, opt, on_prediction=report_match)
def demo(args):
    """Recognise cropped word images and append the results to per-page logs.

    Runs the model over every image in ``args.image_folder``.  For each
    image the row ``<image name> <prediction> <confidence>`` is printed and
    appended to ``<page>_log_demo_result.txt``, located under
    ``../data/crop_img/`` in the same sub-folder as the image, where
    ``<page>`` is the image file name without its last eight
    ``'_'``-separated fields.

    Args:
        args: options namespace.  Reads the model/data settings printed
            below plus ``saved_model``, ``image_folder``, ``batch_size``,
            ``workers`` and ``PAD``; sets ``num_class`` and, for RGB input,
            ``input_channel``.
    """
    # NOTE: '../data/craft_output/data.csv' used to be loaded here, but the
    # resulting frame was never used, so the read has been removed.

    # model configuration
    if 'CTC' in args.Prediction:
        converter = CTCLabelConverter(args.character)
    else:
        converter = AttnLabelConverter(args.character)
    args.num_class = len(converter.character)

    if args.rgb:
        args.input_channel = 3
    model = Model(args)
    print('model input parameters', args.imgH, args.imgW, args.num_fiducial, args.input_channel, args.output_channel,
          args.hidden_size, args.num_class, args.batch_max_length, args.Transformation, args.FeatureExtraction,
          args.SequenceModeling, args.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % args.saved_model)
    model.load_state_dict(torch.load(args.saved_model, map_location=device))

    # prepare data
    AlignCollate_demo = AlignCollate(imgH=args.imgH, imgW=args.imgW, keep_ratio_with_pad=args.PAD)
    demo_data = RawDataset(root=args.image_folder, args=args)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=args.batch_size,
        shuffle=False,
        num_workers=int(args.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    start = '../data/crop_img/'  # root that the per-image log paths are derived from
    dashed_line = '-' * 80
    head = f'{"image_path":25s}\t {"predicted_labels":25s}\t confidence score'

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([args.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, args.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in args.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index.data, preds_size.data)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print(f'{dashed_line}\n{head}\n{dashed_line}')

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                # Derive the log file from the image location:
                # <start>/<sub-folder>/<name minus last 8 '_' fields>_log_demo_result.txt
                path = os.path.relpath(img_name, start)
                folder = os.path.dirname(path)
                image_name = os.path.basename(path)
                file_name = '_'.join(image_name.split('_')[:-8])
                txt_file = os.path.join(start, folder, file_name)

                if 'Attn' in args.Prediction:
                    pred_EOS = pred.find('[s]')
                    # Prune after the "end of sentence" token ([s]); when it
                    # is absent (-1) keep everything instead of dropping the
                    # last character.
                    if pred_EOS != -1:
                        pred = pred[:pred_EOS]
                        pred_max_prob = pred_max_prob[:pred_EOS]

                # Confidence score = product of the per-step max
                # probabilities; an empty prediction would make ``[-1]``
                # raise, so it is reported with 0 confidence.
                if pred_max_prob.numel():
                    confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                else:
                    confidence_score = pred_max_prob.new_zeros(())

                print(f'{image_name:25s}\t {pred:25s}\t {confidence_score:0.4f}')
                # The context manager closes the log even if the write fails.
                with open(f'{txt_file}_log_demo_result.txt', 'a') as log:
                    log.write(f'{image_name:25s}\t {pred:25s}\t {confidence_score:0.4f}\n')
def demoToTxt1(image_folder, saved_model, txtFile):
    """Recognise every image in a folder and write the results to a text file.

    The options are declared through ``argparse`` with ``image_folder`` and
    ``saved_model`` as defaults, so they can still be overridden from the
    process command line.  One ``<image path>\\t<prediction>`` line per image
    is printed and written to ``txtFile``.

    Args:
        image_folder: default for ``--image_folder``.
        saved_model: default for ``--saved_model`` (checkpoint to load).
        txtFile: path of the output file; it is overwritten.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_folder', default=image_folder,
                        help='path to image_folder which contains text images')
    parser.add_argument('--workers', type=int, help='number of data loading workers', default=4)
    parser.add_argument('--batch_size', type=int, default=100, help='input batch size')
    parser.add_argument('--saved_model', default=saved_model, help="path to saved_model to evaluation")
    # Data processing
    parser.add_argument('--batch_max_length', type=int, default=20, help='maximum-label-length')
    parser.add_argument('--imgH', type=int, default=32, help='the height of the input image')
    parser.add_argument('--imgW', type=int, default=100, help='the width of the input image')
    parser.add_argument('--rgb', action='store_true', help='use rgb input')
    parser.add_argument('--character', type=str, default='0123456789', help='character label')
    parser.add_argument('--sensitive', default=True, help='for sensitive character mode')
    parser.add_argument('--PAD', default=False, action='store_true',
                        help='whether to keep ratio then pad for image resize')
    # Model Architecture
    parser.add_argument('--Transformation', default='TPS', type=str, help='Transformation stage. None|TPS')
    parser.add_argument('--FeatureExtraction', default='ResNet', type=str,
                        help='FeatureExtraction stage. VGG|RCNN|ResNet')
    parser.add_argument('--SequenceModeling', default='BiLSTM', type=str, help='SequenceModeling stage. None|BiLSTM')
    parser.add_argument('--Prediction', default='CTC', type=str, help='Prediction stage. CTC|Attn')
    parser.add_argument('--num_fiducial', type=int, default=20, help='number of fiducial points of TPS-STN')
    parser.add_argument('--input_channel', type=int, default=1,
                        help='the number of input channel of Feature extractor')
    parser.add_argument('--output_channel', type=int, default=512,
                        help='the number of output channel of Feature extractor')
    parser.add_argument('--hidden_size', type=int, default=256, help='the size of the LSTM hidden state')

    opt = parser.parse_args()

    # vocab / character number configuration
    if opt.sensitive:
        opt.character += 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    cudnn.benchmark = True
    cudnn.deterministic = True
    opt.num_gpu = torch.cuda.device_count()

    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)

    # One device for the model, the checkpoint and the inputs, so the
    # function also works on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # predict
    model.eval()
    # The context manager flushes and closes the output file (previously it
    # was never closed); no_grad covers every branch so no autograd graph
    # is kept.
    with open(txtFile, 'w') as saved_file, torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print('-' * 80)
            print('image_path\tpredicted_labels')
            print('-' * 80)
            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' in opt.Prediction:
                    # Prune after the "end of sentence" token ([s]); keep the
                    # full string when no [s] was emitted.
                    pred = pred.partition('[s]')[0]

                print(f'{img_name}\t{pred}')
                saved_file.write(f'{img_name}\t{pred}\n')
def load_data_pool(self):
    """Return a ``RawDataset`` for the ``'data_pool'`` part of the data.

    Emits an INFO log line, then constructs the dataset from
    ``self.dir_data``, ``self.dataname``, the literal ``'data_pool'`` and
    ``self.classes``.
    """
    logging.info("load data from data pool.")
    pool_dataset = RawDataset(
        self.dir_data,
        self.dataname,
        'data_pool',
        self.classes,
    )
    return pool_dataset
def _textRecognition(opt):
    """Recognise one character per image and write the results to a CSV file.

    Runs the model over every image in ``opt.image_folder`` and writes
    ``text_information.csv`` (columns ``bookID``, ``prediction``) into
    ``opt.output_dirpath``.  Only the first character of each prediction is
    kept; it is replaced by ``"Undefined"`` when it is not an ASCII letter
    or digit.  Images with an empty prediction or a confidence below 0.5 are
    removed via ``deleteImageAndText(opt.book_img_dirpath, img_name)`` and
    get no CSV row.

    Args:
        opt: options namespace.  Reads the model/data settings printed below
            plus ``saved_model``, ``image_folder``, ``batch_size``,
            ``workers``, ``PAD``, ``output_dirpath`` and
            ``book_img_dirpath``; sets ``num_class`` and, for RGB input,
            ``input_channel``.
    """
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # predict
    char_list = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    csv_filename = os.path.join(opt.output_dirpath, "text_information.csv")
    Header = ["bookID", "prediction"]
    dashed_line = '-' * 80
    head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

    # newline='' lets the csv module control line endings (otherwise blank
    # lines appear between rows on Windows).
    with open(csv_filename, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=Header)
        writer.writeheader()

        model.eval()
        with torch.no_grad():
            for image_tensors, image_path_list in demo_loader:
                batch_size = image_tensors.size(0)
                image = image_tensors.to(device)
                # For max length prediction
                length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
                text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

                if 'CTC' in opt.Prediction:
                    preds = model(image, text_for_pred)

                    # Select max probability (greedy decoding) then decode index to character
                    preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                    _, preds_index = preds.max(2)
                    preds_index = preds_index.view(-1)
                    preds_str = converter.decode(preds_index.data, preds_size.data)
                else:
                    preds = model(image, text_for_pred, is_train=False)

                    # Select max probability (greedy decoding) then decode index to character
                    _, preds_index = preds.max(2)
                    preds_str = converter.decode(preds_index, length_for_pred)

                print(f'{dashed_line}\n{head}\n{dashed_line}')

                preds_prob = F.softmax(preds, dim=2)
                preds_max_prob, _ = preds_prob.max(dim=2)
                for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                    if 'Attn' in opt.Prediction:
                        pred_EOS = pred.find('[s]')
                        # Prune after the "end of sentence" token ([s]); when
                        # it is absent (-1) keep everything instead of
                        # dropping the last character.
                        if pred_EOS != -1:
                            pred = pred[:pred_EOS]
                            pred_max_prob = pred_max_prob[:pred_EOS]

                    # Nothing was recognised: there is no character to keep
                    # and no probability to multiply.  This explicit check
                    # replaces a bare ``except`` around the confidence
                    # computation and also covers empty CTC predictions,
                    # which used to fail on ``pred[0]``.
                    if not pred or pred_max_prob.numel() == 0:
                        deleteImageAndText(opt.book_img_dirpath, img_name)
                        continue

                    # Confidence score = product of the per-step max probabilities
                    confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                    pred = pred[0]  # only the first character is of interest

                    if confidence_score < 0.5:
                        # unreadable: drop the image instead of recording it
                        deleteImageAndText(opt.book_img_dirpath, img_name)
                        continue
                    if pred not in char_list:
                        pred = "Undefined"

                    # extract the name part of the image
                    filename = os.path.basename(img_name)
                    print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                    writer.writerow({"bookID": filename, "prediction": pred})
def demo(opt):
    """Recognise every image in ``opt.image_folder`` and print the predictions.

    Builds the model selected by ``opt.Prediction`` (CTC or attention),
    loads ``opt.saved_model``, runs greedy decoding batch by batch and prints
    one ``<image path>\\t<prediction>`` line per image under a small header.

    Args:
        opt: options namespace.  Reads the model/data settings printed below
            plus ``saved_model``, ``image_folder``, ``batch_size``,
            ``workers`` and ``PAD``; sets ``num_class`` and, for RGB input,
            ``input_channel``.
    """
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.permute(1, 0, 2).max(2)
                preds_index = preds_index.transpose(1, 0).contiguous().view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            print('-' * 80)
            print('image_path\tpredicted_labels')
            print('-' * 80)
            for img_name, pred in zip(image_path_list, preds_str):
                if 'Attn' in opt.Prediction:
                    # Prune after the "end of sentence" token ([s]).
                    # partition() keeps the whole string when no [s] was
                    # emitted; slicing with find()'s -1 dropped the last
                    # character in that case.
                    pred = pred.partition('[s]')[0]

                print(f'{img_name}\t{pred}')
def demo(opt):
    """Recognize text in ``opt.image_folder`` and print a dict of the results.

    Builds the label converter and the model from ``opt``, loads the weights
    stored at ``opt.saved_model``, decodes each batch greedily (arg-max per
    step) and collects one record per image. The printed dict has the keys:

    * ``"pred"``: list of ``{"img_name", "pred", "confidence_score"}`` records,
      where the confidence score is the product of the per-step maximum
      probabilities formatted with four decimals;
    * ``"localizacion_url"``: ``"/location/"`` + first entry of
      ``os.listdir("../process/location")``;
    * ``"image_url"``: ``"/images/"`` + first entry of
      ``os.listdir("../process/images")``.

    Args:
        opt: Options object. Attributes read here: ``Prediction``,
            ``character``, ``rgb``, ``imgH``, ``imgW``, ``PAD``,
            ``image_folder``, ``batch_size``, ``workers``,
            ``batch_max_length`` and ``saved_model``. ``opt.num_class`` is set
            from the converter, and ``opt.input_channel`` is set to 3 when
            ``opt.rgb`` is truthy.

    Returns:
        None. The result dict is only printed.

    Raises:
        IndexError: If ``../process/location`` or ``../process/images`` is empty.
    """
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    # NOTE(review): the model is wrapped before the weights are loaded, presumably
    # because the checkpoint was saved from a DataParallel model -- confirm.
    model = torch.nn.DataParallel(model).to(device)

    # load model
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # predict
    # The original wrote `model.eval` without parentheses, which only references
    # the method and never switches the model to evaluation mode.
    model.eval()

    predictions = []  # one record per transcribed image
    response = {}
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred).log_softmax(2)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index.data, preds_size.data)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                if 'Attn' in opt.Prediction:
                    pred_EOS = pred.find('[s]')
                    # find() returns -1 when the token is absent; slicing with it
                    # would drop the last character and its probability.
                    if pred_EOS != -1:
                        pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                        pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= product of pred_max_prob)
                if pred_max_prob.numel() > 0:
                    confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                else:
                    # An empty prediction has no per-step probabilities, and
                    # cumprod(...)[-1] would raise IndexError; report 0.0.
                    confidence_score = 0.0

                # transcription record
                result = {
                    "img_name": img_name,
                    "pred": str(pred),
                    "confidence_score": f'{confidence_score:0.4f}'
                }
                predictions.append(result)

    # Images served under /location/ and /images/. Only the first directory
    # entry is used; os.listdir() returns entries in arbitrary order.
    file_list_1 = os.listdir("../process/location")
    file_list_2 = os.listdir("../process/images")

    response["pred"] = predictions
    response["localizacion_url"] = "/location/" + file_list_1[0]
    response["image_url"] = "/images/" + file_list_2[0]

    print(response)
def demo(opt):
    """Recognize text in ``opt.image_folder``; print and log labels with confidence.

    Builds the label converter and the model from ``opt``, loads the weights
    stored at ``opt.saved_model`` and decodes each batch greedily (arg-max per
    step). For every batch a table header is printed and appended to
    ``./log_demo_result.txt``; for every image a row with the image path, the
    predicted label and the confidence score (product of the per-step maximum
    probabilities) is printed, appended to the log, and written to
    ``custom_output``.

    NOTE(review): ``custom_output`` is not defined in this function; it must
    already exist as a writable object in the enclosing scope -- confirm.

    Args:
        opt: Options object. Attributes read here: ``Prediction``,
            ``character``, ``rgb``, ``imgH``, ``imgW``, ``PAD``,
            ``image_folder``, ``batch_size``, ``workers``,
            ``batch_max_length``, ``saved_model`` and the hyper-parameters
            echoed in the start-up message. ``opt.num_class`` is set from the
            converter, and ``opt.input_channel`` is set to 3 when ``opt.rgb``
            is truthy.

    Returns:
        None.
    """
    # model configuration
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    # NOTE(review): the model is wrapped before the weights are loaded, presumably
    # because the checkpoint was saved from a DataParallel model -- confirm.
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo
    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data, batch_size=opt.batch_size,
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_demo, pin_memory=True)

    # The table header is identical for every batch, so build it once.
    dashed_line = '-' * 80
    head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding) then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, preds_size)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # Select max probability (greedy decoding) then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            # The context manager closes the log even if a row fails to format
            # or write; the original open()/close() pair leaked the handle then.
            with open('./log_demo_result.txt', 'a') as log:
                print(f'{dashed_line}\n{head}\n{dashed_line}')
                log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

                preds_prob = F.softmax(preds, dim=2)
                preds_max_prob, _ = preds_prob.max(dim=2)
                for img_name, pred, pred_max_prob in zip(image_path_list, preds_str, preds_max_prob):
                    if 'Attn' in opt.Prediction:
                        pred_EOS = pred.find('[s]')
                        # find() returns -1 when the token is absent; slicing with
                        # it would drop the last character and its probability.
                        if pred_EOS != -1:
                            pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                            pred_max_prob = pred_max_prob[:pred_EOS]

                    # calculate confidence score (= product of pred_max_prob)
                    if pred_max_prob.numel() > 0:
                        confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                    else:
                        # An empty prediction has no per-step probabilities, and
                        # cumprod(...)[-1] would raise IndexError; report 0.0.
                        confidence_score = 0.0

                    print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                    log.write(
                        f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')
                    custom_output.write(
                        f'{img_name}\t{pred}\t{confidence_score:0.4f}\n')
#print("Cropped image") mask_file = result_folder + filename + "_" + str( order_sorted[i]) + "_" + str(i) + '.jpg' #print(mask_file) crop_image = rgb_img[int(min_point[1]):int(max_point[1]), int(min_point[0]):int(max_point[0])] #plt.imshow(crop_image) #plt.show() cv2.imwrite(mask_file, crop_image) # prepare data. two demo images from https://github.com/bgshih/crnn#run-demo #result_folder = './intermediate_result/' AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD) demo_data = RawDataset(root=result_folder, opt=opt) # use RawDataset demo_loader = torch.utils.data.DataLoader(demo_data, batch_size=opt.batch_size, shuffle=False, num_workers=int(opt.workers), collate_fn=AlignCollate_demo, pin_memory=True) print("Starting text classification") model.eval() with torch.no_grad(): for image_tensors, image_path_list in demo_loader: batch_size = image_tensors.size(0) image = image_tensors.to(device) #image = (torch.from_numpy(crop_image).unsqueeze(0)).to(device) #print(image_path_list) #print(image.size())