def inference(model, detector, test_loader):
    """Run ``model`` over ``test_loader`` and visualize every detection.

    For each image of each batch the raw network output is split into a
    text-region map (channels 0-1, softmaxed), a text-center-line map
    (channels 2-3, softmaxed) and sin / cos / radius maps (channels 4, 5, 6).
    These are handed to ``detector.detect`` and the result is passed to
    ``visualize_detection``.

    Args:
        model: network called as ``model(img)``; switched to eval mode here.
        detector: object exposing ``detect(tr, tcl, sin, cos, radii)``.
        test_loader: iterable yielding
            ``(img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map, meta)``.
    """
    # Local import: the no-grad context is the only thing this block needs
    # from torch directly.
    import torch

    model.eval()

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map, meta) in enumerate(test_loader):

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        # Inference only: skip autograd bookkeeping so no graph is kept alive
        # for the forward pass.
        with torch.no_grad():
            output = model(img)

        for idx in range(img.size(0)):
            print('detect {} images: {}.'.format(idx, meta['image_id'][idx]))

            tr_pred = output[idx, 0:2].softmax(dim=0).data.cpu().numpy()
            tcl_pred = output[idx, 2:4].softmax(dim=0).data.cpu().numpy()
            sin_pred = output[idx, 4].data.cpu().numpy()
            cos_pred = output[idx, 5].data.cpu().numpy()
            radii_pred = output[idx, 6].data.cpu().numpy()

            batch_result = detector.detect(tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred)  # (n_tcl, 3)

            # visualization: CHW -> HWC, then scale by stds / shift by means
            # and convert to 8-bit (presumably undoing the input normalization).
            img_show = img[idx].permute(1, 2, 0).cpu().numpy()
            img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)

            # NOTE(review): batch_result comes from a per-image detect() call
            # yet is indexed with the in-batch position `idx` -- confirm this
            # is intended.
            visualize_detection(img_show, tr_pred[1], tcl_pred[1], batch_result[idx],
                                '{}_{}'.format(i, meta['image_id'][idx]))
def inference(detector, test_loader, output_dir):
    """Detect text on every sample, save a pred/GT visualization and a result file.

    Per sample (index 0 of each batch is the only one processed):
      1. time ``detector.detect(image)`` between two CUDA synchronizations and
         print a running fps figure;
      2. stack the prediction visualization on top of the ground-truth
         visualization and write it under ``cfg.vis_dir/<exp_name>_test/``;
      3. rescale the contours with ``rescale_result`` using ``meta['Height']``
         / ``meta['Width']`` and write them to ``output_dir``.

    Args:
        detector: object exposing ``detect(image) -> (contours, output)`` where
            ``output`` has ``'tr'`` and ``'tcl'`` entries.
        test_loader: iterable yielding
            ``(image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map, meta)``.
        output_dir: directory that receives one text file per image.
    """
    total_time = 0.

    # The output directory does not depend on the sample, so create it once
    # instead of on every iteration.
    mkdirs(output_dir)

    for i, (image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map, meta) in enumerate(test_loader):

        image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        # Synchronize before and after so the wall-clock interval covers the
        # GPU work issued by detect().
        torch.cuda.synchronize()
        start = time.time()

        idx = 0  # test mode can only run with batch_size == 1

        # get detection result
        contours, output = detector.detect(image)

        torch.cuda.synchronize()
        end = time.time()
        total_time += end - start
        fps = (i + 1) / total_time
        print('detect {} / {} images: {}. ({:.2f} fps)'.format(
            i + 1, len(test_loader), meta['image_id'][idx], fps))

        # visualization
        tr_pred, tcl_pred = output['tr'], output['tcl']
        img_show = image[idx].permute(1, 2, 0).cpu().numpy()
        img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)

        pred_vis = visualize_detection(img_show, contours, tr_pred[1], tcl_pred[1])

        # Keep only annotations that actually contain points, truncated to
        # their recorded length.
        gt_contour = [
            annot[:n_annot].int().cpu().numpy()
            for annot, n_annot in zip(meta['annotation'][idx], meta['n_annotation'][idx])
            if n_annot.item() > 0
        ]
        gt_vis = visualize_detection(img_show, gt_contour,
                                     tr_mask[idx].cpu().numpy(), tcl_mask[idx].cpu().numpy())

        im_vis = np.concatenate([pred_vis, gt_vis], axis=0)
        path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name), meta['image_id'][idx])
        cv2.imwrite(path, im_vis)

        H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
        img_show, contours = rescale_result(img_show, contours, H, W)

        # write to file
        write_to_file(contours,
                      os.path.join(output_dir, meta['image_id'][idx].replace('jpg', 'txt')))
def inference(model, detector, test_loader):
    """Run ``model`` over ``test_loader``, save visualizations and result files.

    For every image the network output is split into text-region (channels
    0-1, softmaxed), text-center-line (channels 2-3, softmaxed), sin, cos and
    radius maps (channels 4, 5, 6) and passed to ``detector.detect``. The
    detections are turned into polygons with ``result2polygon``, drawn next to
    the ground truth, and finally rescaled and written to ``cfg.output_dir``.

    Args:
        model: network called as ``model(img)``; switched to eval mode here.
        detector: object exposing ``detect(tr, tcl, sin, cos, radii)``.
        test_loader: iterable yielding
            ``(img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map, meta)``.
    """
    # Local import: the no-grad context is the only thing this block needs
    # from torch directly.
    import torch

    model.eval()

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map, meta) in enumerate(test_loader):

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        # Inference only: skip autograd bookkeeping so no graph is kept alive
        # for the forward pass.
        with torch.no_grad():
            output = model(img)

        for idx in range(img.size(0)):
            print('detect {} / {} images: {}.'.format(i, len(test_loader), meta['image_id'][idx]))

            tr_pred = output[idx, 0:2].softmax(dim=0).data.cpu().numpy()
            tcl_pred = output[idx, 2:4].softmax(dim=0).data.cpu().numpy()
            sin_pred = output[idx, 4].data.cpu().numpy()
            cos_pred = output[idx, 5].data.cpu().numpy()
            radii_pred = output[idx, 6].data.cpu().numpy()

            batch_result = detector.detect(tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred)  # (n_tcl, 3)

            # visualization
            img_show = img[idx].permute(1, 2, 0).cpu().numpy()
            img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)
            contours = result2polygon(img_show, batch_result)

            pred_vis = visualize_detection(img_show, tr_pred[1], tcl_pred[1], contours)

            # Keep only annotations that actually contain points.
            gt_contour = []
            for annot, n_annot in zip(meta['annotation'][idx], meta['n_annotation'][idx]):
                if n_annot.item() > 0:
                    gt_contour.append(annot[:n_annot].int().cpu().numpy())
            gt_vis = visualize_detection(img_show, tr_mask[idx].cpu().numpy(),
                                         tcl_mask[idx].cpu().numpy(), gt_contour)

            # Prediction on top, ground truth below.
            im_vis = np.concatenate([pred_vis, gt_vis], axis=0)
            path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name), meta['image_id'][idx])
            cv2.imwrite(path, im_vis)

            H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
            img_show, contours = rescale_result(img_show, contours, H, W)
            write_to_file(contours,
                          os.path.join(cfg.output_dir, meta['image_id'][idx].replace('jpg', 'txt')))
def predict(self, img):
    """Detect text in a single image and return a visualization plus contours.

    The image is converted with ``cv2.COLOR_RGB2BGR``, normalized with
    ``self.means`` / ``self.stds``, has its channel axis reversed again,
    is reshaped to a 1xCxHxW tensor and fed to ``self.detector.detect``.
    The elapsed detection time is printed.

    Args:
        img: array with three dimensions ``(rows, cols, channels)``.

    Returns:
        tuple: ``(img_viz, contours)`` where ``contours`` has been passed
        through ``rescale_result`` with the input's rows / cols and
        ``img_viz`` is the output of ``visualize_detection``.
    """
    # Preprocessing
    rows, cols, _ = img.shape
    image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Only touch the CUDA runtime when the GPU path is actually enabled;
    # an unconditional synchronize fails on CPU-only setups.
    if self.cuda_use:
        torch.cuda.synchronize()
    start = time.time()

    x = image.astype(np.float32)
    x = (x / 255 - self.means) / self.stds
    x = x.astype(np.float32)
    # Reverse the channel axis; .copy() gives torch a contiguous buffer
    # (from_numpy cannot wrap negatively-strided arrays).
    x = x[:, :, ::-1].copy()
    x = torch.from_numpy(x).permute(2, 0, 1)
    x = Variable(x.unsqueeze(0))
    if self.cuda_use:
        x = x.cuda()

    contours, _ = self.detector.detect(x)

    if self.cuda_use:
        torch.cuda.synchronize()
    end = time.time()
    # Function-call form prints the same text on Python 2 and is valid
    # syntax on Python 3 (the old print statement was not).
    print("Text Detection Time : {}".format(end - start))

    image, contours = rescale_result(image, contours, rows, cols)
    img_viz = visualize_detection(image, contours)
    return img_viz, contours
def inference(detector, test_loader, output_dir):
    """Detect text on every sample and write dataset-specific result files.

    The output directory is prepared with ``osmkdir`` for every experiment
    except ``"MLT2017"``, where it is only created (via ``mkdirs``) when it
    does not exist yet. For each sample the prediction visualization is saved
    under ``cfg.vis_dir/<exp_name>_test/`` and the rescaled contours are
    written in the format selected by ``cfg.exp_name``.

    Args:
        detector: object exposing ``detect(image, img_show) -> (contours, output)``
            where ``output`` has ``'tr'`` and ``'tcl'`` entries.
        test_loader: iterable yielding ``(image, meta)``.
        output_dir: directory that receives the result files.
    """
    if cfg.exp_name != "MLT2017":
        osmkdir(output_dir)
    elif not os.path.exists(output_dir):
        mkdirs(output_dir)

    for i, (image, meta) in enumerate(test_loader):

        image = to_device(image)
        torch.cuda.synchronize()

        idx = 0  # test mode can only run with batch_size == 1

        # visualization: CHW -> HWC, scale by stds / shift by means, to 8-bit,
        # then swap the channel order.
        img_show = image[idx].permute(1, 2, 0).cpu().numpy()
        img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)
        img_show = cv2.cvtColor(img_show, cv2.COLOR_BGR2RGB)

        # get detection result
        contours, output = detector.detect(image, img_show)
        tr_pred, tcl_pred = output['tr'], output['tcl']

        torch.cuda.synchronize()
        print('detect {} / {} images: {}.'.format(i + 1, len(test_loader), meta['image_id'][idx]))

        pred_vis = visualize_detection(img_show, contours, tr_pred[1], tcl_pred[1])
        path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name), meta['image_id'][idx])
        cv2.imwrite(path, pred_vis)

        H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
        img_show, contours = rescale_result(img_show, contours, H, W)

        # write to file
        if cfg.exp_name == "Icdar2015":
            fname = "res_" + meta['image_id'][idx].replace('jpg', 'txt')
            contours = data_transfer_ICDAR(contours)
            write_to_file(contours, os.path.join(output_dir, fname))
        elif cfg.exp_name == "TD500":
            fname = "res_" + meta['image_id'][idx].replace('JPG', 'txt')
            im_show = data_transfer_TD500(contours, os.path.join(output_dir, fname), img_show)
            # A second visualization is saved under a normalized file name.
            id_img = meta['image_id'][idx].replace("img_", "").replace("JPG", "jpg")
            path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name), id_img)
            cv2.imwrite(path, im_show)
        else:
            fname = meta['image_id'][idx].replace('jpg', 'txt')
            write_to_file(contours, os.path.join(output_dir, fname))
def inference(detector, test_loader, output_dir):
    """Detect text on unlabeled samples, save a visualization and a result file.

    Per sample (index 0 of each batch is the only one processed) the call to
    ``detector.detect`` is timed between two CUDA synchronizations, a running
    fps figure is printed, the contours are rescaled with ``rescale_result``
    using ``meta['Height']`` / ``meta['Width']``, drawn and saved under
    ``cfg.vis_dir/<exp_name>_deploy/``, and written to ``output_dir``.

    Args:
        detector: object exposing ``detect(image) -> (contours, output)``.
        test_loader: iterable yielding ``(image, meta)``.
        output_dir: directory that receives one text file per image.
    """
    total_time = 0.

    # The output directory does not depend on the sample, so create it once
    # instead of on every iteration.
    mkdirs(output_dir)

    for i, (image, meta) in enumerate(test_loader):

        image = to_device(image)

        # Synchronize before and after so the wall-clock interval covers the
        # GPU work issued by detect().
        torch.cuda.synchronize()
        start = time.time()

        idx = 0  # test mode can only run with batch_size == 1

        # get detection result (the auxiliary output is not needed here)
        contours, _ = detector.detect(image)

        torch.cuda.synchronize()
        end = time.time()
        total_time += end - start
        fps = (i + 1) / total_time
        # Report a 1-based counter so it agrees with the fps denominator and
        # reaches len(test_loader) on the last image.
        print('detect {} / {} images: {}. ({:.2f} fps)'.format(
            i + 1, len(test_loader), meta['image_id'][idx], fps))

        # visualization
        img_show = image[idx].permute(1, 2, 0).cpu().numpy()
        img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)

        H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
        img_show, contours = rescale_result(img_show, contours, H, W)

        pred_vis = visualize_detection(img_show, contours)
        path = os.path.join(cfg.vis_dir, '{}_deploy'.format(cfg.exp_name), meta['image_id'][idx])
        cv2.imwrite(path, pred_vis)

        # write to file
        write_to_file(contours,
                      os.path.join(output_dir, meta['image_id'][idx].replace('jpg', 'txt')))
def inference(model, detector, test_loader):
    """Run detection over ``test_loader`` and dump all polygons to ``result.json``.

    For every image the network output is split into text-region,
    text-center-line, sin, cos and radius maps, thresholded with the
    detector's confidence thresholds (optionally fused with a half-resolution
    pass when ``cfg.multi_scale`` is set) and passed to
    ``detector.complete_detect``. Resulting polygons get a confidence via
    ``calc_confidence``, are rescaled with ``rescale_padding_result``,
    filtered by area (< 100 is dropped) and collected in a dict that is
    written to ``cfg.output_dir/result.json``. When ``cfg.viz`` is set, a
    prediction / ground-truth comparison image is saved to ``cfg.vis_dir``.

    Args:
        model: network called as ``model(img)``; switched to eval mode here.
        detector: object exposing ``tr_conf_thresh``, ``tcl_conf_thresh`` and
            ``complete_detect(tr_mask, tcl_mask, sin, cos, radii)``.
        test_loader: iterable yielding
            ``(img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map, meta)``.
    """
    # Local import: the no-grad context is the only thing this block needs
    # from torch directly.
    import torch

    # NOTE(review): the dataset root is hard-coded to one developer's home
    # directory -- consider moving it into cfg.
    gt_json_path = os.path.join('/home/shf/fudan_ocr_system/datasets/', cfg.dataset, 'train_labels.json')

    with open(gt_json_path, 'r') as f:
        gt_dict = json.load(f)

    model.eval()
    result = dict()

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map, meta) in enumerate(test_loader):
        timer = {'model': 0, 'detect': 0, 'viz': 0, 'restore': 0}
        start = time.time()

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        # Inference only: skip autograd bookkeeping for the forward passes.
        with torch.no_grad():
            output = model(img)
            if cfg.multi_scale:
                # Second pass at half resolution, resized back to the
                # original spatial size so both outputs can be fused.
                size_h, size_w = img.shape[2:4]
                img_rescale = func.interpolate(img, scale_factor=0.5, mode='nearest')
                output_rescale = model(img_rescale)
                output_rescale = func.interpolate(output_rescale, size=(size_h, size_w), mode='nearest')

        timer['model'] = time.time() - start

        for idx in range(img.size(0)):
            start = time.time()
            print('detect {} / {} images: {}.'.format(i, len(test_loader), meta['image_id'][idx]))

            tr_pred = output[idx, 0:2].softmax(dim=0).data.cpu().numpy()
            tcl_pred = output[idx, 2:4].softmax(dim=0).data.cpu().numpy()
            sin_pred = output[idx, 4].data.cpu().numpy()
            cos_pred = output[idx, 5].data.cpu().numpy()
            radii_pred = output[idx, 6].data.cpu().numpy()

            # Saturate confident text-region pixels to 1 and keep the raw
            # score elsewhere, then gate the center-line scores with it.
            tr_pred_mask = np.where(tr_pred[1] > detector.tr_conf_thresh, 1, tr_pred[1])
            tcl_pred_mask = (tcl_pred * tr_pred_mask)[1] > detector.tcl_conf_thresh

            if cfg.multi_scale:
                # NOTE(review): the rescaled TR branch uses sigmoid while the
                # full-resolution branch uses softmax -- confirm intended.
                tr_pred_rescale = output_rescale[idx, 0:2].sigmoid().data.cpu().numpy()
                tcl_pred_rescale = output_rescale[idx, 2:4].softmax(dim=0).data.cpu().numpy()

                # Sum both TR scores and clip the sum at 1.
                tr_pred_scale_mask = np.where(tr_pred_rescale[1] + tr_pred[1] > 1, 1,
                                              tr_pred_rescale[1] + tr_pred[1])
                tr_pred_mask = tr_pred_scale_mask

                # weighted adding
                origin_ratio = 0.5
                rescale_ratio = 0.5
                tcl_pred = (tcl_pred * origin_ratio + tcl_pred_rescale * rescale_ratio).astype(np.float32)
                tcl_pred_mask = (tcl_pred * tr_pred_mask)[1] > detector.tcl_conf_thresh

            batch_result = detector.complete_detect(tr_pred_mask, tcl_pred_mask,
                                                    sin_pred, cos_pred, radii_pred)  # (n_tcl, 3)
            timer['detect'] = time.time() - start

            start = time.time()
            # visualization
            img_show = img[idx].permute(1, 2, 0).cpu().numpy()
            img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)
            H, W = meta['Height'][idx].item(), meta['Width'][idx].item()

            # get pred_contours
            contours = result2polygon(img_show, batch_result)

            if cfg.viz:
                # Round H / W down to a multiple of 32, then scale so the
                # longer side matches the corresponding img_show dimension.
                resize_H = H if H % 32 == 0 else (H // 32) * 32
                resize_W = W if W % 32 == 0 else (W // 32) * 32
                if resize_H > resize_W:
                    ratio = float(img_show.shape[0]) / resize_H
                else:
                    ratio = float(img_show.shape[1]) / resize_W
                resize_H = int(resize_H * ratio)
                resize_W = int(resize_W * ratio)

                # NOTE(review): lstrip/rstrip strip *character sets*, not the
                # literal prefix 'gt_' / suffix '.jpg'; this only works while
                # the second '_'-separated field is unaffected. Verify against
                # the dataset's file naming before changing.
                gt_info = gt_dict[int(meta['image_id'][idx].lstrip('gt_').rstrip('.jpg').split('_')[1])]

                # Map the ground-truth points into the resized image frame.
                gt_contours = []
                gt_cont = np.array(gt_info['points'])
                gt_cont[:, 0] = gt_cont[:, 0] * float(resize_W) / float(W)
                gt_cont[:, 1] = gt_cont[:, 1] * float(resize_H) / float(H)
                gt_contours.append(gt_cont.astype(np.int32))
                illegal_contours = mask2conts(meta['illegal_mask'][idx].cpu().numpy())

                predict_vis = visualize_detection(img_show, tr_pred_mask,
                                                  tcl_pred_mask.astype(np.uint8), contours.copy())
                gt_vis = visualize_detection(img_show, tr_mask[idx].cpu().numpy(),
                                             tcl_mask[idx].cpu().numpy(), gt_contours, illegal_contours)
                im_vis = np.concatenate([predict_vis, gt_vis], axis=0)
                path = os.path.join(cfg.vis_dir, meta['image_id'][idx])
                cv2.imwrite(path, im_vis)
            timer['viz'] = time.time() - start

            start = time.time()
            polygons = calc_confidence(contours, tr_pred)
            img_show, polygons = rescale_padding_result(img_show, polygons, H, W)

            # filter too small polygon. A comprehension is used instead of an
            # index loop so the outer batch counter `i` is no longer clobbered
            # (which corrupted the progress print for later images in a batch).
            polygons = [poly for poly in polygons if cv2.contourArea(poly['points']) >= 100]

            # convert np.array to list so the result is JSON-serializable
            for polygon in polygons:
                polygon['points'] = polygon['points'].tolist()

            result[meta['image_id'][idx].replace('.jpg', '').replace('gt', 'res')] = polygons
            timer['restore'] = time.time() - start

            print('Cost time {:.2f}s: model {:.2f}s, detect {:.2f}s, viz {:.2f}s, restore {:.2f}s'.format(
                timer['model'] + timer['detect'] + timer['viz'] + timer['restore'],
                timer['model'], timer['detect'], timer['viz'], timer['restore']))

    # write to json file
    with open(os.path.join(cfg.output_dir, 'result.json'), 'w') as f:
        json.dump(result, f)
    print("Output json file in {}.".format(cfg.output_dir))
def inference(detector, test_loader, output_dir):
    """Detect text on every sample, report timing and write result files.

    The output directory is prepared with ``osmkdir`` for every experiment
    except ``"MLT2017"``, where it is only created (via ``mkdirs``) when it
    does not exist yet. For each sample the detector's self-reported timings
    are accumulated and printed as running averages in milliseconds, a
    prediction / ground-truth visualization is saved under
    ``cfg.vis_dir/<exp_name>_test/``, and the rescaled contours are written in
    the format selected by ``cfg.exp_name``.

    Args:
        detector: object exposing
            ``detect(image, img_show) -> (contours, output, net_time, post_time)``
            where ``output`` provides ``'tr'``, ``'bbox'``, ``'backbone_time'``,
            ``'IM_time'`` and ``'detach_time'``.
        test_loader: iterable yielding ``(image, train_mask, tr_mask, meta)``.
        output_dir: directory that receives the result files.
    """
    total_time = 0.
    post_all_time = 0.
    backbone_all_time = 0.
    IM_all_time = 0.
    detach_all_time = 0.

    if cfg.exp_name != "MLT2017":
        osmkdir(output_dir)
    elif not os.path.exists(output_dir):
        mkdirs(output_dir)

    report = ('detect {} / {} images: {}. ({:.2f} fps); backbone-time:{:.2f}, IM-time:{:.2f}, '
              'post-time:{:0.2f}, Transfer-time:{:.2f}')

    for i, (image, train_mask, tr_mask, meta) in enumerate(test_loader):

        image, train_mask, tr_mask = to_device(image, train_mask, tr_mask)

        torch.cuda.synchronize()
        idx = 0  # test mode can only run with batch_size == 1

        # visualization
        img_show = image[idx].permute(1, 2, 0).cpu().numpy()
        img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)

        # get detection result; timing comes from the detector itself rather
        # than from a wall-clock measurement around the call.
        contours, output, net_time, post_time = detector.detect(image, img_show)

        total_time += (net_time + post_time)
        post_all_time += post_time
        backbone_all_time += output["backbone_time"]
        IM_all_time += output["IM_time"]
        detach_all_time += output["detach_time"]

        done = i + 1
        fps = done / total_time
        # Per-stage figures are running means converted from s to ms.
        print(report.format(done, len(test_loader), meta['image_id'][idx], fps,
                            backbone_all_time * 1000 / done,
                            IM_all_time * 1000 / done,
                            post_all_time * 1000 / done,
                            detach_all_time * 1000 / done))

        # These datasets are visualized with the detector's boxes instead of
        # the contours.
        if cfg.exp_name in ("Icdar2015", "MLT2017", "TD500"):
            pred_vis = visualize_detection(img_show, output['bbox'], output['tr'])
        else:
            pred_vis = visualize_detection(img_show, contours, output['tr'])

        # Keep only annotations that actually contain points.
        gt_contour = [
            annot[:n_annot].int().cpu().numpy()
            for annot, n_annot in zip(meta['annotation'][idx], meta['n_annotation'][idx])
            if n_annot.item() > 0
        ]
        gt_vis = visualize_gt(img_show, gt_contour, tr_mask[idx].cpu().numpy())

        im_vis = np.concatenate([pred_vis, gt_vis], axis=0)
        path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name),
                            meta['image_id'][idx].split(".")[0] + ".jpg")
        cv2.imwrite(path, im_vis)

        H, W = meta['Height'][idx].item(), meta['Width'][idx].item()
        img_show, contours = rescale_result(img_show, contours, H, W)

        # write to file
        if cfg.exp_name == "Icdar2015":
            fname = "res_" + meta['image_id'][idx].replace('jpg', 'txt')
            contours = data_transfer_ICDAR(contours)
            write_to_file(contours, os.path.join(output_dir, fname))
        elif cfg.exp_name == "MLT2017":
            # Results are grouped in a sub-directory named after cfg.checkepoch.
            out_dir = os.path.join(output_dir, str(cfg.checkepoch))
            if not os.path.exists(out_dir):
                mkdirs(out_dir)
            fname = meta['image_id'][idx].split("/")[-1].replace('ts', 'res')
            fname = fname.split(".")[0] + ".txt"
            data_transfer_MLT2017(contours, os.path.join(out_dir, fname))
        elif cfg.exp_name == "TD500":
            fname = "res_" + meta['image_id'][idx].split(".")[0] + ".txt"
            data_transfer_TD500(contours, os.path.join(output_dir, fname))
        else:
            fname = meta['image_id'][idx].replace('jpg', 'txt')
            write_to_file(contours, os.path.join(output_dir, fname))
def inference(detector, test_loader, output_dir):
    """Evaluate ``detector`` on ``test_loader`` and emit per-dataset result files.

    Directory setup: ``"MLT2017"`` creates ``output_dir`` with ``mkdirs`` only
    if it is missing; every other experiment calls ``osmkdir(output_dir)``.
    Each sample is timed between two CUDA synchronizations (a running fps is
    printed), a prediction-over-ground-truth image is assembled, and the
    rescaled contours are written according to ``cfg.exp_name``. Only the
    ``"MLT2017"`` branch saves the assembled image to disk.

    Args:
        detector: object exposing ``detect(image, img_show) -> (contours, output)``
            where ``output`` has ``'tr'`` and ``'tcl'`` entries.
        test_loader: iterable yielding
            ``(image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map, meta)``.
        output_dir: directory that receives the result files.
    """
    elapsed_total = 0.

    if cfg.exp_name == "MLT2017":
        if not os.path.exists(output_dir):
            mkdirs(output_dir)
    else:
        osmkdir(output_dir)

    sample = 0  # test mode can only run with batch_size == 1

    for count, batch in enumerate(test_loader, start=1):
        image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map, meta = batch
        image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            image, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        torch.cuda.synchronize()
        tic = time.time()

        # Displayable copy of the input: CHW -> HWC, scaled by stds, shifted
        # by means, converted to 8-bit.
        hwc = image[sample].permute(1, 2, 0).cpu().numpy()
        img_show = ((hwc * cfg.stds + cfg.means) * 255).astype(np.uint8)

        # Detection proper.
        contours, output = detector.detect(image, img_show)
        tr_pred = output['tr']
        tcl_pred = output['tcl']

        torch.cuda.synchronize()
        toc = time.time()

        elapsed_total += toc - tic
        fps = count / elapsed_total
        image_id = meta['image_id'][sample]
        print('detect {} / {} images: {}. ({:.2f} fps)'.format(count, len(test_loader), image_id, fps))

        pred_vis = visualize_detection(img_show, contours, tr_pred[1], tcl_pred[1])

        # Ground-truth polygons: drop empty annotations, truncate the rest to
        # their recorded length.
        gt_contour = [
            annot[:n_annot].int().cpu().numpy()
            for annot, n_annot in zip(meta['annotation'][sample], meta['n_annotation'][sample])
            if n_annot.item() > 0
        ]
        gt_vis = visualize_gt(img_show, gt_contour,
                              tr_mask[sample].cpu().numpy(),
                              tcl_mask[sample, :, :, 0].cpu().numpy())
        im_vis = np.concatenate([pred_vis, gt_vis], axis=0)

        height = meta['Height'][sample].item()
        width = meta['Width'][sample].item()
        img_show, contours = rescale_result(img_show, contours, height, width)

        # Result files, one naming scheme per dataset.
        exp_name = cfg.exp_name
        if exp_name == "Icdar2015":
            target = os.path.join(output_dir, "res_" + image_id.replace('jpg', 'txt'))
            contours = data_transfer_ICDAR(contours)
            write_to_file(contours, target)
        elif exp_name == "MLT2017":
            base_name = image_id.split("/")[-1]
            vis_path = os.path.join(cfg.vis_dir, '{}_test'.format(cfg.exp_name), base_name)
            cv2.imwrite(vis_path, im_vis)

            out_dir = os.path.join(output_dir, str(cfg.checkepoch))
            if not os.path.exists(out_dir):
                mkdirs(out_dir)

            stem = base_name.replace('ts', 'res').split(".")[0]
            data_transfer_MLT2017(contours, os.path.join(out_dir, stem + ".txt"))
        elif exp_name == "TD500":
            target = os.path.join(output_dir, "res_img_" + image_id.replace('jpg', 'txt'))
            data_transfer_TD500(contours, target)
        else:
            target = os.path.join(output_dir, image_id.replace('jpg', 'txt'))
            write_to_file(contours, target)
def Predict(self, image_path, output_img_path="output.jpg", output_txt_path="output.txt", tr_thresh=0.4, tcl_thresh=0.4):
    """Detect text in one image file and save a visualization plus contours.

    Three phases are timed and reported on stdout: image loading /
    preprocessing, inference, and writing the outputs.

    Args:
        image_path: path of the image, read with ``pil_load_img``.
        output_img_path: where the visualization image is written.
        output_txt_path: where the contours are written via
            ``self.write_to_file``.
        tr_thresh: forwarded to ``TextDetector`` as ``tr_thresh``.
        tcl_thresh: forwarded to ``TextDetector`` as ``tcl_thresh``.
    """
    cfg = self.system_dict["local"]["cfg"]
    model = self.system_dict["local"]["model"]

    start = time.time()

    image = pil_load_img(image_path)
    transform = BaseTransform(size=cfg.input_size, mean=cfg.means, std=cfg.stds)

    # Remember the size before the transform; it is used to rescale the
    # results afterwards.
    H, W, _ = image.shape

    # The transform also returns polygons, which are not needed here.
    image, _ = transform(image)

    # to pytorch channel sequence
    image = image.transpose(2, 0, 1)

    image = torch.from_numpy(np.expand_dims(image, axis=0))
    image = to_device(image)
    if cfg.cuda:
        torch.cuda.synchronize()
    end = time.time()
    print("Image loading time: {}".format(end - start))

    start = time.time()
    detector = TextDetector(model, tr_thresh=tr_thresh, tcl_thresh=tcl_thresh)

    # get detection result
    contours, _ = detector.detect(image)

    # Guarded like the synchronize above: calling it unconditionally fails
    # when running without CUDA.
    if cfg.cuda:
        torch.cuda.synchronize()
    end = time.time()
    print("Inference time - {}".format(end - start))

    start = time.time()
    img_show = image[0].permute(1, 2, 0).cpu().numpy()
    img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)
    img_show, contours = rescale_result(img_show, contours, H, W)

    pred_vis = visualize_detection(img_show, contours)
    cv2.imwrite(output_img_path, pred_vis)

    # write to file
    self.write_to_file(contours, output_txt_path)
    end = time.time()
    print("Writing output time - {}".format(end - start))