def draw(scores, classification, transformed_anchors, img, labels, writer):
    """Show the result of object detection."""
    unnormalize = UnNormalizer()
    idxs = np.where(scores > 0.5)
    img = np.array(255 * unnormalize(img)).copy()
    img[img < 0] = 0
    img[img > 255] = 255
    img = np.transpose(img, (1, 2, 0))
    img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
    for j in range(idxs[0].shape[0]):
        bbox = transformed_anchors[idxs[0][j], :]
        x1 = int(bbox[0])
        y1 = int(bbox[1])
        x2 = int(bbox[2])
        y2 = int(bbox[3])
        label_name = labels[int(classification[idxs[0][j]])]
        score = scores[idxs[0][j]]
        draw_caption(img, (x1, y1, x2, y2), '{}:{:.2f}'.format(label_name, score.item()))
        cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
        writer.add_image(label_name, img.transpose(2, 0, 1))
    return
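# Hedged usage sketch for draw() above: assumes a model that returns
# (scores, classification, transformed_anchors) as in the other snippets in
# this section, and a normalized CHW float image tensor. The SummaryWriter
# setup, log directory, and helper name are illustrative assumptions, not
# part of the original code.
from torch.utils.tensorboard import SummaryWriter

def log_detections(model, img_tensor, labels):
    # img_tensor is a normalized CHW float tensor, as draw() expects
    writer = SummaryWriter(log_dir='runs/detections')  # assumed log location
    with torch.no_grad():
        scores, classification, transformed_anchors = model(img_tensor.unsqueeze(0).float())
    draw(scores, classification, transformed_anchors, img_tensor, labels, writer)
    writer.close()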
def detect_single_image(checkpoint, image_path, visualize=False):
    device = torch.device(type='cuda') if torch.cuda.is_available() else torch.device(type='cpu')
    configs = deepcopy(checkpoint['model_specs']['training_configs'])
    configs.update(checkpoint['hp_values'])  # dict.update() mutates in place and returns None
    labels = checkpoint['labels']
    num_classes = len(labels)
    retinanet = ret50(num_classes=num_classes,
                      scales=configs['anchor_scales'],
                      ratios=configs['anchor_ratios'])  # TODO: make depth an input parameter
    retinanet.load_state_dict(checkpoint['model'])
    retinanet = retinanet.to(device=device)
    retinanet.eval()

    img = skimage.io.imread(image_path)
    if len(img.shape) == 2:
        img = skimage.color.gray2rgb(img)
    img = img.astype(np.float32) / 255.0
    transform = transforms.Compose([Normalizer(), Resizer(min_side=608)])  # TODO: make this dynamic
    data = transform({'img': img, 'annot': np.zeros((0, 5))})
    img = data['img']
    img = img.unsqueeze(0)
    img = img.permute(0, 3, 1, 2)

    with torch.no_grad():
        scores, classification, transformed_anchors = retinanet(img.to(device=device).float())
    idxs = np.where(scores.cpu() > 0.5)[0]
    scale = data['scale']
    detections_list = []
    for j in range(idxs.shape[0]):
        bbox = transformed_anchors[idxs[j], :]
        label_idx = int(classification[idxs[j]])
        label_name = labels[label_idx]
        score = scores[idxs[j]].item()
        # undo the resize so boxes can be evaluated against ground truth
        bbox /= scale
        bbox = bbox.round()  # round() is not in-place; reassign the result
        x1 = int(bbox[0])
        y1 = int(bbox[1])
        x2 = int(bbox[2])
        y2 = int(bbox[3])
        detections_list.append([label_name, str(score), str(x1), str(y1), str(x2), str(y2)])

    img_name = image_path.split('/')[-1].split('.')[0]
    filename = img_name + '.txt'
    path = os.path.dirname(image_path)
    filepathname = os.path.join(path, filename)
    with open(filepathname, 'w', encoding='utf8') as f:
        for single_det_list in detections_list:
            for x in single_det_list:
                f.write(str(x))
                f.write(' ')
            f.write('\n')
    if visualize:
        unnormalize = UnNormalizer()
    return filepathname
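# Hedged usage sketch for detect_single_image(): the checkpoint keys
# ('model_specs', 'hp_values', 'labels', 'model') mirror exactly what the
# function reads above; the file paths are illustrative assumptions.
checkpoint = torch.load('weights/retinanet_checkpoint.pt', map_location='cpu')
detections_file = detect_single_image(checkpoint, 'samples/example.jpg')
print('detections written to', detections_file)  # one "label score x1 y1 x2 y2" line per box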
def visualize(args):
    model_path = args.model_path
    image_path = args.image_path
    use_gpu = args.use_gpu
    retinanet = torch.load(model_path)
    custom_labels = {"cobia"}
    label_map = {k: v + 1 for v, k in enumerate(custom_labels)}
    label_map["background"] = 0
    rev_label_map = {v: k for k, v in label_map.items()}  # inverse mapping
    if use_gpu:
        retinanet = retinanet.cuda()
    unnormalize = UnNormalizer()
    retinanet.eval()
    with torch.no_grad():
        st = time.time()
        img = cv2.imread(image_path)
        img = img.astype(np.float32) / 255.0
        mean = np.array([[[0.485, 0.456, 0.406]]])
        std = np.array([[[0.229, 0.224, 0.225]]])
        img = (img.astype(np.float32) - mean) / std
        image_resizer = Resize_Img()
        img = image_resizer(img)["img"]
        img = np.expand_dims(img, axis=0)
        img = collat(img)
        scores, classification, transformed_anchors = retinanet(img.cuda().float())
        print("Elapsed time: {}".format(time.time() - st))
        idxs = np.where(scores.cpu() > 0.5)
        img = np.array(255 * unnormalize(img[0, :, :, :])).copy()
        img[img < 0] = 0
        img[img > 255] = 255
        img = np.transpose(img, (1, 2, 0))
        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
        for j in range(idxs[0].shape[0]):
            bbox = transformed_anchors[idxs[0][j], :]
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            label_name = rev_label_map[int(classification[idxs[0][j]])]
            draw_caption(img, (x1, y1, x2, y2), label_name)
            cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
            print(label_name, x1, y1, x2, y2)
        cv2.imwrite("out.png", img)
def __init__(self):
    self.model = None
    self.transform = transforms.Compose([Normalizer(), Resizer()])
    self.unnormalize = UnNormalizer()
    self.overlap_threshold = 0.6
    self.score_threshold = 0.5
    self.distance_threshold = 1.
    self.bboxes = None
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)
    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    unnormalize = UnNormalizer()

    mAP = csv_eval.evaluate(dataset_val, retinanet)
    print(mAP)
def detect(checkpoint, pred_on_path, output_path, threshold=0.5, visualize=False, red_label='sick'):
    device = torch.device(type='cuda') if torch.cuda.is_available() else torch.device(type='cpu')
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)
    logger.info('inside ' + str(pred_on_path) + ': ' + str(os.listdir(pred_on_path)))
    dataset_val = PredDataset(pred_on_path=pred_on_path,
                              transform=transforms.Compose([Normalizer(), Resizer(min_side=608)]))  # TODO: make resize an input param
    logger.info('dataset prepared')
    dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=None)
    logger.info('data loader initialized')
    labels = checkpoint['labels']
    logger.info('labels are: ' + str(labels))
    num_classes = len(labels)
    configs = deepcopy(checkpoint['training_configs'])
    configs.update(checkpoint['hp_values'])
    logger.info('initializing object_detection model')
    model = retinanet(depth=checkpoint['depth'], num_classes=num_classes,
                      scales=configs['anchor_scales'], ratios=configs['anchor_ratios'])  # TODO: make depth an input parameter
    logger.info('loading weights')
    model.load_state_dict(checkpoint['model'])
    model = model.to(device=device)
    logger.info('model to device: ' + str(device))
    model.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    inference_times = []
    for idx, data in enumerate(dataloader_val):
        scale = data['scale'][0]
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = model(data['img'].to(device=device).float())
            elapsed_time = time.time() - st
            print('Elapsed time: {}'.format(elapsed_time))
            inference_times.append(elapsed_time)
            idxs = np.where(scores.cpu() > threshold)[0]
            if visualize:
                img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
                img[img < 0] = 0
                img[img > 255] = 255
                img = np.transpose(img, (1, 2, 0))
                img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            detections_list = []
            for j in range(idxs.shape[0]):
                bbox = transformed_anchors[idxs[j], :]
                if visualize:
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                label_idx = int(classification[idxs[j]])
                label_name = labels[label_idx]
                score = scores[idxs[j]].item()
                if visualize:
                    draw_caption(img, (x1, y1, x2, y2), label_name)
                    if red_label in label_name:
                        cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                    else:
                        cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=2)
                    print(label_name)
                # undo the resize so boxes can be evaluated against ground truth
                bbox /= scale
                bbox = bbox.round()  # round() is not in-place; reassign the result
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                detections_list.append([label_name, str(score), str(x1), str(y1), str(x2), str(y2)])
            img_name = dataset_val.image_names[idx].split('/')[-1]
            i_name = img_name.split('.')[0]
            filename = i_name + '.txt'
            filepathname = os.path.join(output_path, filename)
            with open(filepathname, 'w', encoding='utf8') as f:
                for single_det_list in detections_list:
                    for x in single_det_list:
                        f.write(str(x))
                        f.write(' ')
                    f.write('\n')
            if visualize:
                save_to_path = os.path.join(output_path, img_name)
                cv2.imwrite(save_to_path, img)
                cv2.waitKey(0)
    print('average inference time per image: ', np.mean(inference_times))
    return output_path
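# Hedged usage sketch for detect() above: the checkpoint keys ('labels',
# 'training_configs', 'hp_values', 'depth', 'model') mirror what the function
# reads; the paths and threshold are illustrative assumptions.
checkpoint = torch.load('weights/retinanet_checkpoint.pt', map_location='cpu')
results_dir = detect(checkpoint, pred_on_path='images/val', output_path='predictions',
                     threshold=0.5, visualize=True)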
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_test', help='Path to file containing test annotations (optional, see readme)')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    # COCO support is disabled in this script:
    # if parser.dataset == 'coco':
    #     dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
    #                               transform=transforms.Compose([Normalizer(), Resizer()]))
    if parser.dataset == 'csv':
        dataset_test = CSVDataset(train_file=parser.csv_test, class_list=parser.csv_classes,
                                  transform=transforms.Compose([Normalizer(), ValResizer()]), predict=True)
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=0, collate_fn=collater)

    retinanet = torch.load(parser.model)
    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    image_list = []
    x1_list = []
    width = []
    y1_list = []
    height = []
    label_list = []
    for idx, data in enumerate(dataloader_test):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            if (idx + 1) % 100 == 0:
                print(idx + 1)
            idxs = np.where(scores > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                # [36:] belongs inside the element to strip the directory prefix;
                # slicing the one-element list (the original code) yields an empty list
                image_list += [data['name'][0][36:]]
                # factor 2 evidently maps predictions back to the original resolution
                x1 = int(bbox[0]) * 2
                y1 = int(bbox[1]) * 2
                x2 = int(bbox[2]) * 2
                y2 = int(bbox[3]) * 2
                x1_list += [str(x1)]
                y1_list += [str(y1)]
                width += [str(x2 - x1)]
                height += [str(y2 - y1)]
                label_list += [1]
                label_name = dataset_test.labels[int(classification[idxs[0][j]])]
            if idxs[0].shape[0] == 0:
                image_list += [data['name'][0][36:]]
                x1_list += ['']
                y1_list += ['']
                width += ['']
                height += ['']
                label_list += [0]
            if (idx + 1) % 50 == 0:
                print(len(image_list), len(x1_list), len(y1_list), len(width), len(height), len(label_list))

    data = np.array([image_list])
    data = np.append(data, [x1_list], axis=0)
    data = np.append(data, [y1_list], axis=0)
    data = np.append(data, [width], axis=0)
    data = np.append(data, [height], axis=0)
    data = np.append(data, [label_list], axis=0)
    dataframe = pd.DataFrame(data=data.T)
    dataframe.to_csv("prediction.csv", index=False, sep=',')
import torch
import torch.nn as nn  # required below for nn.Module and nn.Conv2d
import math
import time
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch.utils.model_zoo as model_zoo
from torch.nn import init

from utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes
from anchors import Anchors
import losses
from lib.nms.pth_nms import pth_nms
from dataloader import UnNormalizer

unnormalize = UnNormalizer()


def nms(dets, thresh):
    """Dispatch to either the CPU or GPU NMS implementation. Accepts dets as a tensor."""
    return pth_nms(dets, thresh)


class PyramidFeatures(nn.Module):
    def __init__(self, C3_size, C4_size, C5_size, feature_size=256):
        super(PyramidFeatures, self).__init__()

        # upsample C5 to get P5 from the FPN paper
        self.P5_1 = nn.Conv2d(C5_size, feature_size,
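# Note on the nms() dispatcher above: if the compiled lib.nms extension is
# unavailable, torchvision ships an equivalent operator. A minimal sketch of a
# drop-in replacement, assuming dets is an (N, 5) tensor of
# [x1, y1, x2, y2, score] and that pth_nms likewise returns the indices of the
# kept boxes:
#
#     import torchvision
#
#     def nms(dets, thresh):
#         # torchvision.ops.nms takes boxes and scores separately and
#         # returns the indices of the boxes kept after suppression
#         return torchvision.ops.nms(dets[:, :4], dets[:, 4], thresh)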
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--save_type', help='Saved model type is state_dict or model file')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser.add_argument('--folder', help='Path to the evaluation images folder')
    parser.add_argument('--rec', type=bool)
    parser.add_argument('--video_file', help='Name of the file to be saved')
    parser = parser.parse_args(args)

    if parser.rec:
        assert parser.video_file is not None

    dataset_val = scotty_dataset(parser.folder, transform=transforms.Compose([Normalizer(), Resizer()]))
    dataset_val_viz = scotty_dataset(parser.folder, transform=transforms.Compose([Resizer()]))
    dataloader_val = DataLoader(dataset_val, num_workers=1, shuffle=False)

    # Alternative loading from a state_dict:
    # retinanet = model.resnet18(num_classes=dataset_val.num_classes())
    # retinanet.load_state_dict(torch.load(parser.model))
    retinanet = torch.load(parser.model)

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    # initialize video writer object
    if parser.rec:
        print("..Recording Video..")
        record = save_video(parser.video_file)

    pbar = tqdm.tqdm(range(len(dataloader_val)))
    alpha = 0.55
    # log a reference resnet18 graph to TensorBoard
    dummy_input = Variable(torch.rand(1, 3, 224, 224))
    with SummaryWriter(comment='resnet18') as w:
        model = torchvision.models.resnet18()
        w.add_graph(model, (dummy_input,))

    # ------------------------------ Inference loop ------------------------------ #
    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            torch.cuda.synchronize()
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].permute(0, 3, 1, 2).float().cuda())
            pbar.write('Elapsed time: {}'.format(time.time() - st))
            torch.cuda.synchronize()
            idxs = np.where(scores > 0.5)
            img = np.array(unnormalize(torch.squeeze(data["img"]).permute(2, 1, 0)).permute(2, 1, 0))
            img = cv2.cvtColor(img.astype(np.float32), cv2.COLOR_BGR2RGB)
            img = (255 * img).astype(np.uint8)
            img_dup = img.copy()
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                color = color_id(int(classification[idxs[0][j]]))
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color, -1)  # filled box; alpha-blended below
            pbar.update()
            pbar.set_description("Images Processed : {}/{}".format(idx, len(dataloader_val)))
            pbar.set_postfix_str("")  # set_postfix() expects kwargs; use set_postfix_str for a plain string
            cv2.addWeighted(img, alpha, img_dup, 1 - alpha, 0, img)
            if parser.rec:
                record.write(img)
            cv2.imshow('img', img)
            cv2.waitKey(0)
    if parser.rec:
        record.close()
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple visualizing script for a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--ROI_model', help='Path to ROI model (.pt) file.')
    parser.add_argument('--QRCode_model', help='Path to QRCode model (.pt) file.')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(ROI_mean, ROI_std), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=None, sampler=None)

    ROI_net = torch.load(parser.ROI_model)
    QRCode_net = torch.load(parser.QRCode_model)
    use_gpu = True
    if use_gpu:
        ROI_net = ROI_net.cuda()
        QRCode_net = QRCode_net.cuda(0)
    ROI_net.eval()
    QRCode_net.eval()
    unnormalize = UnNormalizer(ROI_mean, ROI_std)

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = ROI_net(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            # get_image_name(idx) is only valid because batch_size is 1 and both
            # batch_sampler and sampler are None, so the loader iterates
            # sequentially without shuffling; otherwise the index would not
            # match the image.
            fn = dataset_val.get_image_name(idx)
            print('fn of image:', fn)
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            print("image shape when drawcaption:", img.shape)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
            if idxs[0].shape[0] == 1:
                origin_img = cv2.imread(fn)
                ph, pw, _ = img.shape
                ret = convert_predict_to_origin_bbox(origin_img, pw, ph, x1, y1, x2, y2)
                if ret is None:
                    print("ERROR: convert predicted origin bbox error")
                    continue
                x1p, y1p, x2p, y2p = ret
                print("ROI predicted:", x1p, y1p, x2p, y2p)
                output_file.write(fn + ',' + str(x1p) + ',' + str(y1p) + ',' + str(x2p) + ',' + str(y2p) + ',ROI\n')
                print("!!!! FN {} saved!!!".format(fn))
                ROI = origin_img[y1p:y2p, x1p:x2p]
                cv2.rectangle(origin_img, (x1p, y1p), (x2p, y2p), color=(0, 0, 255), thickness=8)
                # normalize the ROI crop
                ROI = ROI.astype(np.float32) / 255.0
                ROI_normalized = (ROI - QRCode_mean) / QRCode_std
                # rescale the image so the smallest side is min_side
                rows, cols, cns = ROI_normalized.shape
                smallest_side = min(rows, cols)
                min_side = 600.0
                max_side = 900.0
                scale = min_side / smallest_side
                # check whether the largest side is now greater than max_side,
                # which can happen when images have a large aspect ratio
                largest_side = max(rows, cols)
                if largest_side * scale > max_side:
                    scale = max_side / largest_side
                # resize the image with the computed scale
                ROI_scale = skimage.transform.resize(ROI_normalized,
                                                     (int(round(rows * scale)), int(round(cols * scale))))
                # pad to a multiple of 32, as the FPN requires
                rows, cols, cns = ROI_scale.shape
                pad_w = 32 - rows % 32
                pad_h = 32 - cols % 32
                ROI_padded = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
                ROI_padded[:rows, :cols, :] = ROI_scale.astype(np.float32)
                x = torch.from_numpy(ROI_padded)
                print('x.shape:', x.shape)
                x = torch.unsqueeze(x, dim=0)
                print('x.shape after unsqueeze:', x.shape)
                x = x.permute(0, 3, 1, 2)
                print('x.shape after permute:', x.shape)
                scores, classification, transformed_anchors = QRCode_net(x.cuda().float())
                print('scores:', scores)
                print('classification:', classification)
                print('transformed_anchors:', transformed_anchors)
                idxs = np.where(scores.cpu() > 0.5)
                predict_height, predict_width, _ = ROI_padded.shape
                for j in range(idxs[0].shape[0]):
                    bbox = transformed_anchors[idxs[0][j], :]
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                    print("!!QRCode predicted bbox inside ROI:", x1, y1, x2, y2)
                    ret = convert_predict_to_origin_bbox(ROI, predict_width, predict_height, x1, y1, x2, y2)
                    if ret is None:
                        continue
                    qrcode_x1, qrcode_y1, qrcode_x2, qrcode_y2 = ret
                    print('qrcode(bbox):', qrcode_x1, qrcode_y1, qrcode_x2, qrcode_y2)
                    # shift ROI-relative coordinates back into the full image frame
                    qrcode_img_x1 = x1p + qrcode_x1
                    qrcode_img_y1 = y1p + qrcode_y1
                    qrcode_img_x2 = x1p + qrcode_x2
                    qrcode_img_y2 = y1p + qrcode_y2
                    print('!!!QRCode in image:', qrcode_img_x1, qrcode_img_y1, qrcode_img_x2, qrcode_img_y2)
                    cv2.rectangle(origin_img, (qrcode_img_x1, qrcode_img_y1), (qrcode_img_x2, qrcode_img_y2),
                                  color=(255, 0, 0), thickness=8)
                cv2.imwrite('origin_img_qrcode.png', origin_img)
                resized = cv2.resize(origin_img, (800, 600))
                cv2.imshow('result', resized)
            else:
                not_processed_file.write(fn + ",,,,,\n")
            if debug:
                cv2.imshow('img', img)
                cv2.setWindowTitle('img', fn)
                key = cv2.waitKey(0)
                if 'q' == chr(key & 255):
                    exit(0)
    output_file.close()
    not_processed_file.close()
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple visualizing script for a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(mean, std), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    # dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=None, sampler=None)

    retinanet = torch.load(parser.model)
    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    unnormalize = UnNormalizer(mean, std)

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            # get_image_name(idx) is only valid because batch_size is 1 and both
            # batch_sampler and sampler are None, so the loader iterates
            # sequentially without shuffling; otherwise the index would not
            # match the image.
            fn = dataset_val.get_image_name(idx)
            print('fn of image:', fn)
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            print("image shape when drawcaption:", img.shape)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
            if idxs[0].shape[0] == 1:
                origin_img = cv2.imread(fn)
                ret = convert_predict_to_origin_bbox(origin_img, img, x1, y1, x2, y2)
                if ret is None:
                    continue
                x1p, y1p, x2p, y2p = ret
                output_file.write(fn + ',' + str(x1p) + ',' + str(y1p) + ',' + str(x2p) + ',' + str(y2p) + ',ROI\n')
                print("!!!! FN {} saved!!!".format(fn))
            else:
                not_processed_file.write(fn + ",,,,,\n")
            if debug:
                cv2.imshow('img', img)
                cv2.setWindowTitle('img', fn)
                key = cv2.waitKey(0)
                if 'q' == chr(key & 255):
                    exit(0)
    output_file.close()
    not_processed_file.close()
def get_transcript(image_id, data, retinanet, score_threshold, nms_threshold, dataset_val, alphabet):
    image_name = image_id + '.jpg'
    retinanet.training = False
    gtxml_name = os.path.join(image_name.split('/')[-1].split('.')[-2])
    pxml = pagexml.PageXML()
    unnormalize = UnNormalizer()
    with torch.no_grad():
        st = time.time()
        im = data['img']
        im = im.cuda().float()
        if retinanet.module.htr_gt_box:
            scores, classification, transformed_anchors, transcriptions = retinanet([im, data['annot']])
        else:
            scores, classification, transformed_anchors, transcriptions = retinanet(im)
        idxs = np.where(scores.cpu() > score_threshold)
        img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
        img[img < 0] = 0
        img[img > 255] = 255
        img = np.transpose(img, (1, 2, 0))
        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
        width = img.shape[1]
        height = img.shape[0]

        conf = pagexml.ptr_double()
        pxml.newXml('retinanet_dets', image_name, width, height)
        page = pxml.selectNth("//_:Page", 0)
        reg = pxml.addTextRegion(page)
        pxml.setCoordsBBox(reg, 0, 0, width, height, conf)
        line = pxml.addTextLine(reg)
        pxml.setCoordsBBox(line, 0, 0, width, height, conf)
        words = []
        transcriptions = np.argmax(transcriptions.cpu(), axis=-1)
        for j in range(idxs[0].shape[0]):
            bbox = transformed_anchors[idxs[0][j], :]
            if idxs[0][j] >= transcriptions.shape[0]:
                continue
            transcription = transcriptions[idxs[0][j], :]
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            label_name = dataset_val.labels[int(classification[idxs[0][j]])]
            # Add a word to the line
            word = pxml.addWord(line, "ID" + str(j))
            # Set the word bounding box with a confidence
            pxml.setCoordsBBox(word, x1, y1, x2 - x1, y2 - y1, conf)
            transcription = labels_to_text(transcription, alphabet)
            # Set the text for the word
            conf.assign(0.9)
            pxml.setTextEquiv(word, transcription, conf)
            # Add the category property to the word
            pxml.setProperty(word, "category", label_name)
            words.append(word)
        words = pxml.select('//_:Word')
        order, groups = pxml.getLeftRightTopBottomReadingOrder(words, fake_baseline=True,
                                                               max_horiz_iou=1, prolong_alpha=0.0)
        line = pxml.selectNth('//_:TextLine')
        group_idx = 0
        idx_in_group = 0
        transcript_pred = []
        # concatenate the word transcriptions in reading order
        for n in order:
            word_idx = order.index(n)
            if idx_in_group >= groups[group_idx]:
                group_idx += 1
                idx_in_group = 0
            transcript_pred.append(pxml.getTextEquiv(words[n]))
            pxml.setProperty(words[n], 'word_idx', str(word_idx))
            pxml.setProperty(words[n], "line", str(group_idx))
            pxml.moveElem(words[n], line)
            idx_in_group += 1
    return " ".join(transcript_pred)
def infer(img_dir, classes_csv, model_fname, resnet_depth, score_thresh, out_dir, results_fname):
    # Create dataset
    img_list = []
    if not isinstance(img_dir, list):
        img_dir = [img_dir]
    for dir in img_dir:
        for file in os.listdir(dir):
            if file.endswith(".png"):
                img_list.append(dir + file)
    dataset_val = CustomDataset(img_list=img_list, class_list=classes_csv,
                                transform=transforms.Compose([Normalizer(), Resizer()]))
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)
    print(dataset_val.num_classes())

    # Create the model
    if resnet_depth == 18:
        retinanet = model.resnet18(num_classes=dataset_val.num_classes())
    elif resnet_depth == 34:
        retinanet = model.resnet34(num_classes=dataset_val.num_classes())
    elif resnet_depth == 50:
        retinanet = model.resnet50(num_classes=dataset_val.num_classes())
    elif resnet_depth == 101:
        retinanet = model.resnet101(num_classes=dataset_val.num_classes())
    elif resnet_depth == 152:
        retinanet = model.resnet152(num_classes=dataset_val.num_classes())
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    state_dict = torch.load(model_fname)
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]  # strip the `module.` prefix added by nn.DataParallel
        new_state_dict[name] = v
    # load params
    retinanet.load_state_dict(new_state_dict)

    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    results = []
    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}, Num objects: {}'.format(time.time() - st, len(scores)))
            idxs = np.where(scores > score_thresh)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0)).astype(np.uint8).copy()
            bboxes = []
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                # map box coordinates back to the original image resolution
                x1 = int(bbox[0] / data['scale'][0])
                y1 = int(bbox[1] / data['scale'][0])
                x2 = int(bbox[2] / data['scale'][0])
                y2 = int(bbox[3] / data['scale'][0])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                score = float(scores[idxs[0][j]])
                bboxes.append([x1, y1, x2, y2, score])
            img_fname = ntpath.basename(data['img_fname'][0])
            results.append([img_fname, bboxes])

    with open(out_dir + results_fname, "wb") as output_file:
        pickle.dump(results, output_file)
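# The `module.` stripping in infer() is only needed for checkpoints saved from
# an nn.DataParallel-wrapped model. A hedged helper (the name load_weights is
# illustrative) that handles both wrapped and unwrapped checkpoints:
def load_weights(net, model_fname):
    state_dict = torch.load(model_fname)
    # drop a leading 'module.' only when it is present
    cleaned = {k[len('module.'):] if k.startswith('module.') else k: v
               for k, v in state_dict.items()}
    net.load_state_dict(cleaned)
    return net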
def generate_pagexml(image_id, data, retinanet, score_threshold, dataset_val, nms_threshold):
    image_name = image_id + '.jpg'
    file = 'pagexmls/' + image_name
    alphabet = " abcdefghijklmnopqrstuvwxyz"  # 27 symbols, matching the 27-way slices below
    colors = get_n_random_colors(len(dataset_val.labels))
    gtxml_name = os.path.join(image_name.split('/')[-1].split('.')[-2])
    pxml = pagexml.PageXML()
    unnormalize = UnNormalizer()
    with torch.no_grad():
        st = time.time()
        im = data['img']
        im = im.cuda().float()
        scores, classification, transformed_anchors = retinanet([im, nms_threshold])
        print('Elapsed time: {}'.format(time.time() - st))
        idxs = np.where(scores > score_threshold)
        img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
        img[img < 0] = 0
        img[img > 255] = 255
        img = np.transpose(img, (1, 2, 0))
        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
        width = img.shape[1]
        height = img.shape[0]
        cv2.imwrite(file, img)

        conf = pagexml.ptr_double()
        pxml.newXml('retinanet_dets', image_name, width, height)
        page = pxml.selectNth("//_:Page", 0)
        reg = pxml.addTextRegion(page)
        pxml.setCoordsBBox(reg, 0, 0, width, height)
        line = pxml.addTextLine(reg)
        pxml.setCoordsBBox(line, 0, 0, width, height)
        words = []
        # legend: one label name per color in the top-left corner
        for k in range(len(dataset_val.labels)):
            cv2.putText(img, dataset_val.labels[k], (25, 25 + k * 15), cv2.FONT_HERSHEY_PLAIN, 1, colors[k], 2)
        for j in range(idxs[0].shape[0]):
            bbox = transformed_anchors[idxs[0][j], :]
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            label_name = dataset_val.labels[int(classification[idxs[0][j]])]
            cv2.rectangle(img, (x1, y1), (x2, y2), color=colors[int(classification[idxs[0][j]])], thickness=2)
            # Add a word to the line
            word = pxml.addWord(line, "ID" + str(j))
            # Set the word bounding box
            pxml.setCoordsBBox(word, x1, y1, x2 - x1, y2 - y1)
            # decode the per-character 27-way predictions appended to the box
            transcripts = []
            seq_len = int(bbox[4])
            for k in range(seq_len + 1):
                transcripts.append(np.argmax(bbox[(5 + k * 27):(5 + (k + 1) * 27)]))
            transcripts = np.array(transcripts)
            transcript = labels_to_text(transcripts, alphabet)
            draw_caption(img, (x1, y1, x2, y2),
                         "".join([alphabet[transcripts[k]] for k in range(len(transcripts))]))
            # Set the text for the word
            conf.assign(1)
            pxml.setTextEquiv(word, "".join([alphabet[transcripts[k]] for k in range(len(transcripts))]))
            # Add the category property to the word
            pxml.setProperty(word, "category", label_name)
            words.append(word)
        words = pxml.select('//_:Word')
        order, groups = pxml.getLeftRightTopBottomReadingOrder(words, fake_baseline=True,
                                                               max_horiz_iou=1, prolong_alpha=0.0)
        line = pxml.selectNth('//_:TextLine', 0)
        group_idx = 0
        idx_in_group = 0
        for n in order:
            word_idx = order.index(n)
            if idx_in_group >= groups[group_idx]:
                group_idx += 1
                idx_in_group = 0
            pxml.setProperty(words[n], 'word_idx', str(word_idx))
            pxml.setProperty(words[n], "line", str(group_idx))
            pxml.moveElem(words[n], line)
            idx_in_group += 1
        # Write XML to file
        pxml.write('pagexmls/' + gtxml_name + ".xml")
        cv2.imwrite(str(image_id) + '.jpg', img)
def generate_pagexml(image_id, data, retinanet, score_threshold, nms_threshold, dataset_val):
    image_name = image_id + '.jpg'
    im_file_out = 'pagexmls/' + image_name
    alphabet = retinanet.alphabet
    colors = get_n_random_colors(len(dataset_val.labels))
    gtxml_name = os.path.join(image_name.split('/')[-1].split('.')[-2])
    pxml = pagexml.PageXML()
    unnormalize = UnNormalizer()
    with torch.no_grad():
        st = time.time()
        im = data['img']
        im = im.cuda().float()
        print(retinanet.htr_gt_box)
        if retinanet.htr_gt_box:
            scores, classification, transformed_anchors, transcriptions = retinanet([im, data['annot']])
            score_threshold = 0
        else:
            scores, classification, transformed_anchors, transcriptions = retinanet(im)
        n_boxes_predicted = transformed_anchors.shape[0]
        print(n_boxes_predicted, "BOXES PREDICTED")
        img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
        img[img < 0] = 0
        img[img > 255] = 255
        img = np.transpose(img, (1, 2, 0))
        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
        width = img.shape[1]
        height = img.shape[0]
        cv2.imwrite(im_file_out, img)

        conf = pagexml.ptr_double()
        pxml.newXml('retinanet_dets', image_name, width, height)
        page = pxml.selectNth("//_:Page", 0)
        reg = pxml.addTextRegion(page)
        pxml.setCoordsBBox(reg, 0, 0, width, height, conf)
        line = pxml.addTextLine(reg)
        pxml.setCoordsBBox(line, 0, 0, width, height, conf)
        words = []
        # legend: one label name per color in the top-left corner
        for k in range(len(dataset_val.labels)):
            cv2.putText(img, dataset_val.labels[k], (25, 25 + k * 15), cv2.FONT_HERSHEY_PLAIN, 1, colors[k], 2)
        transcriptions = np.argmax(transcriptions.cpu(), axis=-1)
        for box_id in range(n_boxes_predicted):
            bbox = transformed_anchors[box_id, :]
            transcription = transcriptions[box_id, :]
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            label_name = dataset_val.labels[int(classification[box_id])]
            cv2.rectangle(img, (x1, y1), (x2, y2), color=colors[int(classification[box_id])], thickness=2)
            # Add a word to the line
            word = pxml.addWord(line, "ID" + str(box_id))
            # Set the word bounding box with a confidence
            pxml.setCoordsBBox(word, x1, y1, x2 - x1, y2 - y1, conf)
            transcription = labels_to_text(transcription, alphabet)
            draw_caption(img, (x1, y1, x2, y2), transcription)
            # Set the text for the word
            conf.assign(0.9)
            pxml.setTextEquiv(word, transcription, conf)
            # Add the category property to the word
            pxml.setProperty(word, "category", label_name)
            words.append(word)
        words = pxml.select('//_:Word')
        order, groups = pxml.getLeftRightTopBottomReadingOrder(words, fake_baseline=True,
                                                               max_horiz_iou=1, prolong_alpha=0.0)
        line = pxml.selectNth('//_:TextLine')
        group_idx = 0
        idx_in_group = 0
        for n in order:
            word_idx = order.index(n)
            if idx_in_group >= groups[group_idx]:
                group_idx += 1
                idx_in_group = 0
            pxml.setProperty(words[n], 'word_idx', str(word_idx))
            pxml.setProperty(words[n], "line", str(group_idx))
            pxml.moveElem(words[n], line)
            idx_in_group += 1
        # Write XML to file
        pxml.write('pagexmls/' + gtxml_name + ".xml")
        cv2.imwrite(os.path.join('pred_sample_ims', str(image_id) + '.jpg'), img)
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser.add_argument('--csv_train')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)
    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    cnt = 1
    f = open("./output_files/output_retinanet_INbreast.txt", 'w')
    for idx, data in enumerate(dataloader_val):
        print("Scale")
        print(data['scale'])
        scale = data['scale'][0]
        with torch.no_grad():
            name = dataset_val.names[idx]
            arr = name.split("/")
            filename = arr[len(arr) - 1]
            temp = data['filename'][0]
            temparr = temp.split("/")
            resname = temparr[len(temparr) - 1]
            print(resname)
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            idxs = np.where(scores >= 0.001)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            scores = scores.cpu().numpy()
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                score = scores[idxs[0][j]]  # index through idxs so the score matches the box
                print(score)
                # write the detection in original-image coordinates
                f.write(resname + "," + str(x1 / scale) + "," + str(y1 / scale) + "," +
                        str(x2 / scale) + "," + str(y2 / scale) + "," + str(score) + "," + label_name + "\n")
                print(x1, y1, x2, y2)
                if score >= 0.32:
                    draw_caption(img, (x1, y1, x2, y2), label_name)
                    cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
            cv2.imwrite('./results/' + resname, img)
            cnt += 1
    f.close()
def visualize(csv_val, csv_classes, model):
    dataset = "csv"
    if dataset == 'csv':
        dataset_val = CSVDataset(train_file=csv_val, class_list=csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = torch.load(model)
    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 180, 0), 1)

    def draw_caption_original(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[3] + 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[3] + 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 180), 1)

    kaggle_output = []
    for idx, data in enumerate(dataloader_val):
        print(idx)
        kaggle_row = []
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            idxs = np.where(scores > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            print('Scores', scores)
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            kaggle_row.append(get_filename(data['name'][0]))
            row = ''
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), "Predicted opacity")
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=2)
                # Kaggle PredictionString format: "score x y width height" per box
                score = scores[idxs[0][j]].item()  # index through idxs so the score matches the box
                if j > 0:
                    row += " "
                row += str(round(score, 2)) + " " + str(x1) + ' ' + str(y1) + ' ' + str(x2 - x1) + ' ' + str(y2 - y1)
            for ann in data['annot']:
                for annotation in ann:
                    if annotation[0] != -1:
                        draw_caption_original(img, (annotation[0], annotation[1],
                                                    annotation[2], annotation[3]), "Real opacity")
                        # cast to int: cv2.rectangle rejects float/tensor coordinates
                        cv2.rectangle(img, (int(annotation[0]), int(annotation[1])),
                                      (int(annotation[2]), int(annotation[3])), color=(0, 0, 255), thickness=2)
            cv2.imshow('img', img)
            kaggle_row.append(row)
            kaggle_output.append(kaggle_row)
            cv2.waitKey(0)

    import pandas as pd
    pd.DataFrame(kaggle_output, columns=['patientId', 'PredictionString']).to_csv(
        "/home/jdmaestre/PycharmProjects/test_kaggle.csv")
def bbox_extraction(file_list='./data/images2.csv'):
    weights_path = './models/csv_retinanet_25.pt'
    csv_classes = './classes.csv'
    dataset_val = CSVDataset(train_file=file_list, class_list=csv_classes,
                             transform=transforms.Compose([Normalizer(), Resizer()]))
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=False)
    retinanet.load_state_dict(torch.load(weights_path))
    use_gpu = True
    if torch.cuda.is_available():
        device = torch.device("cuda")
    if use_gpu:
        retinanet = retinanet.to(device)
    retinanet.eval()
    unnormalize = UnNormalizer()

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            scores, classification, transformed_anchors = retinanet(data['img'].to(device).float())

        def get_bbox(classification, transformed_anchors, label=0):
            """Return the first detection whose class matches `label`."""
            bbox = {}
            idx = np.where(classification == label)[0][0]
            co_ord = transformed_anchors[idx, :]
            bbox['x1'] = int(co_ord[0])
            bbox['y1'] = int(co_ord[1])
            bbox['x2'] = int(co_ord[2])
            bbox['y2'] = int(co_ord[3])
            return bbox

        scores = scores.cpu().numpy()
        classification = classification.cpu().numpy()
        transformed_anchors = transformed_anchors.cpu().numpy()

        bbox = {}
        bbox['neck'] = get_bbox(classification, transformed_anchors, label=0)
        bbox['stomach'] = get_bbox(classification, transformed_anchors, label=1)

        img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
        img[img < 0] = 0
        img[img > 255] = 255
        img = np.transpose(img, (1, 2, 0))
        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
        cv2.rectangle(img, (bbox['neck']['x1'], bbox['neck']['y1']),
                      (bbox['neck']['x2'], bbox['neck']['y2']), color=(0, 0, 255), thickness=2)
        cv2.rectangle(img, (bbox['stomach']['x1'], bbox['stomach']['y1']),
                      (bbox['stomach']['x2'], bbox['stomach']['y2']), color=(0, 0, 255), thickness=2)
        # cv2.imshow('img', img)
        # cv2.imwrite('./sample_11.jpg', img)
        # cv2.waitKey(0)
        # returns after the first image
        return bbox
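# Hedged usage sketch for bbox_extraction(): uses the function's own default
# file list; it returns the neck/stomach boxes of the first image it processes.
if __name__ == '__main__':
    boxes = bbox_extraction()
    print('neck:', boxes['neck'])
    print('stomach:', boxes['stomach'])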
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        # only --csv_val is defined, so read it here (the original referenced
        # the undefined parser.csv_train)
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)
    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                print(label_name)
            cv2.imshow('img', img)
            cv2.waitKey(0)
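# Hedged invocation sketch for main() above; the CSV and model paths are
# illustrative assumptions.
if __name__ == '__main__':
    main(['--dataset', 'csv', '--csv_val', 'annotations_val.csv',
          '--csv_classes', 'classes.csv', '--model', 'model_final.pt'])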
def predict(folder_path, model_path):
    dataset_val = datasets.ImageFolder(folder_path,
                                       transform=transforms.Compose([Normalizer_only_image(), Resizer_only_img()]))
    dataloader_val = DataLoader(dataset_val, collate_fn=collater_image_only, num_workers=1, batch_size=1)
    print(dataloader_val.dataset)

    retinanet = torch.load(model_path)
    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        print("idx", idx)
        with torch.no_grad():
            st = time.time()
            dataa = data[0]
            dataa = dataa.view(1, 3, 640, 640)  # NCHW, fixed 640x640 input
            scores, classification, transformed_anchors = retinanet(dataa.cuda().float())
            idxs = np.where(scores > 0.5)
            img = np.array(255 * unnormalize(dataa[0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            # import matplotlib.pyplot as plt
            # plt.imshow(img, cmap='gray')
            # plt.show()
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                # single-class model: the caption is a fixed label
                draw_caption(img, (x1, y1, x2, y2), 'Opacity')
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
            cv2.imshow('img', img)
            cv2.waitKey(0)