def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str,
           dataset_name: str, backbone_name: str, prob_thresh: float):
    image = transforms.Image.open(path_to_input_image)
    dataset_class = DatasetBase.from_name(dataset_name)
    image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooling_mode=Config.POOLING_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    forward_input = Model.ForwardInput.Eval(image_tensor.cuda())
    forward_output: Model.ForwardOutput.Eval = model.eval().forward(forward_input)

    detection_bboxes = forward_output.detection_bboxes / scale
    detection_classes = forward_output.detection_classes
    detection_probs = forward_output.detection_probs

    kept_indices = detection_probs > prob_thresh
    detection_bboxes = detection_bboxes[kept_indices]
    detection_classes = detection_classes[kept_indices]
    detection_probs = detection_probs[kept_indices]

    draw = ImageDraw.Draw(image)

    for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
        color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
        bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
        category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

        draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
        draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

    image.save(path_to_output_image)
    print(f'Output image is saved to {path_to_output_image}')
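# A hedged usage sketch for the single-image inference function above. The
# checkpoint and image paths are placeholders; 'voc2007' and 'resnet101' are
# example choices taken from elsewhere in this file, not required values.
_infer(path_to_input_image='input.jpg',
       path_to_output_image='output.jpg',
       path_to_checkpoint='checkpoints/model-90000.pth',
       dataset_name='voc2007',
       backbone_name='resnet101',
       prob_thresh=0.6)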
def _eval(path_to_checkpoint: str, dataset_name: str, backbone_name: str, path_to_data_dir: str, path_to_results_dir: str):
    dataset = DatasetBase.from_name(dataset_name)(path_to_data_dir, DatasetBase.Mode.EVAL,
                                                  Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    evaluator = Evaluator(dataset, path_to_data_dir, path_to_results_dir)

    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    Log.i('Start evaluating with 1 GPU (1 batch per GPU)')
    mean_ap, detail = evaluator.evaluate(model)
    Log.i('Done')

    Log.i('mean AP = {:.4f}'.format(mean_ap))
    Log.i('\n' + detail)
def _infer_stream(path_to_input_stream_endpoint: str, period_of_inference: int, path_to_checkpoint: str,
                  dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    if path_to_input_stream_endpoint.isdigit():
        path_to_input_stream_endpoint = int(path_to_input_stream_endpoint)
    video_capture = cv2.VideoCapture(path_to_input_stream_endpoint)

    with torch.no_grad():
        for sn in itertools.count(start=1):
            _, frame = video_capture.read()

            if sn % period_of_inference != 0:
                continue

            timestamp = time.time()

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(image)
            image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            draw = ImageDraw.Draw(image)

            for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

            image = np.array(image)
            frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            elapse = time.time() - timestamp
            fps = 1 / elapse
            cv2.putText(frame, f'FPS = {fps:.1f}', (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

            cv2.imshow('easy-faster-rcnn.pytorch', frame)
            if cv2.waitKey(10) == 27:
                break

    video_capture.release()
    cv2.destroyAllWindows()
def _infer(path_to_input_dir: str, path_to_output_dir: str, path_to_checkpoint: str,
           dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    images = glob.glob(path_to_input_dir + '/*.jpg')

    with torch.no_grad():
        for image in tqdm(images):
            name = image.split("/")[-1]
            image = transforms.Image.open(image).convert("RGB")
            image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            draw = ImageDraw.Draw(image)

            for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

            image.save(os.path.join(path_to_output_dir, name))

    print(f'Output images are saved to {path_to_output_dir}')
def _infer_websocket(path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    async def handler(websocket, path):
        print('Connection established:', path)

        with torch.no_grad():
            while True:
                frame = await websocket.recv()
                frame = np.frombuffer(frame, dtype=np.uint8).reshape(480, 640, 3)
                image = Image.fromarray(frame)
                image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

                detection_bboxes, detection_classes, detection_probs, _ = \
                    model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
                detection_bboxes /= scale

                kept_indices = detection_probs > prob_thresh
                detection_bboxes = detection_bboxes[kept_indices]
                detection_classes = detection_classes[kept_indices]
                detection_probs = detection_probs[kept_indices]

                message = []
                for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                    bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                    category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]
                    message.append({
                        'left': int(bbox.left),
                        'top': int(bbox.top),
                        'right': int(bbox.right),
                        'bottom': int(bbox.bottom),
                        'category': category
                    })

                message = json.dumps(message)
                await websocket.send(message)

    # Listen on all interfaces.
    server = websockets.serve(handler, host='0.0.0.0', port=8765, max_size=2 ** 32, compression=None)
    asyncio.get_event_loop().run_until_complete(server)
    print('Service is ready. Please navigate to http://127.0.0.1:8000/')
    asyncio.get_event_loop().run_forever()
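# A minimal client sketch for the websocket handler above, assuming the server
# runs locally on port 8765. It sends exactly what the handler expects (a raw
# 480x640x3 uint8 buffer) and prints the JSON list of detections it gets back.
# The zero frame is only a stand-in for a real camera image.
import asyncio
import json

import numpy as np
import websockets


async def send_one_frame():
    async with websockets.connect('ws://127.0.0.1:8765', max_size=2 ** 32) as ws:
        frame = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for a real frame
        await ws.send(frame.tobytes())
        detections = json.loads(await ws.recv())
        print(detections)  # list of {'left', 'top', 'right', 'bottom', 'category'} dicts


asyncio.get_event_loop().run_until_complete(send_one_frame())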
def __init__(self, filename):
    """Initialize the class."""
    self.logger = logging.getLogger(__name__)
    self.logger.setLevel(logging.DEBUG)

    self.dataset_class = DatasetBase.from_name('voc2007')
    self.backbone = BackboneBase.from_name('resnet101')(pretrained=False)
    self.model = Model(self.backbone, self.dataset_class.num_classes(), pooler_mode=Pooler.Mode.ALIGN,
                       anchor_ratios=[(1, 2), (1, 1), (2, 1)], anchor_sizes=[128, 256, 512],
                       rpn_pre_nms_top_n=6000, rpn_post_nms_top_n=300).cpu()
    self.load(filename)
def evaluate(model, path_to_images_dir: str, path_to_annotation: str, iou_thres, conf_thres, nms_thres,
             img_size, batch_size, num_workers):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.eval()

    # Get dataloader
    # NOTE: the dataset paths are hardcoded below, so path_to_images_dir and
    # path_to_annotation are currently unused.
    dataset = DatasetBase.from_name('tiny-person')(
        'data/tiny_set/train',
        'data/tiny_set/erase_with_uncertain_dataset/annotations/corner/task/tiny_set_train_sw640_sh512_all.json',
        DatasetBase.Mode.TRAIN)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False,
                                             num_workers=num_workers, collate_fn=dataset.collate_fn)

    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    labels = []  # NOTE: never populated, so ap_per_class receives an empty ground-truth label list
    sample_metrics = []  # List of tuples (TP, confs, pred)
    for imgs, targets in tqdm.tqdm(dataloader, desc="Detecting objects"):
        imgs = imgs.to(device)
        targets = targets.to(device)

        with torch.no_grad():
            outputs = model(imgs)
            outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres)

        sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thres)

    # Concatenate sample statistics
    true_positives, pred_scores, pred_labels = [torch.cat(x, 0) for x in list(zip(*sample_metrics))]
    precision, recall, AP, f1, ap_class = ap_per_class(true_positives, pred_scores, pred_labels, labels)

    return precision, recall, AP, f1, ap_class, dataset
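# A hedged usage sketch for the evaluation routine above, assuming `model` is an
# already-constructed detector. The thresholds, image size and batch settings are
# illustrative assumptions, and the two path arguments are ignored by the current
# implementation (the dataset paths are hardcoded inside the function).
precision, recall, AP, f1, ap_class, dataset = evaluate(
    model,
    path_to_images_dir='data/tiny_set/train',             # currently ignored
    path_to_annotation='data/tiny_set/annotations.json',  # currently ignored, placeholder path
    iou_thres=0.5, conf_thres=0.5, nms_thres=0.5,
    img_size=640, batch_size=2, num_workers=2)
print(f'mAP = {AP.mean():.4f}')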
def draw(path_to_input_image, dataset_name):
    image = transforms.Image.open(path_to_input_image)
    dataset_class = DatasetBase.from_name(dataset_name)
    image_tensor, scale = dataset_class.preprocess(image, 600.0, 1000.0)  # not used below

    # annotation_path = 'data/sunprimitive/annotations/val.json'
    annotation_path = 'data/container/vertices/MVI_3015.MP4.json'
    with open(annotation_path) as f:
        annotations = json.load(f)

    gt = annotations[path_to_input_image.split('/')[-1]]
    gt_vertices = [obj['vertices'] for obj in gt]

    image = np.array(image)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # PIL gives RGB; OpenCV expects BGR

    colors = ['red', 'green', 'blue', 'yellow', 'purple', 'white']

    for vert in gt_vertices:
        # Build the six faces of the ground-truth cuboid as closed quadrilaterals
        # from the eight projected vertices (vert[0]: x coordinates, vert[1]: y coordinates).
        quads = []
        quads.append(((int(vert[0][0]), int(vert[1][0])), (int(vert[0][1]), int(vert[1][1])),
                      (int(vert[0][3]), int(vert[1][3])), (int(vert[0][2]), int(vert[1][2])),
                      (int(vert[0][0]), int(vert[1][0]))))
        quads.append(((int(vert[0][0]), int(vert[1][0])), (int(vert[0][4]), int(vert[1][4])),
                      (int(vert[0][5]), int(vert[1][5])), (int(vert[0][1]), int(vert[1][1])),
                      (int(vert[0][0]), int(vert[1][0]))))
        quads.append(((int(vert[0][0]), int(vert[1][0])), (int(vert[0][4]), int(vert[1][4])),
                      (int(vert[0][6]), int(vert[1][6])), (int(vert[0][2]), int(vert[1][2])),
                      (int(vert[0][0]), int(vert[1][0]))))
        quads.append(((int(vert[0][1]), int(vert[1][1])), (int(vert[0][5]), int(vert[1][5])),
                      (int(vert[0][7]), int(vert[1][7])), (int(vert[0][3]), int(vert[1][3])),
                      (int(vert[0][1]), int(vert[1][1]))))
        quads.append(((int(vert[0][4]), int(vert[1][4])), (int(vert[0][5]), int(vert[1][5])),
                      (int(vert[0][7]), int(vert[1][7])), (int(vert[0][6]), int(vert[1][6])),
                      (int(vert[0][4]), int(vert[1][4]))))
        quads.append(((int(vert[0][2]), int(vert[1][2])), (int(vert[0][3]), int(vert[1][3])),
                      (int(vert[0][7]), int(vert[1][7])), (int(vert[0][6]), int(vert[1][6])),
                      (int(vert[0][2]), int(vert[1][2]))))

        for i in range(8):
            cv2.putText(image, str(i), (int(vert[0][i]), int(vert[1][i])),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 1)

        for i, quad in enumerate(quads):
            image = cv2.line(image, quad[0], quad[1], (255, 255, 0), 1)
            image = cv2.line(image, quad[1], quad[2], (255, 255, 0), 1)
            image = cv2.line(image, quad[2], quad[3], (255, 255, 0), 1)
            image = cv2.line(image, quad[3], quad[0], (255, 255, 0), 1)

    if len(gt_vertices) > 0:
        path_to_output_image = os.path.join('images/container_gt', path_to_input_image.split('/')[-1])
        cv2.imwrite(path_to_output_image, image)
def __init__(self, path_to_checkpoint, dataset_name='obstacle', backbone_name='resnet101', prob_thresh=0.6):
    self.path_to_checkpoint = path_to_checkpoint
    self.dataset_name = dataset_name
    self.backbone_name = backbone_name
    self.prob_thresh = prob_thresh

    self.dataset_class = DatasetBase.from_name(dataset_name)
    self.backbone = BackboneBase.from_name(backbone_name)(pretrained=False)

    # Set up model
    self.model = Model(self.backbone, self.dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                       anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                       rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                       rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    self.model.load(path_to_checkpoint)
    self.model.eval()  # Set in evaluation mode
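# A hypothetical `detect` method for the same wrapper class, intended to live
# alongside the __init__ above. It simply mirrors the single-image inference
# pipeline used elsewhere in this file; the method name and its return format
# (a list of (BBox, category, prob) tuples) are assumptions, not part of the
# original class.
def detect(self, image):
    with torch.no_grad():
        image_tensor, scale = self.dataset_class.preprocess(
            image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
        detection_bboxes, detection_classes, detection_probs, _ = \
            self.model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
        detection_bboxes /= scale  # map boxes back to original image coordinates

        kept_indices = detection_probs > self.prob_thresh
        results = []
        for bbox, cls, prob in zip(detection_bboxes[kept_indices].tolist(),
                                   detection_classes[kept_indices].tolist(),
                                   detection_probs[kept_indices].tolist()):
            results.append((BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3]),
                            self.dataset_class.LABEL_TO_CATEGORY_DICT[cls],
                            prob))
        return results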
def _infer_stream(path_to_input_stream_endpoint: str, path_to_output_dir: str, period_of_inference: int,
                  path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    if path_to_input_stream_endpoint.isdigit():
        path_to_input_stream_endpoint = int(path_to_input_stream_endpoint)
    video_capture = cv2.VideoCapture(path_to_input_stream_endpoint)

    with torch.no_grad():
        frame_num = 1
        for sn in itertools.count(start=1):
            _, frame = video_capture.read()

            if sn % period_of_inference != 0:
                continue

            timestamp = time.time()

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(image)
            image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            # draw = ImageDraw.Draw(image)

            jsonData = OrderedDict()
            resultData = OrderedDict()
            detectionResultDataList = []

            # NOTE: this assumes the endpoint is a file path; for a webcam index
            # (converted to int above) the split would fail.
            image_name = path_to_input_stream_endpoint.split('/')[-1]

            jsonData["image_path"] = image_name
            jsonData["modules"] = "Faster_R-CNN_ResNet101"
            jsonData["cam_id"] = "0"
            jsonData["frame_num"] = frame_num

            for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                color = 'yellow'
                # color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                detectionResultData = OrderedDict()
                detectionResultData["label"] = [{'description': category, 'score': prob}]
                detectionResultData["position"] = {
                    'x': bbox.left,
                    'y': bbox.top,
                    'w': (bbox.right - bbox.left),
                    'h': (bbox.bottom - bbox.top)
                }
                detectionResultDataList.append(detectionResultData)

                # draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                # draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

            resultData["module_name"] = "Faster_R-CNN_ResNet101"
            resultData["detection_result"] = detectionResultDataList
            jsonData["results"] = [resultData]

            output_file_path = path_to_output_dir + "/" + datetime.now().strftime("%Y-%m-%d__%H:%M:%S.%f__") \
                + image_name.split('.')[0] + ".json"
            with open('{}'.format(output_file_path), 'w', encoding="utf-8") as make_file:
                json.dump(jsonData, make_file, ensure_ascii=False, indent="\t")
            print(f'Saved JSON File : [NAME] {output_file_path}')

            frame_num += 1

            # image = np.array(image)
            # frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            # elapse = time.time() - timestamp
            # fps = 1 / elapse
            # cv2.putText(frame, f'FPS = {fps:.1f}', (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
            # cv2.imshow('easy-faster-rcnn.pytorch', frame)
            # if cv2.waitKey(10) == 27:
            #     break

    video_capture.release()
    cv2.destroyAllWindows()
def _infer(path_to_input_image: str, path_to_output_dir: str, path_to_checkpoint: str,
           dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    with torch.no_grad():
        image = transforms.Image.open(path_to_input_image)
        image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

        detection_bboxes, detection_classes, detection_probs, _ = \
            model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
        detection_bboxes /= scale

        kept_indices = detection_probs > prob_thresh
        detection_bboxes = detection_bboxes[kept_indices]
        detection_classes = detection_classes[kept_indices]
        detection_probs = detection_probs[kept_indices]

        jsonData = OrderedDict()
        resultData = OrderedDict()
        detectionResultDataList = []

        image_name = path_to_input_image.split('/')[-1]
        frame_num = image_name.split('.')[0].split('_')[-1]

        jsonData["image_path"] = image_name
        jsonData["modules"] = "Faster_R-CNN_ResNet101"
        jsonData["cam_id"] = "0"
        jsonData["frame_num"] = frame_num

        for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
            bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
            category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

            detectionResultData = OrderedDict()
            detectionResultData["label"] = [{'description': category, 'score': prob}]
            detectionResultData["position"] = {
                'x': bbox.left,
                'y': bbox.top,
                'w': (bbox.right - bbox.left),
                'h': (bbox.bottom - bbox.top)
            }
            detectionResultDataList.append(detectionResultData)

        resultData["module_name"] = "Faster_R-CNN_ResNet101"
        resultData["detection_result"] = detectionResultDataList
        jsonData["results"] = [resultData]

        output_file_path = path_to_output_dir + "/" + datetime.now().strftime("%Y-%m-%d__%H:%M:%S__") \
            + image_name.split('.')[0] + ".json"
        with open('{}'.format(output_file_path), 'w', encoding="utf-8") as make_file:
            json.dump(jsonData, make_file, ensure_ascii=False, indent="\t")
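# For reference, a hypothetical example of the JSON document the function above
# writes, assuming an input named 'video_0042.jpg' with a single detection. The
# field names come directly from the code; the values are made up.
#
# {
#     "image_path": "video_0042.jpg",
#     "modules": "Faster_R-CNN_ResNet101",
#     "cam_id": "0",
#     "frame_num": "0042",
#     "results": [
#         {
#             "module_name": "Faster_R-CNN_ResNet101",
#             "detection_result": [
#                 {
#                     "label": [{"description": "person", "score": 0.97}],
#                     "position": {"x": 10.5, "y": 20.0, "w": 55.2, "h": 120.8}
#                 }
#             ]
#         }
#     ]
# }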
def _train(dataset_name: str, backbone_name: str, path_to_data_dir: str, path_to_checkpoints_dir: str,
           path_to_resuming_checkpoint: Optional[str]):
    dataset = DatasetBase.from_name(dataset_name)(path_to_data_dir, DatasetBase.Mode.TRAIN,
                                                  Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    dataloader = DataLoader(dataset, batch_size=Config.BATCH_SIZE,
                            sampler=DatasetBase.NearestRatioRandomSampler(dataset.image_ratios,
                                                                          num_neighbors=Config.BATCH_SIZE),
                            num_workers=8, collate_fn=DatasetBase.padding_collate_fn, pin_memory=True)

    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)(pretrained=True)
    model = nn.DataParallel(
        Model(
            backbone, dataset.num_classes(), pooler_mode=Config.POOLER_MODE,
            anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
            rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
            anchor_smooth_l1_loss_beta=Config.ANCHOR_SMOOTH_L1_LOSS_BETA,
            proposal_smooth_l1_loss_beta=Config.PROPOSAL_SMOOTH_L1_LOSS_BETA
        ).cuda()
    )
    optimizer = optim.SGD(model.parameters(), lr=Config.LEARNING_RATE,
                          momentum=Config.MOMENTUM, weight_decay=Config.WEIGHT_DECAY)
    scheduler = WarmUpMultiStepLR(optimizer, milestones=Config.STEP_LR_SIZES, gamma=Config.STEP_LR_GAMMA,
                                  factor=Config.WARM_UP_FACTOR, num_iters=Config.WARM_UP_NUM_ITERS)

    step = 0
    time_checkpoint = time.time()
    losses = deque(maxlen=100)
    summary_writer = SummaryWriter(os.path.join(path_to_checkpoints_dir, 'summaries'))
    should_stop = False

    num_steps_to_display = Config.NUM_STEPS_TO_DISPLAY
    num_steps_to_snapshot = Config.NUM_STEPS_TO_SNAPSHOT
    num_steps_to_finish = Config.NUM_STEPS_TO_FINISH

    if path_to_resuming_checkpoint is not None:
        step = model.module.load(path_to_resuming_checkpoint, optimizer, scheduler)
        Log.i(f'Model has been restored from file: {path_to_resuming_checkpoint}')

    device_count = torch.cuda.device_count()
    assert Config.BATCH_SIZE % device_count == 0, 'The batch size is not divisible by the device count'
    Log.i('Start training with {:d} GPUs ({:d} batches per GPU)'.format(
        torch.cuda.device_count(), Config.BATCH_SIZE // torch.cuda.device_count()))

    while not should_stop:
        for _, (_, image_batch, _, bboxes_batch, labels_batch) in enumerate(dataloader):
            batch_size = image_batch.shape[0]
            image_batch = image_batch.cuda()
            bboxes_batch = bboxes_batch.cuda()
            labels_batch = labels_batch.cuda()

            anchor_objectness_losses, anchor_transformer_losses, proposal_class_losses, proposal_transformer_losses = \
                model.train().forward(image_batch, bboxes_batch, labels_batch)
            anchor_objectness_loss = anchor_objectness_losses.mean()
            anchor_transformer_loss = anchor_transformer_losses.mean()
            proposal_class_loss = proposal_class_losses.mean()
            proposal_transformer_loss = proposal_transformer_losses.mean()
            loss = anchor_objectness_loss + anchor_transformer_loss + proposal_class_loss + proposal_transformer_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            losses.append(loss.item())
            summary_writer.add_scalar('train/anchor_objectness_loss', anchor_objectness_loss.item(), step)
            summary_writer.add_scalar('train/anchor_transformer_loss', anchor_transformer_loss.item(), step)
            summary_writer.add_scalar('train/proposal_class_loss', proposal_class_loss.item(), step)
            summary_writer.add_scalar('train/proposal_transformer_loss', proposal_transformer_loss.item(), step)
            summary_writer.add_scalar('train/loss', loss.item(), step)
            step += 1

            if step == num_steps_to_finish:
                should_stop = True

            if step % num_steps_to_display == 0:
                elapsed_time = time.time() - time_checkpoint
                time_checkpoint = time.time()
                steps_per_sec = num_steps_to_display / elapsed_time
                samples_per_sec = batch_size * steps_per_sec
                eta = (num_steps_to_finish - step) / steps_per_sec / 3600
                avg_loss = sum(losses) / len(losses)
                lr = scheduler.get_lr()[0]
                Log.i(f'[Step {step}] Avg. Loss = {avg_loss:.6f}, Learning Rate = {lr:.8f} ({samples_per_sec:.2f} samples/sec; ETA {eta:.1f} hrs)')

            if step % num_steps_to_snapshot == 0 or should_stop:
                path_to_checkpoint = model.module.save(path_to_checkpoints_dir, step, optimizer, scheduler)
                Log.i(f'Model has been saved to {path_to_checkpoint}')

            if should_stop:
                break

    Log.i('Done')
        args.model_def, image_size=config.GLOBAL.IMAGE_SIZE[0])
    if torch.cuda.is_available():
        model = model.cuda()

    # If specified we start from checkpoint
    # if config.TRAIN.PRETRAINED_WEIGHTS is not None and config.TRAIN.PRETRAINED_WEIGHTS != '':
    #     if config.TRAIN.PRETRAINED_WEIGHTS.endswith(".pth"):
    #         backbone.load_state_dict(torch.load(config.TRAIN.PRETRAINED_WEIGHTS))
    #     else:
    #         backbone.load_darknet_weights(config.TRAIN.PRETRAINED_WEIGHTS)

    # Get dataloader
    dataset = DatasetBase.from_name('tiny-person')(
        config.TRAIN.PATH_TO_IMAGES_DIR, config.TRAIN.PATH_TO_ANNOTATIONS, DatasetBase.Mode.TRAIN)
    dataloader = DataLoader(dataset, batch_size=config.TRAIN.BATCH_SIZE,
                            sampler=DatasetBase.NearestRatioRandomSampler(dataset.image_ratios,
                                                                          num_neighbors=config.TRAIN.BATCH_SIZE),
                            num_workers=config.TRAIN.NUM_WORKERS, collate_fn=dataset.collate_fn, pin_memory=True)

    optimizer = torch.optim.Adam(model.parameters())

    metrics = [
        "grid_size",
        "loss",
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_min_side', type=float, help='default: {:g}'.format(Config.IMAGE_MIN_SIDE))
    parser.add_argument('--image_max_side', type=float, help='default: {:g}'.format(Config.IMAGE_MAX_SIDE))
    parser.add_argument('--anchor_ratios', type=str, help='default: "{!s}"'.format(Config.ANCHOR_RATIOS))
    parser.add_argument('--anchor_sizes', type=str, help='default: "{!s}"'.format(Config.ANCHOR_SIZES))
    parser.add_argument('--pooler_mode', type=str, choices=Pooler.OPTIONS, help='default: {.value:s}'.format(Config.POOLER_MODE))
    parser.add_argument('--rpn_pre_nms_top_n', type=int, help='default: {:d}'.format(Config.RPN_PRE_NMS_TOP_N))
    parser.add_argument('--rpn_post_nms_top_n', type=int, help='default: {:d}'.format(Config.RPN_POST_NMS_TOP_N))
    args = parser.parse_args()

    input_root = '/home/mmlab/CCTV_Server/models/detectors/FasterRCNN/frames'
    output_root = input_root + '_output'
    path_to_checkpoint = '/home/mmlab/CCTV_Server/models/detectors/FasterRCNN/checkpoints/obstacle/model-90000.pth'
    dataset_name = 'obstacle'
    backbone_name = 'resnet101'
    prob_thresh = 0.6

    Config.setup(image_min_side=args.image_min_side, image_max_side=args.image_max_side,
                 anchor_ratios=args.anchor_ratios, anchor_sizes=args.anchor_sizes,
                 pooler_mode=args.pooler_mode,
                 rpn_pre_nms_top_n=args.rpn_pre_nms_top_n, rpn_post_nms_top_n=args.rpn_post_nms_top_n)

    print('Arguments:')
    for k, v in vars(args).items():
        print(f'\t{k} = {v}')
    print(Config.describe())

    os.makedirs(output_root, exist_ok=True)

    input_sub_dirnames = [directory for directory in os.listdir(input_root)
                          if os.path.isdir(os.path.join(input_root, directory))]

    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    for sub_dir in input_sub_dirnames:
        input_sub_dirpath = os.path.join(input_root, sub_dir)
        output_sub_dirpath = os.path.join(output_root, sub_dir)
        filenames = [image_basename(f) for f in os.listdir(input_sub_dirpath) if is_image(f)]

        for filename in filenames:
            path_to_input_image = image_path(input_sub_dirpath, filename, '.jpg')
            # path_to_input_image = '/faster-RCNN/frames/1_360p/1_360p_0001.jpg'
            path_to_output_image = image_path(output_sub_dirpath, filename, '.jpg')
            # path_to_output_image = '/faster-RCNN/frames_output/1_360p/1_360p_0001.jpg'
            os.makedirs(os.path.join(os.path.curdir, os.path.dirname(path_to_output_image)), exist_ok=True)

            with torch.no_grad():
                image = transforms.Image.open(path_to_input_image)
                image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

                detection_bboxes, detection_classes, detection_probs, _, _ = \
                    model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
                detection_bboxes /= scale

                kept_indices = detection_probs > prob_thresh
                detection_bboxes = detection_bboxes[kept_indices]
                detection_classes = detection_classes[kept_indices]
                detection_probs = detection_probs[kept_indices]

                draw = ImageDraw.Draw(image)

                for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                    color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                    bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                    category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                    draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                    draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

                image.save(path_to_output_image)
                print(f'Output image is saved to {path_to_output_image}')
def _train(dataset_name: str, backbone_name: str, path_to_data_dir: str, path_to_checkpoints_dir: str,
           path_to_resuming_checkpoint: Optional[str]):
    dataset = DatasetBase.from_name(dataset_name)(path_to_data_dir, DatasetBase.Mode.TRAIN,
                                                  Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)

    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)(pretrained=True)
    model = Model(backbone, dataset.num_classes(), pooling_mode=Config.POOLING_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_scales=Config.ANCHOR_SCALES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    optimizer = optim.SGD(model.parameters(), lr=Config.LEARNING_RATE,
                          momentum=Config.MOMENTUM, weight_decay=Config.WEIGHT_DECAY)
    scheduler = MultiStepLR(optimizer, milestones=Config.STEP_LR_SIZES, gamma=Config.STEP_LR_GAMMA)

    step = 0
    time_checkpoint = time.time()
    losses = deque(maxlen=100)
    summary_writer = SummaryWriter(os.path.join(path_to_checkpoints_dir, 'summaries'))
    should_stop = False

    num_steps_to_display = Config.NUM_STEPS_TO_DISPLAY
    num_steps_to_snapshot = Config.NUM_STEPS_TO_SNAPSHOT
    num_steps_to_finish = Config.NUM_STEPS_TO_FINISH

    if path_to_resuming_checkpoint is not None:
        step = model.load(path_to_resuming_checkpoint, optimizer, scheduler)
        Log.i(f'Model has been restored from file: {path_to_resuming_checkpoint}')

    Log.i('Start training')

    while not should_stop:
        for batch_index, (_, image_batch, _, bboxes_batch, labels_batch) in enumerate(dataloader):
            assert image_batch.shape[0] == 1, 'only batch size of 1 is supported'

            image = image_batch[0].cuda()
            bboxes = bboxes_batch[0].cuda()
            labels = labels_batch[0].cuda()

            forward_input = Model.ForwardInput.Train(image, gt_classes=labels, gt_bboxes=bboxes)
            forward_output: Model.ForwardOutput.Train = model.train().forward(forward_input)

            anchor_objectness_loss, anchor_transformer_loss, proposal_class_loss, proposal_transformer_loss = forward_output
            loss = anchor_objectness_loss + anchor_transformer_loss + proposal_class_loss + proposal_transformer_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            losses.append(loss.item())
            summary_writer.add_scalar('train/anchor_objectness_loss', anchor_objectness_loss.item(), step)
            summary_writer.add_scalar('train/anchor_transformer_loss', anchor_transformer_loss.item(), step)
            summary_writer.add_scalar('train/proposal_class_loss', proposal_class_loss.item(), step)
            summary_writer.add_scalar('train/proposal_transformer_loss', proposal_transformer_loss.item(), step)
            summary_writer.add_scalar('train/loss', loss.item(), step)
            step += 1

            if step == num_steps_to_finish:
                should_stop = True

            if step % num_steps_to_display == 0:
                elapsed_time = time.time() - time_checkpoint
                time_checkpoint = time.time()
                steps_per_sec = num_steps_to_display / elapsed_time
                samples_per_sec = dataloader.batch_size * steps_per_sec
                eta = (num_steps_to_finish - step) / steps_per_sec / 3600
                avg_loss = sum(losses) / len(losses)
                lr = scheduler.get_lr()[0]
                Log.i(f'[Step {step}] Avg. Loss = {avg_loss:.6f}, Learning Rate = {lr:.6f} ({samples_per_sec:.2f} samples/sec; ETA {eta:.1f} hrs)')

            if step % num_steps_to_snapshot == 0 or should_stop:
                path_to_checkpoint = model.save(path_to_checkpoints_dir, step, optimizer, scheduler)
                Log.i(f'Model has been saved to {path_to_checkpoint}')

            if should_stop:
                break

    Log.i('Done')
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str,
           dataset_name: str, backbone_name: str, prob_thresh: float):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).to(device)
    model.load(path_to_checkpoint)

    if os.path.isfile(path_to_input_image):
        files = [path_to_input_image]
    else:
        files = os.listdir(path_to_input_image)
        print('Running inference on folder:', path_to_input_image)

    with torch.no_grad():
        for file in tqdm(files):
            image = transforms.Image.open(os.path.join(path_to_input_image, file))
            image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_probs, detection_vertices, _ = \
                model.eval().forward(image_tensor.unsqueeze(dim=0).to(device))
            detection_bboxes /= scale
            detection_vertices /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_probs = detection_probs[kept_indices]
            detection_vertices = detection_vertices[kept_indices]

            draw = ImageDraw.Draw(image)

            for bbox, prob, vert in zip(detection_bboxes.tolist(), detection_probs.tolist(), detection_vertices.tolist()):
                color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = "cuboid"

                draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

                # The six faces of the predicted cuboid as closed polylines over the
                # eight projected vertices (vert[0]: x coordinates, vert[1]: y coordinates).
                quads = []
                quads.append(((int(vert[0][0]), int(vert[1][0])), (int(vert[0][1]), int(vert[1][1])),
                              (int(vert[0][3]), int(vert[1][3])), (int(vert[0][2]), int(vert[1][2])),
                              (int(vert[0][0]), int(vert[1][0]))))
                quads.append(((int(vert[0][0]), int(vert[1][0])), (int(vert[0][4]), int(vert[1][4])),
                              (int(vert[0][5]), int(vert[1][5])), (int(vert[0][1]), int(vert[1][1])),
                              (int(vert[0][0]), int(vert[1][0]))))
                quads.append(((int(vert[0][0]), int(vert[1][0])), (int(vert[0][4]), int(vert[1][4])),
                              (int(vert[0][6]), int(vert[1][6])), (int(vert[0][2]), int(vert[1][2])),
                              (int(vert[0][0]), int(vert[1][0]))))
                quads.append(((int(vert[0][1]), int(vert[1][1])), (int(vert[0][5]), int(vert[1][5])),
                              (int(vert[0][7]), int(vert[1][7])), (int(vert[0][3]), int(vert[1][3])),
                              (int(vert[0][1]), int(vert[1][1]))))
                quads.append(((int(vert[0][4]), int(vert[1][4])), (int(vert[0][5]), int(vert[1][5])),
                              (int(vert[0][7]), int(vert[1][7])), (int(vert[0][6]), int(vert[1][6])),
                              (int(vert[0][4]), int(vert[1][4]))))
                quads.append(((int(vert[0][2]), int(vert[1][2])), (int(vert[0][3]), int(vert[1][3])),
                              (int(vert[0][7]), int(vert[1][7])), (int(vert[0][6]), int(vert[1][6])),
                              (int(vert[0][2]), int(vert[1][2]))))

                for quad in quads:
                    draw.line(quad, fill=color)

            output_path = os.path.join(path_to_output_image, file)
            image.save(output_path)

            if detection_probs.size()[0] > 0:
                max_index = torch.argmax(detection_probs)
                detection_vertices = detection_vertices[max_index]
                detection_vertices = detection_vertices.cpu().numpy()
                with open(os.path.join(path_to_output_image, file + '.npy'), 'wb') as f:
                    np.save(f, detection_vertices)
def _train(dataset_name: str, backbone_name: str, path_to_data_dir: str, path_to_checkpoints_dir: str,
           path_to_resuming_checkpoint: Optional[str]):
    dataset = DatasetBase.from_name(dataset_name)(path_to_data_dir, DatasetBase.Mode.TRAIN,
                                                  Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    dataloader = DataLoader(dataset, batch_size=Config.BATCH_SIZE,
                            sampler=DatasetBase.NearestRatioRandomSampler(dataset.image_ratios,
                                                                          num_neighbors=Config.BATCH_SIZE),
                            num_workers=0,  # num_workers is set to 0 to make debugging easier
                            collate_fn=DatasetBase.padding_collate_fn, pin_memory=True)

    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)(pretrained=True)
    model = nn.DataParallel(
        Model(backbone, dataset.num_classes(), pooler_mode=Config.POOLER_MODE,
              anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
              rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
              anchor_smooth_l1_loss_beta=Config.ANCHOR_SMOOTH_L1_LOSS_BETA,
              proposal_smooth_l1_loss_beta=Config.PROPOSAL_SMOOTH_L1_LOSS_BETA
              ).cuda())

    # Variant without DataParallel, kept for easier debugging:
    # model = Model(
    #     backbone, dataset.num_classes(), pooler_mode=Config.POOLER_MODE,
    #     anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
    #     rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
    #     anchor_smooth_l1_loss_beta=Config.ANCHOR_SMOOTH_L1_LOSS_BETA, proposal_smooth_l1_loss_beta=Config.PROPOSAL_SMOOTH_L1_LOSS_BETA
    # ).cuda()

    '''
    Training parameters:
    IMAGE_MIN_SIDE: float = 600.0
    IMAGE_MAX_SIDE: float = 1000.0
    ANCHOR_RATIOS: List[Tuple[int, int]] = [(1, 2), (1, 1), (2, 1)]
    ANCHOR_SIZES: List[int] = [128, 256, 512]
    POOLER_MODE: Pooler.Mode = Pooler.Mode.ALIGN

    RPN_PRE_NMS_TOP_N: int = 12000
    RPN_POST_NMS_TOP_N: int = 2000

    ANCHOR_SMOOTH_L1_LOSS_BETA: float = 1.0
    PROPOSAL_SMOOTH_L1_LOSS_BETA: float = 1.0

    BATCH_SIZE: int = 1
    LEARNING_RATE: float = 0.001
    MOMENTUM: float = 0.9
    WEIGHT_DECAY: float = 0.0005
    STEP_LR_SIZES: List[int] = [50000, 70000]
    STEP_LR_GAMMA: float = 0.1
    WARM_UP_FACTOR: float = 0.3333
    WARM_UP_NUM_ITERS: int = 500

    NUM_STEPS_TO_DISPLAY: int = 20
    NUM_STEPS_TO_SNAPSHOT: int = 10000
    NUM_STEPS_TO_FINISH: int = 90000
    '''

    # Why momentum:
    # 1. damps the oscillation caused by an ill-conditioned problem
    # 2. reduces the variance introduced by stochastic gradients (weight decay also helps with this)
    # Two kinds of decay used during optimization:
    # 1. weight decay: adds the L2 norm of the weights to the total loss
    # 2. learning-rate decay: the scheduler reduces the learning rate as training steps increase, following some policy
    # optimizer = optim.Adam(model.parameters())
    optimizer = optim.SGD(model.parameters(), lr=Config.LEARNING_RATE,
                          momentum=Config.MOMENTUM, weight_decay=Config.WEIGHT_DECAY)
    scheduler = WarmUpMultiStepLR(optimizer, milestones=Config.STEP_LR_SIZES, gamma=Config.STEP_LR_GAMMA,
                                  factor=Config.WARM_UP_FACTOR, num_iters=Config.WARM_UP_NUM_ITERS)

    step = 0
    time_checkpoint = time.time()
    losses = deque(maxlen=100)
    summary_writer = SummaryWriter(os.path.join(path_to_checkpoints_dir, 'summaries'))
    should_stop = False

    num_steps_to_display = Config.NUM_STEPS_TO_DISPLAY
    num_steps_to_snapshot = Config.NUM_STEPS_TO_SNAPSHOT
    num_steps_to_finish = Config.NUM_STEPS_TO_FINISH

    if path_to_resuming_checkpoint is not None:
        step = model.module.load(path_to_resuming_checkpoint, optimizer, scheduler)
        Log.i(f'Model has been restored from file: {path_to_resuming_checkpoint}')

    device_count = torch.cuda.device_count()  # BATCH_SIZE defaults to 1
    assert Config.BATCH_SIZE % device_count == 0, 'The batch size is not divisible by the device count'
    Log.i('Start training with {:d} GPUs ({:d} batches per GPU)'.format(
        torch.cuda.device_count(), Config.BATCH_SIZE // torch.cuda.device_count()))

    while not should_stop:
        for _, (_, image_batch, _, bboxes_batch, labels_batch) in enumerate(dataloader):
            # training uses the voc2007 dataset
            batch_size = image_batch.shape[0]   # (1,)
            image_batch = image_batch.cuda()    # (1, 3, h, w)
            bboxes_batch = bboxes_batch.cuda()  # (1, gt_n, 4)
            labels_batch = labels_batch.cuda()  # (1, gt_n)

            anchor_objectness_losses, anchor_transformer_losses, proposal_class_losses, proposal_transformer_losses = \
                model.train().forward(image_batch, bboxes_batch, labels_batch)
            # RPN losses
            anchor_objectness_loss = anchor_objectness_losses.mean()
            anchor_transformer_loss = anchor_transformer_losses.mean()
            # detection-head losses
            proposal_class_loss = proposal_class_losses.mean()
            proposal_transformer_loss = proposal_transformer_losses.mean()
            loss = anchor_objectness_loss + anchor_transformer_loss + proposal_class_loss + proposal_transformer_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            losses.append(loss.item())
            summary_writer.add_scalar('train/anchor_objectness_loss', anchor_objectness_loss.item(), step)
            summary_writer.add_scalar('train/anchor_transformer_loss', anchor_transformer_loss.item(), step)
            summary_writer.add_scalar('train/proposal_class_loss', proposal_class_loss.item(), step)
            summary_writer.add_scalar('train/proposal_transformer_loss', proposal_transformer_loss.item(), step)
            summary_writer.add_scalar('train/loss', loss.item(), step)
            step += 1

            if step == num_steps_to_finish:
                should_stop = True

            if step % num_steps_to_display == 0:
                elapsed_time = time.time() - time_checkpoint
                time_checkpoint = time.time()
                steps_per_sec = num_steps_to_display / elapsed_time
                samples_per_sec = batch_size * steps_per_sec
                eta = (num_steps_to_finish - step) / steps_per_sec / 3600
                avg_loss = sum(losses) / len(losses)
                lr = scheduler.get_lr()[0]
                # lr = optimizer.param_groups[0]['lr']
                Log.i(f'[Step {step}] Avg. Loss = {avg_loss:.6f}, Learning Rate = {lr} ({samples_per_sec:.2f} samples/sec; ETA {eta:.1f} hrs)')

            # test: save an early checkpoint to verify that saving works
            if step == 10:
                path_to_checkpoint = model.module.save(path_to_checkpoints_dir, step, optimizer, scheduler)
                # path_to_checkpoint = model.module.save(path_to_checkpoints_dir, step, optimizer)
                Log.i(f'Model has been saved to {path_to_checkpoint}')

            if step % num_steps_to_snapshot == 0 or should_stop:
                path_to_checkpoint = model.module.save(path_to_checkpoints_dir, step, optimizer, scheduler)
                # path_to_checkpoint = model.module.save(path_to_checkpoints_dir, step, optimizer)
                Log.i(f'Model has been saved to {path_to_checkpoint}')

            if should_stop:
                break

    Log.i('Done')
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str,
           dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    '''
    Default options:
    pooler_mode = Config.POOLER_MODE = Pooler.Mode.ALIGN
    anchor_ratios = Config.ANCHOR_RATIOS = [(1, 2), (1, 1), (2, 1)]
    anchor_sizes: for inference an extra 64 is added by default, so the final list is [64, 128, 256, 512]
    RPN NMS settings used for Eval:
        RPN_PRE_NMS_TOP_N: int = 6000
        RPN_POST_NMS_TOP_N: int = 300
    '''

    with torch.no_grad():
        # Preprocessing rescales the input image so that at least one side meets min_side or max_side.
        # YOLO needs a fixed input size; that is not required here.
        image = transforms.Image.open(path_to_input_image)
        image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

        # Add a batch dimension first, then run forward in eval mode.
        # shapes: (gd_n, 4), (gd_n,), (gd_n,)
        detection_bboxes, detection_classes, detection_probs, _ = \
            model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
        # The image was multiplied by `scale` during preprocessing, so the detection boxes
        # are divided by `scale` to map them back to the original image.
        detection_bboxes /= scale

        kept_indices = detection_probs > prob_thresh         # 0.6
        detection_bboxes = detection_bboxes[kept_indices]    # (gd_thresh_n, 4)
        detection_classes = detection_classes[kept_indices]  # (gd_thresh_n,)
        detection_probs = detection_probs[kept_indices]      # (gd_thresh_n,)

        draw = ImageDraw.Draw(image)

        for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
            color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
            bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
            category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

            draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
            draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

        image.save(path_to_output_image)
        print(f'Output image is saved to {path_to_output_image}')
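# A small worked example of the `scale` bookkeeping used above. This mirrors the
# usual Faster R-CNN resizing rule (shorter side scaled toward IMAGE_MIN_SIDE
# without letting the longer side exceed IMAGE_MAX_SIDE); the repository's exact
# preprocess may differ in detail, and the numbers are purely illustrative.
image_min_side, image_max_side = 600.0, 1000.0
width, height = 1280, 720
scale = min(image_min_side / min(width, height), image_max_side / max(width, height))  # 0.78125
resized = (width * scale, height * scale)  # (1000.0, 562.5): the network sees the resized image
box_on_resized = [100.0, 50.0, 400.0, 300.0]
box_on_original = [v / scale for v in box_on_resized]  # [128.0, 64.0, 512.0, 384.0]
print(scale, resized, box_on_original)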
def val(model, dataset_name, path_to_data_dir, device):
    dataset = DatasetBase.from_name(dataset_name)(path_to_data_dir, DatasetBase.Mode.EVAL,
                                                  Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    evaluator = Evaluator(dataset, path_to_data_dir)
    mean_ap, detail = evaluator.evaluate_pck(model.module, device)
    print('VALIDATION', detail, mean_ap)