def start_evaluation(test_data_loader: DataLoader, model: FasterRCNN, device: str,
                     epoch: int, logger: Logger, args: argparse.Namespace) -> Tensor:
    """
    Evaluate the model on the test set

    :param test_data_loader: Data loader for test data
    :param model: Model that is being tested
    :param device: Device for the computation
    :param epoch: Current epoch
    :param logger: Logger for logging handling
    :param args: Arguments
    :return: Mean detection score over the test set
    """
    logger.info(f'Start evaluation after {epoch} epochs')
    model.eval()
    scores = []
    for idx, result in enumerate(test_data_loader):
        images = list(image.to(device) for image in result[0])
        targets = result[1]
        with torch.no_grad():
            outputs = model(images)
        for output_idx, element in enumerate(outputs):
            predicted_labels = element['labels']
            true_labels = targets[output_idx]['labels']
            if len(element['scores']) != 0:
                scores.append(torch.mean(element['scores']))
            if idx % args.print_status == 0:  # log every `print_status` batches
                logger.info(
                    f'Scores {element["scores"]} \n'
                    f'Labels predicted: {predicted_labels} Groundtruth labels: {true_labels}'
                )
    avg_score = torch.mean(torch.Tensor(scores))
    return avg_score
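# A minimal usage sketch (hypothetical names: `test_loader`, `model` and `log`
# are assumed to exist already; `print_status` sets the logging interval):
#
#   args = argparse.Namespace(print_status=10)
#   avg = start_evaluation(test_loader, model, 'cuda', epoch=5, logger=log, args=args)
#   log.info(f'Average detection score: {avg:.4f}')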
def demo():
    # load a pre-trained model for classification and return
    # only the features
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    # FasterRCNN needs to know the number of
    # output channels in a backbone. For mobilenet_v2, it's 1280
    # so we need to add it here
    backbone.out_channels = 1280

    # let's make the RPN generate 5 x 3 anchors per spatial
    # location, with 5 different sizes and 3 different aspect
    # ratios. We have a Tuple[Tuple[int]] because each feature
    # map could potentially have different sizes and
    # aspect ratios
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))

    # let's define what are the feature maps that we will
    # use to perform the region of interest cropping, as well as
    # the size of the crop after rescaling.
    # if your backbone returns a Tensor, featmap_names is expected to
    # be ['0']. More generally, the backbone should return an
    # OrderedDict[Tensor], and in featmap_names you can choose which
    # feature maps to use.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)

    # put the pieces together inside a FasterRCNN model
    model = FasterRCNN(backbone,
                       num_classes=2,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    model.eval()
    x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    predictions = model(x)
    print(predictions)
class SegmentationNetwork(nn.Module):
    def __init__(self, backbone=None, output_channels=2,
                 backbone_output_channels=512):
        super().__init__()
        if not backbone:
            b, _ = remove_backbone_head(resnet18(pretrained=False))
            backbone = b
        # ResNet18 produces 512-channel feature maps
        backbone.out_channels = backbone_output_channels
        self.segmentation_network = FasterRCNN(backbone,
                                               num_classes=output_channels)

    def forward(self, x, boxes=None):
        x = self.segmentation_network(x, boxes)
        return x

    def infer(self, x):
        self.eval()
        self.segmentation_network.eval()
        x = self.segmentation_network(x)
        return convert_bounding_box_inference(x)
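# A minimal usage sketch (hypothetical; `remove_backbone_head` and
# `convert_bounding_box_inference` are project helpers assumed to exist):
#
#   net = SegmentationNetwork()
#   detections = net.infer([torch.rand(3, 512, 512)])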
class ResNet50_FasterRCNN:
    def __init__(self, pretrained=False):
        # Building our FasterRCNN model for object detection
        backbone = resnet_fpn_backbone('resnet50', pretrained=pretrained)
        num_classes = 4 + 1  # 4 object classes + background
        # plain tuples are broadcast by AnchorGenerator: one size per FPN
        # level, all three aspect ratios at every level
        anchor_generator = AnchorGenerator(sizes=(40, 60, 150, 200, 250),
                                           aspect_ratios=(0.7, 1.0, 1.3))
        self.model = FRCNN(backbone,
                           num_classes=num_classes,
                           rpn_anchor_generator=anchor_generator)

    def train(self):
        self.model.train()

    def to(self, device):
        self.model.to(device)

    def eval(self):
        self.model.eval()

    def parameters(self):
        return self.model.parameters()

    def get_state_dict(self):
        return self.model.state_dict()

    def set_state_dict(self, state_dict):
        self.model.load_state_dict(state_dict)

    def fit_batch(self, images, target):
        return self.model(images, target)

    def predict_batch(self, images):
        return self.model(images)
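# A minimal smoke test: one dummy training step on random data. The image
# and target below are illustrative stand-ins, not real data.
if __name__ == "__main__":
    detector = ResNet50_FasterRCNN(pretrained=False)
    detector.train()
    images = [torch.rand(3, 300, 400)]
    targets = [{'boxes': torch.tensor([[10., 10., 100., 120.]]),
                'labels': torch.tensor([1])}]
    loss_dict = detector.fit_batch(images, targets)  # dict of training losses
    print({k: float(v) for k, v in loss_dict.items()})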
class TorchDetector:
    """ Torch object detector """

    def __init__(self, config, logger):
        self._logger = logger
        self._threshold = config['threshold']
        modelfile = config['model']
        self._device = config['device']  # cpu, cuda, cuda:0
        backbone = resnet_fpn_backbone('resnet50', False)
        self._model = FasterRCNN(backbone, 8)  # 8 classes
        checkpoint = torch.load(modelfile, map_location=self._device)
        self._model.load_state_dict(checkpoint['model_state_dict'])
        device = torch.device(self._device)
        self._model.to(device)
        self._model.eval()

    def stop(self):
        """ Destruction """

    def detectObjects(self, img) -> List[e.DetectedObject]:
        """ Implementation of detector interface """
        wsize = 1600
        hsize = 800
        _pretransform = A.Compose([
            A.Resize(hsize, wsize),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ])
        image_tensor = _pretransform(image=img)['image']
        tstart = time.time()
        outputs = self._model(
            image_tensor.unsqueeze(0).float().to(device=self._device))
        classes = outputs[0]['labels'].detach().cpu().numpy()
        scores = outputs[0]['scores'].detach().cpu().numpy()
        boxes = outputs[0]['boxes'].detach().cpu().numpy()
        self._logger.debug(
            f'Torch model inferring time: {time.time() - tstart}')
        result = zip(classes, scores, boxes)
        h, w, _ = img.shape
        wscale = w / wsize
        hscale = h / hsize
        return ObjectDetector.getDetectedObjectsCollection(
            result, hscale, wscale, self._threshold, False)
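# A minimal usage sketch (the config keys mirror what __init__ reads; the
# checkpoint path, logger and image path are placeholders):
#
#   config = {'threshold': 0.5, 'model': 'detector.pth', 'device': 'cpu'}
#   detector = TorchDetector(config, logging.getLogger('detector'))
#   objects = detector.detectObjects(cv2.imread('frame.jpg'))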
class Detect:
    def __init__(self):
        super().__init__()
        backbone = torchvision.models.vgg16(pretrained=False).features
        backbone.out_channels = 512
        anchor_sizes = ((8, 16, 32, 64, 128, 256, 512),)
        aspect_ratios = ((1 / 2, 1 / 3, 1 / 4, 1 / 5, 1 / 6,
                          1 / math.sqrt(2), 1, 2, math.sqrt(2),
                          3, 4, 5, 6, 7, 8),)
        anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                           aspect_ratios=aspect_ratios)
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3', '4'],
            output_size=7,
            sampling_ratio=2)
        self.model = FasterRCNN(backbone,
                                num_classes=7,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)
        self.device = torch.device('cpu')
        self.model.load_state_dict(torch.load('2.pth'))
        self.model.to(self.device)
        self.model.eval()

    def forward(self, img):
        img = torch.tensor(img, dtype=torch.float32) / 255
        img = img.permute((2, 0, 1))
        output = self.model([img.to(self.device)])
        boxes = output[0]['boxes']
        labels = output[0]['labels']
        scores = output[0]['scores']
        # track the best score seen so far for each field and keep the
        # highest-scoring box per label
        last = {'send': 0, 'number': 0, 'date': 0, 'quote': 0,
                'header': 0, 'motto': 0}
        result = {}
        for i, v in enumerate(labels):
            if v == 1 and scores[i] > last['send']:
                last['send'] = scores[i]
                result['send'] = boxes[i]
            elif v == 2 and scores[i] > last['number']:
                last['number'] = scores[i]
                result['number'] = boxes[i]
            elif v == 3 and scores[i] > last['date']:
                last['date'] = scores[i]
                result['date'] = boxes[i]
            elif v == 4 and scores[i] > last['quote']:
                last['quote'] = scores[i]
                result['quote'] = boxes[i]
            elif v == 5 and scores[i] > last['header']:
                last['header'] = scores[i]
                result['header'] = boxes[i]
            elif v == 6 and scores[i] > last['motto']:
                last['motto'] = scores[i]
                result['motto'] = boxes[i]
            # elif v == 7 and scores[i] > last['secrete']:
            #     last['secrete'] = scores[i]
            #     result['secrete'] = boxes[i]
            # elif v == 8 and scores[i] > last['sign']:
            #     last['sign'] = scores[i]
            #     result['sign'] = boxes[i]
        return result
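# A minimal usage sketch (assumes the '2.pth' checkpoint is present; the
# image path is a placeholder for an H x W x 3 uint8 array):
#
#   det = Detect()
#   fields = det.forward(cv2.imread('letter.jpg'))
#   if 'header' in fields:
#       print('header box:', fields['header'])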
EPOCH = 250
CLASSES = 3
DEVICE = torch.device("cuda")
BATCH_SIZE = 10

anchor_generator = AnchorGenerator(sizes=((32, 64),),
                                   aspect_ratios=((0.6, 1.0, 1.6),))
backbone = torchvision.models.vgg19(pretrained=False).features
backbone.out_channels = 512
model = FasterRCNN(backbone,
                   num_classes=CLASSES,
                   rpn_anchor_generator=anchor_generator)
model.load_state_dict(
    torch.load('models_new/' + 'model_' + str(EPOCH) + '.pth'))
model.to(DEVICE)
model.eval()

start_time = time.time()
ear_count = 0
for T in types:
    for E in ears:
        CTs = os.listdir(data_path + dataset_name + T + E)
        for CT in CTs:
            print('current path:{}'.format(data_path + dataset_name + T + E + CT))
            ear_count += 1
            img_names = glob.glob(data_path + dataset_name + T + E + CT + '/*.jpg')
            # sorted() returns a new list; sort in place so the order sticks
            img_names.sort(key=lambda x: x.split('\\')[-1])
            with torch.no_grad():
                start, end = 0, BATCH_SIZE
                path = data_path + result_name + T + E + CT
class FasterRCNNFood:
    def __init__(self,
                 backbone_name: str,
                 pretrained: bool = True,
                 finetune: bool = True,
                 num_classes: int = 2):
        self.__pretrained = pretrained
        self.__num_classes = num_classes
        self.__model_name = backbone_name

        backbone = build_backbone(backbone_name, pretrained, finetune)
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                           aspect_ratios=((0.5, 1.0, 2.0),))
        # feature map names are strings in recent torchvision versions
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=7,
                                                        sampling_ratio=2)
        self.model = FasterRCNN(backbone=backbone,
                                num_classes=num_classes,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)
        self.params = [p for p in self.model.parameters() if p.requires_grad]
        self.optimizer = torch.optim.Adam(params=self.params,
                                          lr=0.005,
                                          weight_decay=0.0005)
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer=self.optimizer, step_size=3, gamma=0.1)

    def train(self, data_loader: DataLoader, data_loader_test: DataLoader,
              num_epochs: int = 10, use_cuda: bool = True,
              epoch_save_ckpt: Union[int, list] = None, dir: str = None):
        """
        Method to train FasterRCNNFood model.
        Args:
            data_loader (torch.utils.data.DataLoader): data loader to train model on
            data_loader_test (torch.utils.data.DataLoader): data loader to evaluate model on
            num_epochs (int = 10): number of epochs to train the model
            use_cuda (bool = True): use cuda or not
            epoch_save_ckpt (list or int): epochs at which to save the model. If -1, save only the last epoch.
            dir (str = "models/"): directory where models are saved under the name "{model_name}_{date}_ep{epoch}.pth"
        """
        if epoch_save_ckpt == -1:
            epoch_save_ckpt = [num_epochs - 1]
        if not dir:
            dir = "models"
        dir = Path(dir)
        dir.mkdir(parents=True, exist_ok=True)
        # choose device
        if use_cuda and torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")
        # define dataset
        self.model.to(device)
        writer = SummaryWriter()
        for epoch in range(num_epochs):
            # train for one epoch, printing every 50 iterations
            train_one_epoch(self.model, self.optimizer, data_loader, device,
                            epoch, print_freq=50, writer=writer)
            # update the learning rate
            self.lr_scheduler.step()
            # evaluate on the test dataset
            evaluate(self.model, data_loader_test, device=device,
                     writer=writer, epoch=epoch)
            # save checkpoint
            if epoch in epoch_save_ckpt:
                self.save_checkpoint(dir.as_posix(), epoch)
        writer.close()
        print("That's it!")

    def save_checkpoint(self, dir: str, epoch: int):
        """
        Save a model checkpoint at a given epoch.
        Args:
            dir: folder to save the .pth file to
            epoch: epoch the model is at
        """
        state = {
            'epoch': epoch + 1,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'num_classes': self.__num_classes,
            'pretrained': self.__pretrained,
            "model_name": self.__model_name
        }
        now = datetime.now()
        filename = "{model_name}_{date}_ep{epoch}.pth".format(
            model_name=self.__model_name,
            date=now.strftime("%b%d_%H-%M"),
            epoch=epoch)
        torch.save(state, Path(dir) / filename)
        print("Checkpoint saved : {}".format(Path(dir) / filename))

    def predict(self, dataset, idx):
        img, _ = dataset[idx]
        img = img.to("cpu")
        self.model.eval()
        self.model.to("cpu")
        pred = self.model([img])
        return img, pred[0]

    @staticmethod
    def load_checkpoint(filename: str, cuda: bool = True) -> ("FasterRCNNFood", int):
        """
        Load a model checkpoint to continue training.
        Args:
            filename (str): filename/path of the checkpoint.pth
            cuda (bool = True): use cuda
        Returns:
            (FasterRCNNFood) model
            (int) number of epochs + 1 the model was trained with
        """
        device = torch.device("cuda") if (
            cuda and torch.cuda.is_available()) else torch.device("cpu")
        start_epoch = 0
        if Path(filename).exists():
            print("=> loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename, map_location=device)
            # Load params
            pretrained = checkpoint['pretrained']
            num_classes = checkpoint["num_classes"]
            start_epoch = checkpoint['epoch']
            model_name = checkpoint['model_name']
            # Build model key/architecture (pass num_classes by keyword so it
            # is not consumed by the `finetune` positional argument)
            model = FasterRCNNFood(model_name, pretrained,
                                   num_classes=num_classes)
            # Update model and optimizer
            model.model.load_state_dict(checkpoint['state_dict'])
            model.optimizer.load_state_dict(checkpoint['optimizer'])
            model.model = model.model.to(device)
            # now individually transfer the optimizer parts...
            for state in model.optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.to(device)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                filename, checkpoint['epoch']))
            return model, start_epoch
        else:
            print("=> no checkpoint found at '{}'".format(filename))

    @staticmethod
    def load_for_inference(filename: str, cuda: bool = True) -> "FasterRCNNFood":
        """
        Load a model checkpoint to make inference.
        Args:
            filename (str): filename/path of the checkpoint.pth
            cuda (bool = True): use cuda
        Returns:
            (FasterRCNNFood) model
        """
        device = torch.device("cuda") if (
            cuda and torch.cuda.is_available()) else torch.device("cpu")
        if Path(filename).exists():
            print("=> loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename, map_location=device)
            # Load params
            pretrained = checkpoint['pretrained']
            num_classes = checkpoint["num_classes"]
            model_name = checkpoint['model_name']
            # Build model key/architecture
            model = FasterRCNNFood(model_name, pretrained,
                                   num_classes=num_classes)
            # Update model weights and switch to eval mode
            model.model.load_state_dict(checkpoint['state_dict'])
            model.model = model.model.to(device)
            model.model = model.model.eval()
            print("=> loaded checkpoint '{}'".format(filename))
            return model
        else:
            print("=> no checkpoint found at '{}'".format(filename))
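# A minimal usage sketch (the backbone name, paths and loaders are
# placeholders for whatever build_backbone and your data pipeline provide):
#
#   model = FasterRCNNFood("mobilenet_v2", pretrained=True, num_classes=2)
#   model.train(train_loader, test_loader, num_epochs=10, epoch_save_ckpt=-1)
#   # later, resume training or serve:
#   model, start_epoch = FasterRCNNFood.load_checkpoint("models/mobilenet_v2_ep9.pth")
#   model = FasterRCNNFood.load_for_inference("models/mobilenet_v2_ep9.pth")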
        num_classes)
    return model


print("Note: download engine.py, utils.py, transforms.py, coco_eval.py and "
      "coco_utils.py from https://github.com/pytorch/vision/tree/master/references/detection "
      "and copy them into this directory")


# Helper functions for data augmentation / transformation
def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)


# Test the forward() method (optional)
'''
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

# For Training
images, targets = next(iter(data_loader))
images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]
output = model(images, targets)  # Returns losses and detections

# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)  # Returns predictions
'''
class FasterRCNNMODEL:
    # TODO: Later on enable passing model params
    def __init__(self, model_params=None):
        self.params = model_params
        self.model = None
        self.optimizer = None
        self.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')

    def set_backbone(self, backbone):
        """
        backbone is a string naming the backbone we want to use in the model.
        TODO: add more options
        """
        if 'vgg' in backbone.lower():
            pass  # TODO: add a VGG option
        elif 'mobilenet_v2' in backbone.lower():
            self.backbone = torchvision.models.mobilenet_v2(
                pretrained=True).features
            self.backbone.out_channels = 1280
        elif 'resnet50' in backbone.lower():
            # torchvision's resnet50 has no `.features` attribute; strip the
            # avgpool/fc head instead to get a 2048-channel feature extractor
            resnet = torchvision.models.resnet50(pretrained=True)
            self.backbone = nn.Sequential(*list(resnet.children())[:-2])
            self.backbone.out_channels = 2048

    def set_model(self):
        """
        Set model and determine configuration
        :return: None, generate self.model to be used for training and testing
        """
        # Default values: box_score_thresh = 0.05, box_nms_thresh = 0.5
        kwargs = {
            'box_score_thresh': 0.3,
            'box_nms_thresh': 0.3,
            'box_detections_per_img': 6
        }
        # self.model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False,
        #                                                                   pretrained_backbone=True,
        #                                                                   **kwargs)
        self.model = FasterRCNN(self.backbone, num_classes=7, **kwargs)
        num_classes = 7
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)

        # Allow Multiple GPUs:
        # if torch.cuda.device_count() > 1:
        #     self.model = nn.DataParallel(self.model)

        self.model = self.model.to(self.device)

        if self.params is None:
            params = [p for p in self.model.parameters() if p.requires_grad]
        else:
            # TODO: Enable user defined model params
            pass
        self.optimizer = torch.optim.SGD(params, lr=0.01)

    def train_model(self, train_loader, num_epochs):
        """
        Training (only!) of the model
        :param train_loader: DataLoader object
        :param num_epochs: int. Number of epochs to train the model
        :return: None
        """
        self.model.train()  # Set to training mode
        for epoch in range(num_epochs):
            for images, targets in train_loader:
                images = list(image.to(self.device) for image in images)
                targets = [{k: v.to(self.device) for k, v in t.items()}
                           for t in targets]
                # Zero gradients
                self.optimizer.zero_grad()
                # In training mode the model returns a dict of losses
                loss_dict = self.model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                losses.backward()
                # Update weights
                self.optimizer.step()
            print('Train Loss = {:.4f}'.format(losses.item()))

    def train_eval_model(self, train_loader, val_loader, num_epochs):
        """
        Train model and evaluate performance after each epoch
        :param train_loader: DataLoader object. Training images and targets
        :param val_loader: DataLoader object. Validation images and targets
        :param num_epochs: int. Number of epochs for training and validation
        :return:
        """
        # For evaluation
        imgs_name_list = []
        bbox_list = []
        labels_list = []
        for epoch in range(num_epochs):
            train_loss = 0
            val_loss = 0
            self.model.train()  # Set to training mode
            with torch.set_grad_enabled(True):
                for images, targets in train_loader:
                    # Pass data to GPU
                    images = list(image.to(self.device) for image in images)
                    targets = [{k: v.to(self.device) for k, v in t.items()}
                               for t in targets]
                    # Zero gradients
                    self.optimizer.zero_grad()
                    # In training mode the model returns a dict of losses
                    loss_dict = self.model(images, targets)
                    losses = sum(loss for loss in loss_dict.values())
                    train_loss += losses.item() * len(images)
                    # Backward prop & update weights
                    losses.backward()
                    self.optimizer.step()
            print('Train Loss = {:.4f}'.format(train_loss /
                                               len(train_loader.dataset)))

            # TODO: Calculate Dice and IoU loss for it
            with torch.no_grad():
                for idx, (imgs_name, images, targets) in enumerate(val_loader):
                    # keep the model in train mode so it returns losses
                    self.model.train()
                    images = list(image.to(self.device) for image in images)
                    targets = [{k: v.to(self.device) for k, v in t.items()}
                               for t in targets]
                    loss_dict = self.model(images, targets)
                    losses = sum(loss for loss in loss_dict.values())
                    val_loss += losses.item() * len(images)
                    if epoch == num_epochs - 1:
                        self.model.eval()  # Set model to evaluate performance
                        predictions = self.model(images)
                        # Think of moving all this into gen_out_file - looks nicer
                        imgs_name_list.extend(imgs_name)
                        bbox_list.extend([
                            pred['boxes'].int().cpu().tolist()
                            for pred in predictions
                        ])
                        labels_list.extend([
                            pred['labels'].int().cpu().tolist()
                            for pred in predictions
                        ])
                    # Optional - SEE the performance on the second last batch
                    if (epoch == num_epochs - 1) and idx == (len(val_loader) - 2):
                        self.model.eval()  # Set model to evaluate performance
                        predictions = self.model(images)
                        MiscUtils.view(images, predictions, k=len(images),
                                       model_type='faster_rcnn')
            DataUtils.gen_out_file('output_file.txt', imgs_name_list,
                                   bbox_list, labels_list)
            print('Validation Loss = {:.4f}'.format(
                val_loss / len(val_loader.dataset)))
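# A minimal usage sketch (loader names are placeholders):
#
#   frcnn = FasterRCNNMODEL()
#   frcnn.set_backbone('mobilenet_v2')
#   frcnn.set_model()
#   frcnn.train_eval_model(train_loader, val_loader, num_epochs=10)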
class glimpse_network(nn.Module):
    """
    A network that combines the "what" and the "where"
    into a glimpse feature vector `g_t`.

    - "what": glimpse extracted from the retina.
    - "where": location tuple where glimpse was extracted.

    Concretely, feeds the output of the retina `phi` to
    a fc layer and the glimpse location vector `l_t_prev`
    to a fc layer. Finally, these outputs are fed each
    through a fc layer and their sum is rectified.

    In other words:

        `g_t = relu( fc( fc(l) ) + fc( fc(phi) ) )`

    Args
    ----
    - h_g: hidden layer size of the fc layer for `phi`.
    - h_l: hidden layer size of the fc layer for `l`.
    - g: size of the square patches in the glimpses extracted by the retina.
    - k: number of patches to extract per glimpse.
    - s: scaling factor that controls the size of successive patches.
    - c: number of channels in each image.
    - x: a 4D Tensor of shape (B, H, W, C). The minibatch of images.
    - l_t_prev: a 2D tensor of shape (B, 2). Contains the glimpse
      coordinates [x, y] for the previous timestep `t-1`.

    Returns
    -------
    - g_t: a 2D tensor of shape (B, hidden_size).
      The glimpse representation returned by the glimpse
      network for the current timestep `t`.
    """

    def __init__(self, h_g, h_l, g, k, s, c):
        super(glimpse_network, self).__init__()
        self.retina = retina(g, k, s)
        self.feature_extractor = nn.Sequential(
            *list(resnet50(pretrained=True).children())[:-1])

        # detection feature
        backbone = torchvision.models.mobilenet_v2(pretrained=True).features
        backbone.out_channels = 1280
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                           aspect_ratios=((0.5, 1.0, 2.0),))
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=7,
                                                        sampling_ratio=2)
        self.detection_model = FasterRCNN(
            backbone,
            num_classes=2,
            rpn_anchor_generator=anchor_generator,
            box_roi_pool=roi_pooler)
        self.detection_model.eval()

        # glimpse layer
        # D_in = k * g * g * c  -- overridden: the resnet50 feature
        # extractor outputs 2048-dim vectors
        D_in = 2048
        self.fc1 = nn.Linear(D_in, h_g)

        # location layer
        D_in = 2
        self.fc2 = nn.Linear(D_in, h_l)

        self.fc3 = nn.Linear(h_g, h_g + h_l)
        self.fc4 = nn.Linear(h_l, h_g + h_l)

    def forward(self, x, l_t_prev, frame_index):
        # generate glimpse phi from image x
        phi = self.retina.foveate(x, l_t_prev, frame_index)
        # temp = self.detection_model(x[:, :, frame_index, :, :].squeeze())

        # train resnet or not
        # phi = self.feature_extractor(phi).detach()
        phi = self.feature_extractor(phi)
        phi = phi.view(phi.size(0), -1)

        # flatten location vector
        l_t_prev = l_t_prev.view(l_t_prev.size(0), -1)

        # feed phi and l to respective fc layers
        phi_out = F.relu(self.fc1(phi))
        l_out = F.relu(self.fc2(l_t_prev))

        what = self.fc3(phi_out)
        where = self.fc4(l_out)

        # feed to fc layer
        g_t = F.relu(what + where)

        return g_t
def get_result_from_model(test_img, thresh):
    test_data = torchvision.datasets.ImageFolder(
        'C:/Users/skyho/Desktop/test_image_folder/',
        loader=plt.imread,
        transform=transforms.ToTensor())
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 200),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    rcnn_v1 = FasterRCNN(backbone,
                         num_classes=32,
                         rpn_anchor_generator=anchor_generator,
                         box_roi_pool=roi_pooler)
    try:
        # model_path = "bs{0}_lr{1}_epoch{2}_checkpoint_{3}".format(8, 0.00005, 2, 15)
        checkpoint = torch.load('C:/Users/skyho/Desktop/final_model.pth',
                                map_location='cpu')
        rcnn_v1.load_state_dict(checkpoint['model_state_dict'])
        rcnn_v1.eval()
    except IOError:
        print("Can't find saved model~")

    result = []
    with torch.no_grad():
        result.append(rcnn_v1([test_data[0][0]]))

    # plot the boxes on the result image, print labels,
    # save the image somewhere and return the path
    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 1
    fontColor = (255, 255, 255)
    lineType = 2

    cv2_im = []
    cv2_im.append(cv2.imread(test_data.imgs[0][0]))

    i = 0
    first_box = result[i][0]['boxes'][0].unsqueeze(0)
    box_id = 0
    for box in result[i][0]['boxes']:
        # keep the box if it is the first one or barely overlaps the first box
        if (box_id == 0 or
                jaccard(first_box, box.unsqueeze(0)).tolist()[0][0] < 0.6):
            if result[i][0]['scores'].tolist()[box_id] >= thresh:
                x1 = int(box[0])
                y1 = int(box[1])
                x2 = int(box[2])
                y2 = int(box[3])
                # skip boxes of the same label nested inside another box
                flag = True
                for each in range(result[i][0]['boxes'].shape[0]):
                    if (each != box_id and
                            result[i][0]['scores'].tolist()[each] >= thresh and
                            jaccard(first_box, result[i][0]['boxes'][each].unsqueeze(0)).tolist()[0][0] < 0.6):
                        o_x1 = int(result[i][0]['boxes'][each][0])
                        o_y1 = int(result[i][0]['boxes'][each][1])
                        o_x2 = int(result[i][0]['boxes'][each][2])
                        o_y2 = int(result[i][0]['boxes'][each][3])
                        if (x1 >= o_x1 - 3 and y1 >= o_y1 - 3 and
                                x2 <= o_x2 + 3 and y2 <= o_y2 + 3 and
                                result[i][0]['labels'][box_id] == result[i][0]['labels'][each]):
                            flag = False
                            break
                if flag:
                    cv2_im[i] = cv2.rectangle(cv2_im[i], (x1, y1), (x2, y2),
                                              (0, 255, 0), 3)
                    cv2.putText(cv2_im[i],
                                classes[result[i][0]['labels'][box_id]],
                                (x1, y2), font, fontScale, fontColor, lineType)
        box_id += 1

    detection_result = test_img[:-4] + '_result.png'
    cv2.imwrite(detection_result, cv2_im[i])
    return detection_result
def main():
    parser = argparse.ArgumentParser(
        description='VISUM 2019 competition - baseline inference script',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-d', '--data_path',
                        default='/home/master/dataset/test',
                        metavar='', help='test data directory path')
    parser.add_argument('-m', '--model_path', default='./model.pth',
                        metavar='', help='model file')
    parser.add_argument('-o', '--output', default='./predictions.csv',
                        metavar='', help='output CSV file name')
    args = vars(parser.parse_args())

    NMS_THR = 0.1  # non-maximum suppression threshold
    REJECT_THR_KNOWN = 0.9  # rejection threshold to classify as unknown class (naive approach!)
    REJECT_THR = 0.17  # rejection threshold to classify as unknown class (naive approach!)

    def get_transform(train):
        transforms = []
        # converts the image, a PIL image, into a PyTorch Tensor
        transforms.append(T.ToTensor())
        if train:
            # during training, randomly flip the training images
            # and ground-truth for data augmentation
            transforms.append(T.RandomHorizontalFlip(0.5))
        return T.Compose(transforms)

    # Load datasets
    test_data = VisumData(args['data_path'], 'rgb', mode='test',
                          transforms=get_transform(False))

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    backbone = torchvision.models.detection.backbone_utils.resnet_fpn_backbone(
        'resnet50', True)
    backbone.out_channels = 256
    anchor_generator = AnchorGenerator(sizes=(8, 16, 32, 64, 128),
                                       aspect_ratios=(0.5, 1.0, 2.0))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    # put the pieces together inside a FasterRCNN model
    model = FasterRCNN(backbone,
                       num_classes=11,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    # load_state_dict expects a state dict, not a path
    model.load_state_dict(torch.load(args['model_path'], map_location=device))
    model.to(device)
    # set the model to evaluation mode
    model.eval()

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=4,
                                              collate_fn=utils.collate_fn)

    predictions = list()
    for i, (imgs, _, file_names) in enumerate(test_loader):
        with torch.no_grad():
            prediction = model(list(img.to(device) for img in imgs))

        boxes = np.array(prediction[0]['boxes'].cpu())
        labels = list(prediction[0]['labels'].cpu())
        scores = list(prediction[0]['scores'].cpu())

        nms_boxes, nms_labels, nms_scores = nms(boxes, labels, scores, NMS_THR)

        for bb in range(len(nms_labels)):
            if nms_scores[bb] >= REJECT_THR:
                pred = np.concatenate(
                    (list(file_names), list(nms_boxes[bb, :])))  # bounding box
                if nms_scores[bb] >= REJECT_THR_KNOWN:
                    pred = np.concatenate(
                        (pred, [nms_labels[bb] - 1]))  # object label
                else:
                    pred = np.concatenate((pred, [-1]))  # rejects to classify
                pred = np.concatenate(
                    (pred, [nms_scores[bb]]))  # best class score
                pred = list(pred)
                predictions.append(pred)

    with open(args['output'], 'w') as f:
        for pred in predictions:
            f.write("{},{},{},{},{},{},{}\n".format(pred[0], float(pred[1]),
                                                    float(pred[2]),
                                                    float(pred[3]),
                                                    float(pred[4]),
                                                    int(pred[5]),
                                                    float(pred[6])))
                                     max_size=cfg.max_size)
model_ft.load_state_dict(torch.load(cfg.model_name).state_dict())
model_ft.to(device)

with open(cfg.json_name, 'w', encoding='utf-8') as json_f:
    for file in allFileList:
        if os.path.isfile(cfg.test_path + file):
            print(file)
            output_dict = {}
            path = cfg.test_path + file
            img = Image.open(path).convert('RGB')
            img = data_transforms(img)
            img = img.unsqueeze(0)
            with torch.no_grad():
                model_ft.eval()
                img = img.to(device)
                output = model_ft(img)
                bbox = output[0]["boxes"].cpu().numpy()
                label = output[0]["labels"].cpu().numpy()
                score = output[0]["scores"].cpu().numpy()
                bbox = bbox[score > score_threshold].astype('int')
                label = label[score > score_threshold]
                score = score[score > score_threshold]
                # remove redundant bounding boxes
                bbox, label, score = process_bbox_iou(
                    bbox, label, score, cfg.score_threshold,
                    cfg.IoU_threshold)