def fasterrcnn_resnet50_fpn(
    pretrained=False,
    progress=True,
    num_classes=91,
    pretrained_backbone=True,
    **kwargs
):
    """Build a Faster R-CNN with a ResNet-50-FPN backbone and a
    ``CachelessAnchorGenerator`` for the RPN.

    Arguments:
        pretrained (bool): If True, load the detector weights referenced by
            ``model_urls["fasterrcnn_resnet50_fpn_coco"]``.
        progress (bool): Forwarded to ``load_state_dict_from_url``.
        num_classes (int): Forwarded to ``FasterRCNN``.
        pretrained_backbone (bool): Forwarded to ``resnet_fpn_backbone``;
            forced to False whenever ``pretrained`` is set.
        **kwargs: Extra keyword arguments forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` instance.
    """
    # The full-detector checkpoint is loaded over the whole model, so there
    # is no need to download the backbone weights separately.
    backbone_weights = False if pretrained else pretrained_backbone
    fpn_backbone = resnet_fpn_backbone("resnet50", backbone_weights)

    # Five anchor sizes, each paired with the same three aspect ratios.
    sizes = ((32,), (64,), (128,), (256,), (512,))
    ratios = tuple((0.5, 1.0, 2.0) for _ in sizes)
    anchor_generator = CachelessAnchorGenerator(sizes, ratios)

    # Alternative configuration kept for reference (not active):
    #   min_size=300, max_size=400,
    #   anchor sizes ((12,), (24,), (48,), (96,), (192,)) with the same ratios.
    detector = FasterRCNN(
        fpn_backbone,
        num_classes,
        rpn_anchor_generator=anchor_generator,
        **kwargs
    )

    if not pretrained:
        return detector

    weights = load_state_dict_from_url(
        model_urls["fasterrcnn_resnet50_fpn_coco"], progress=progress
    )
    detector.load_state_dict(weights)
    return detector
def custom_fasterrcnn_resnet_fpn(backbone, pretrained=True, progress=True,
                                 num_classes=91, pretrained_backbone=True,
                                 trainable_backbone_layers=3, **kwargs):
    """Build a Faster R-CNN on top of a configurable ResNet-FPN backbone.

    Arguments:
        backbone (dict): must contain ``'name'`` (passed to
            ``custom_resnet_fpn_backbone``) and ``'params'`` (the backbone
            configuration dict). The caller's dict is left untouched; any
            overrides are applied to a copy.
        pretrained (bool): If True and the backbone name ends with
            ``'resnet50'``, load the weights referenced by
            ``fasterrcnn_model_urls['fasterrcnn_resnet50_fpn_coco']``
            (non-strict).
        progress (bool): Forwarded to ``load_state_dict_from_url``.
        num_classes (int): Forwarded to ``FasterRCNN``.
        pretrained_backbone (bool): Only consulted to decide whether all
            backbone layers should be trainable; it is not forwarded.
        trainable_backbone_layers (int): Must be in ``[0, 5]``. It is only
            validated here; the backbone config's own value is used unless
            neither ``pretrained`` nor ``pretrained_backbone`` is set, in
            which case 5 is written into the config.
        **kwargs: Extra keyword arguments forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` instance.

    Raises:
        AssertionError: if ``trainable_backbone_layers`` is outside ``[0, 5]``.
    """
    backbone_name = backbone['name']
    # Shallow copy: only top-level keys are overridden below, and writing
    # them into the caller's dict would leak into subsequent calls.
    backbone_params_config = dict(backbone['params'])
    assert 0 <= trainable_backbone_layers <= 5

    # Don't freeze any layers if no pretrained model or backbone is used.
    if not (pretrained or pretrained_backbone):
        backbone_params_config['trainable_backbone_layers'] = 5
    if pretrained:
        # No need to download the backbone if pretrained is set.
        # NOTE(review): this also applies to non-resnet50 backbones, for
        # which no detector checkpoint is loaded below -- confirm intended.
        backbone_params_config['pretrained'] = False

    backbone_model = custom_resnet_fpn_backbone(backbone_name, backbone_params_config)

    # With exactly four returned feature maps the FasterRCNN default RoI
    # pooler is used (box_roi_pool=None); otherwise build one that covers
    # every returned feature map.
    num_feature_maps = len(backbone_model.body.return_layers)
    if num_feature_maps == 4:
        box_roi_pool = None
    else:
        box_roi_pool = MultiScaleRoIAlign(
            featmap_names=[str(i) for i in range(num_feature_maps)],
            output_size=7,
            sampling_ratio=2)

    model = FasterRCNN(backbone_model, num_classes, box_roi_pool=box_roi_pool, **kwargs)

    if pretrained and backbone_name.endswith('resnet50'):
        state_dict = load_state_dict_from_url(
            fasterrcnn_model_urls['fasterrcnn_resnet50_fpn_coco'],
            progress=progress)
        # strict=False tolerates key mismatches between the checkpoint and
        # the customised architecture.
        model.load_state_dict(state_dict, strict=False)
    return model
def custom_fasterrcnn_resnet_fpn(backbone, pretrained=True, progress=True,
                                 num_classes=91, pretrained_backbone=True,
                                 trainable_backbone_layers=3, **kwargs):
    """Build a Faster R-CNN on top of a configurable ResNet-FPN backbone.

    Arguments:
        backbone (dict): must contain ``'name'`` (passed to
            ``custom_resnet_fpn_backbone``) and ``'params'`` (the backbone
            configuration dict). The caller's dict is left untouched; any
            overrides are applied to a copy.
        pretrained (bool): If True and the backbone name ends with
            ``'resnet50'``, load the weights referenced by
            ``fasterrcnn_model_urls['fasterrcnn_resnet50_fpn_coco']``
            (non-strict).
        progress (bool): Forwarded to ``load_state_dict_from_url``.
        num_classes (int): Forwarded to ``FasterRCNN``.
        pretrained_backbone (bool): Only consulted to decide whether all
            backbone layers should be trainable; it is not forwarded.
        trainable_backbone_layers (int): Must be in ``[0, 5]``. It is only
            validated here; the backbone config's own value is used unless
            neither ``pretrained`` nor ``pretrained_backbone`` is set, in
            which case 5 is written into the config.
        **kwargs: Extra keyword arguments forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` instance.

    Raises:
        AssertionError: if ``trainable_backbone_layers`` is outside ``[0, 5]``.
    """
    backbone_name = backbone['name']
    # Shallow copy: only top-level keys are overridden below, and writing
    # them into the caller's dict would leak into subsequent calls.
    backbone_params_config = dict(backbone['params'])
    assert 0 <= trainable_backbone_layers <= 5

    # Don't freeze any layers if no pretrained model or backbone is used.
    if not (pretrained or pretrained_backbone):
        backbone_params_config['trainable_backbone_layers'] = 5
    if pretrained:
        # No need to download the backbone if pretrained is set.
        # NOTE(review): this also applies to non-resnet50 backbones, for
        # which no detector checkpoint is loaded below -- confirm intended.
        backbone_params_config['pretrained'] = False

    backbone_model = custom_resnet_fpn_backbone(backbone_name, backbone_params_config)
    model = FasterRCNN(backbone_model, num_classes, **kwargs)

    if pretrained and backbone_name.endswith('resnet50'):
        state_dict = load_state_dict_from_url(
            fasterrcnn_model_urls['fasterrcnn_resnet50_fpn_coco'],
            progress=progress)
        # strict=False tolerates key mismatches between the checkpoint and
        # the customised architecture.
        model.load_state_dict(state_dict, strict=False)
    return model
def fasterrcnn_resnet50_fpn(pretrained=False, progress=True,
                            num_classes=91, pretrained_backbone=True,
                            trainable_backbone_layers=3, model_dir=None, **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with inputs images of fixed size.

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
        num_classes (int): number of output classes of the model (including the background)
        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
        model_dir (str, optional): forwarded to ``load_state_dict_from_url`` as the directory
            in which the downloaded checkpoint is stored.
        **kwargs: extra keyword arguments forwarded to ``FasterRCNN``.

    Raises:
        AssertionError: if ``trainable_backbone_layers`` is outside ``[0, 5]``.
    """
    assert 0 <= trainable_backbone_layers <= 5
    # Don't freeze any layers if neither a pretrained model nor a pretrained
    # backbone is used.
    if not (pretrained or pretrained_backbone):
        trainable_backbone_layers = 5
    if pretrained:
        # No need to download the backbone if pretrained is set.
        pretrained_backbone = False
    # Forward the (possibly adjusted) layer count; previously it was
    # validated but never reached the backbone builder.
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone,
                                   trainable_layers=trainable_backbone_layers)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'],
            progress=progress, model_dir=model_dir)
        model.load_state_dict(state_dict)
    return model
class PyTorchModel:
    """Faster R-CNN (ResNet-50-FPN backbone) wrapper that loads saved
    weights and turns detections into ``Label`` objects."""

    def __init__(self, f):
        """Build the detector, move it to the available device and load weights.

        Args:
            f: either a ``str`` path to a saved state dict or an
                ``io.BytesIO`` stream containing one. For any other type
                no weights are loaded.
        """
        backbone = resnet_fpn_backbone('resnet50', True, trainable_layers=5)
        self.model = FasterRCNN(backbone,
                                num_classes=10,
                                max_size=3840,
                                min_size=2160,
                                rpn_pre_nms_top_n_train=2000,
                                rpn_pre_nms_top_n_test=2000,
                                rpn_post_nms_top_n_train=2000,
                                rpn_post_nms_top_n_test=2000,
                                box_detections_per_img=100,
                                rpn_nms_thresh=0.01,
                                box_nms_thresh=0.01)

        if torch.cuda.is_available():
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')
        self.model.to(device)

        if isinstance(f, str):
            # Local file.
            print("Loading model from local file at {}".format(f))
            self.model.load_state_dict(torch.load(f, map_location=device))
        elif isinstance(f, io.BytesIO):
            # In-memory stream: torch.load accepts file-like objects, so the
            # weights are actually loaded instead of being silently skipped.
            print("Loading model from stream")
            self.model.load_state_dict(torch.load(f, map_location=device))

    def predict(self, image) -> List[Label]:
        """Run the detector on one image and return its detections.

        Args:
            image: input accepted by ``torchvision.transforms.ToTensor``.

        Returns:
            One ``Label`` per detected box, built from the box corners
            (truncated to ``int``), the ``"category"`` and ``"color"`` entries
            of ``classes`` for the predicted label, and the score.
        """
        frame = torchvision.transforms.ToTensor()(image)
        # Add the batch dimension and place the input on the same device as
        # the model; otherwise a CUDA model would be fed a CPU tensor.
        device = next(self.model.parameters()).device
        batch = frame[None, :, :].to(device)

        self.model.eval()
        # Gradients are not needed for inference.
        with torch.no_grad():
            prediction = self.model(batch)
        print(prediction)

        detections = prediction[0]
        ret = []
        rows = zip(detections["boxes"], detections["labels"], detections["scores"])
        for i, (box, label, score) in enumerate(rows):
            xmin, ymin, xmax, ymax = (int(coord.item()) for coord in box)
            entry = classes[str(label.item())]
            ret.append(Label(i, entry["category"], xmin, xmax, ymin, ymax,
                             entry["color"], float(score.item())))
        return ret
def fasterrcnn_resnet101_fpn(pretrained=False, progress=True,
                             num_classes=91, pretrained_backbone=True,
                             model_dir=None, **kwargs):
    """
    Build a Faster R-CNN detector on a ResNet-101-FPN backbone.
    This is NOT an official model.

    Inputs are a list of tensors, one per image, each shaped ``[C, H, W]`` with values in the
    ``0-1`` range; images may differ in size.

    Training mode and evaluation mode behave differently.

    In training mode the model takes the input tensors plus targets (a list of dictionaries) holding:
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes as ``[x1, y1, x2, y2]``, where ``x`` lies
          between ``0`` and ``W`` and ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): class label of every ground-truth box

    and returns a ``Dict[Tensor]`` with the classification and regression losses of both the RPN
    and the R-CNN.

    In evaluation mode only the input tensors are needed; the result is a ``List[Dict[Tensor]]``
    (one entry per image) of post-processed predictions with the fields:
        - boxes (``FloatTensor[N, 4]``): predicted boxes as ``[x1, y1, x2, y2]``, where ``x`` lies
          between ``0`` and ``W`` and ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): predicted labels
        - scores (``Tensor[N]``): score of each prediction

    Faster R-CNN can be exported to ONNX for a fixed batch size with fixed-size input images.

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    # A full-detector checkpoint makes a separate backbone download pointless.
    backbone_weights = False if pretrained else pretrained_backbone
    detector = FasterRCNN(resnet_fpn_backbone('resnet101', backbone_weights),
                          num_classes, **kwargs)
    if not pretrained:
        return detector

    checkpoint = load_state_dict_from_url(
        model_urls['fasterrcnn_resnet101_fpn_coco'],
        progress=progress,
        model_dir=model_dir)
    # The weights are stored under the 'model' key of the checkpoint.
    detector.load_state_dict(checkpoint['model'])
    return detector
def fasterrcnn_resnet101_fpn(pretrained=False, progress=True,
                             num_classes=91, pretrained_backbone=True,
                             trainable_backbone_layers=3, **kwargs):
    """Build a Faster R-CNN model with a ResNet-101-FPN backbone.

    Arguments:
        pretrained (bool): If True, load detector weights from the local
            checkpoint file ``'resnet101_7a82fa4a.pth'``.
        progress (bool): Unused here (nothing is downloaded); kept in the
            signature.
        num_classes (int): Forwarded to ``FasterRCNN``.
        pretrained_backbone (bool): Forwarded to ``resnet_fpn_backbone``;
            forced to False whenever ``pretrained`` is set.
        trainable_backbone_layers (int): Must be in ``[0, 5]``; forwarded to
            ``resnet_fpn_backbone`` as ``trainable_layers``. Forced to 5 when
            neither ``pretrained`` nor ``pretrained_backbone`` is set.
        **kwargs: Extra keyword arguments forwarded to ``FasterRCNN``.

    Returns:
        The constructed ``FasterRCNN`` instance.

    Raises:
        AssertionError: if ``trainable_backbone_layers`` is outside ``[0, 5]``.
    """
    assert 0 <= trainable_backbone_layers <= 5
    # Don't freeze any layers if neither a pretrained model nor a pretrained
    # backbone is used.
    if not (pretrained or pretrained_backbone):
        trainable_backbone_layers = 5
    if pretrained:
        # No need to download the backbone if pretrained is set.
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet101', pretrained_backbone,
                                   trainable_layers=trainable_backbone_layers)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        import torch

        # load_state_dict expects a mapping of tensors, not a filename, so
        # the checkpoint has to be deserialised first.
        # NOTE(review): assumes the file stores a plain state dict (not
        # nested under a key such as 'model') -- confirm against the file.
        state_dict = torch.load('resnet101_7a82fa4a.pth', map_location='cpu')
        model.load_state_dict(state_dict)
    return model
def _init_faster_rcnn(backbone='ResNet', num_classes=91, **kwargs):
    """Build a Faster R-CNN from locally stored ResNet-50 and detector weights.

    Arguments:
        backbone: Not read; the backbone is always built from ResNet-50.
        num_classes (int): Forwarded to ``FasterRCNN``.
        **kwargs: Extra keyword arguments forwarded to ``FasterRCNN``.

    Returns:
        The ``FasterRCNN`` instance with the detector checkpoint named by
        ``MODEL_NAME['Faster_RCNN']`` loaded.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    # ResNet-50 trunk with frozen batch-norm, initialised from the local
    # classification checkpoint.
    resnet50_ctor = models.resnet.__dict__['resnet50']
    trunk = resnet50_ctor(pretrained=False,
                          norm_layer=misc_nn_ops.FrozenBatchNorm2d)
    trunk_weights = os.path.join(PRETRAINED_DEEP_MODEL_DIR,
                                 'resnet50-19c8e357.pth')
    trunk.load_state_dict(torch.load(trunk_weights, map_location=device))

    detector = FasterRCNN(_resnet_fpn_backbone(trunk), num_classes, **kwargs)

    # Overwrite with the full detector checkpoint.
    detector_weights = os.path.join(PRETRAINED_DEEP_MODEL_DIR,
                                    MODEL_NAME['Faster_RCNN'])
    detector.load_state_dict(torch.load(detector_weights, map_location=device))
    return detector
# Default location of the locally stored detector checkpoint.
_DEFAULT_FASTERRCNN_RESNET50_WEIGHTS = (
    'D:/Models/torchpretrainedmodels/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth'
)


def fasterrcnn_resnet50_fpn(pretrained=False, progress=True,
                            num_classes=91, pretrained_backbone=True,
                            weights_path=_DEFAULT_FASTERRCNN_RESNET50_WEIGHTS,
                            **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending if it is in training or evaluation mode.

    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
          between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with inputs images of fixed size.

    Example::

        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        >>> # For training
        >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
        >>> labels = torch.randint(1, 91, (4, 11))
        >>> images = list(image for image in images)
        >>> targets = []
        >>> for i in range(len(images)):
        >>>     d = {}
        >>>     d['boxes'] = boxes[i]
        >>>     d['labels'] = labels[i]
        >>>     targets.append(d)
        >>> output = model(images, targets)
        >>> # For inference
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
        >>>
        >>> # optionally, if you want to export the model to ONNX:
        >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)

    Arguments:
        pretrained (bool): If True, loads the detector weights from ``weights_path``
        progress (bool): Unused here (the checkpoint is read from disk, not downloaded);
            kept in the signature
        num_classes (int): forwarded to ``FasterRCNN``
        pretrained_backbone (bool): forwarded to ``resnet_fpn_backbone``; forced to False
            whenever ``pretrained`` is set
        weights_path (str): location of the detector checkpoint read when ``pretrained``
            is True; defaults to the previously hard-coded path
        **kwargs: extra keyword arguments forwarded to ``FasterRCNN``
    """
    if pretrained:
        # No need to download the backbone if pretrained is set.
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        # The model is still on the CPU at this point, so map the tensors
        # there; this also lets a GPU-saved checkpoint load on a CPU-only host.
        state_dict = torch.load(weights_path, map_location='cpu')
        model.load_state_dict(state_dict)
    return model
import sys
import time

import torch
import torchvision
from PIL import Image
from torch.autograd import Variable
from torchvision import transforms
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.faster_rcnn import FasterRCNN

from class_labels import GROCERY_LIST_V0

# Build the detector (ResNet-50-FPN backbone without pretrained weights,
# 64 classes) and restore the trained weights from the checkpoint's
# "model" entry.
model = FasterRCNN(resnet_fpn_backbone("resnet50", False), num_classes=64)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_path = "synthdet_faster_rcnn.pth"
model_save = torch.load(model_path, map_location=device)  # no CUDA on macOS
model.load_state_dict(model_save["model"])
model.to(device)
model.eval()

# preprocess on test image
# The image path is taken from the first command-line argument
# (this is why `sys` must be imported).
image_path = sys.argv[1]
image = Image.open(image_path)
image_to_tensor = transforms.Compose([transforms.ToTensor()])
tensor = image_to_tensor(image)

# inference
threshold = 0.5

# Start timing