def __init__(self, hparams):
    super().__init__()
    # Model hyperparameters in pytorch_lightning
    self.hparams = hparams
    # Mask R-CNN
    self.mask_rcnn = maskrcnn_resnet50_fpn(pretrained_backbone=True, pretrained=True)
    # Replace the output heads to predict the license-plate class
    num_classes = 2
    in_features = self.mask_rcnn.roi_heads.box_predictor.cls_score.in_features
    self.mask_rcnn.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    self.mask_rcnn.roi_heads.mask_predictor = MaskRCNNPredictor(256, 256, num_classes)
    # Allow only selected parameters to be updated
    for parameter in self.mask_rcnn.parameters():
        parameter.requires_grad = False
    for parameter in self.mask_rcnn.backbone.fpn.parameters():
        parameter.requires_grad = True
    for parameter in self.mask_rcnn.rpn.parameters():
        parameter.requires_grad = True
    for parameter in self.mask_rcnn.roi_heads.parameters():
        parameter.requires_grad = True
def __init__(self, dictionary=None):
    super(MaskRCNN, self).__init__()
    self.dictionary = dictionary
    self.input_size = [512, 512]
    self.dummy_input = torch.zeros(1, 3, self.input_size[0], self.input_size[1])
    self.num_classes = len(self.dictionary)
    self.category = [v for d in self.dictionary for v in d.keys()]
    self.weight = [d[v] for d in self.dictionary for v in d.keys() if v in self.category]

    # load an instance segmentation model pre-trained on COCO
    self.model = maskrcnn_resnet50_fpn(pretrained=True)
    # get the number of input features for the classifier
    in_features = self.model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, self.num_classes)
    # now get the number of input features for the mask classifier
    in_features_mask = self.model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    self.model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, hidden_layer, self.num_classes)
def maskrcnn_resnet50_fpn(input_size=None, output_size=None):
    """With pretrained backbone (torchvision's default)."""
    if import_error is not None:
        raise import_error
    if not isinstance(output_size, int):
        output_size = int(numpy.prod(input_size))
    return detection.maskrcnn_resnet50_fpn(num_classes=output_size)
def make_model(cfg):
    """Initializes the model.

    Args:
        cfg (Config): pass in all configurations
    """
    if cfg.model_name == 'maskrcnn_resnet50_fpn':
        if cfg.coco_pretrained:
            model = maskrcnn_resnet50_fpn(pretrained=True)
        else:
            model = maskrcnn_resnet50_fpn(num_classes=cfg.num_classes, pretrained=False)
        pretrained_num_classes = (
            model.roi_heads.mask_predictor.mask_fcn_logits.out_channels)
        swap_predictors = ((cfg.num_classes != pretrained_num_classes)
                           or cfg.swap_model_predictors)
        if swap_predictors:
            # replace the pre-trained FasterRCNN head with a new one
            model.roi_heads.box_predictor = FastRCNNPredictor(
                model.roi_heads.box_predictor.cls_score.in_features,  # in_features
                cfg.num_classes)  # num_classes
            # replace the pre-trained MaskRCNN head with a new one
            model.roi_heads.mask_predictor = MaskRCNNPredictor(
                model.roi_heads.mask_predictor.conv5_mask.in_channels,  # in_features_mask
                model.roi_heads.mask_predictor.conv5_mask.out_channels,  # hidden_layer
                cfg.num_classes)  # num_classes
    elif cfg.model_name == 'adjust_anchor':
        anchor_generator = AnchorGenerator(
            sizes=((16,), (32,), (64,), (128,), (256,)),
            aspect_ratios=((0.8, 1.0, 1.25),) * 5)
        backbone = resnet_fpn_backbone('resnet50', pretrained=True)
        model = MaskRCNN(backbone=backbone,
                         num_classes=cfg.num_classes,
                         rpn_anchor_generator=anchor_generator)
    else:
        raise NotImplementedError
    return model
def __init__(self):
    super(MaskRCNN, self).__init__()
    net = maskrcnn_resnet50_fpn(pretrained=True)
    net.eval()
    raw_layers = list(net.children())
    self.transform = raw_layers[0]
    self.backbone = raw_layers[1]
    self.rpn = raw_layers[2]
    self.roi_heads = raw_layers[3]
    # COCO category ids of interest: car (3), bus (6), truck (8)
    self.interested_ids = [3, 6, 8]
def __init__(self, num_classes=2, hidden_size=256):
    super().__init__()
    self.model_ft = maskrcnn_resnet50_fpn(pretrained=True)
    in_features = self.model_ft.roi_heads.box_predictor.cls_score.in_features
    self.model_ft.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    in_features_mask = self.model_ft.roi_heads.mask_predictor.conv5_mask.in_channels
    self.model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, hidden_size, num_classes)
    for param in self.model_ft.parameters():
        param.requires_grad = True
def mask_rcnn(pretrained=False,
              num_classes=1 + 90,
              representation=1024,
              backbone=None,
              with_mask=True,
              **kwargs):
    if backbone is None:
        model = maskrcnn_resnet50_fpn(pretrained,
                                      pretrained_backbone=not pretrained,
                                      progress=True,
                                      **kwargs)
    else:
        model = maskrcnn_resnet50_fpn(pretrained,
                                      pretrained_backbone=False,
                                      progress=True,
                                      **kwargs)
        model.backbone = backbone
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    out_features = model.roi_heads.box_predictor.cls_score.out_features
    if representation != in_features:
        logging.info(
            f"Replaced box_head with representation size of {representation}")
        out_channels = model.backbone.out_channels
        resolution = model.roi_heads.box_roi_pool.output_size[0]
        model.roi_heads.box_head = TwoMLPHead(out_channels * resolution ** 2,
                                              representation)
    if representation != in_features or num_classes != out_features:
        logging.info(
            f"Replaced box_predictor with (representation, num_classes) = ({representation}, {num_classes})"
        )
        model.roi_heads.box_predictor = FastRCNNPredictor(representation, num_classes)
    if not with_mask:
        model.roi_heads.mask_roi_pool = None
        model.roi_heads.mask_head = None
        model.roi_heads.mask_predictor = None
    return THDetector(model)
def __init__(self, batch, device):
    from torchvision.models.detection import maskrcnn_resnet50_fpn
    import torch
    # from torchvision import transforms
    super(MaskRCNNDetectorTorch, self).__init__()
    self.model = maskrcnn_resnet50_fpn(pretrained=True)
    self.model.eval()
    self.batch = batch
    self.device = torch.device(device)
    self.model.to(self.device)
    # ImageNet normalization constants
    self.MEAN = np.array([.485, .456, .406])
    self.STD = np.array([.229, .224, .225])
def get_model_instance_segmentation(num_classes):
    model = maskrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    mask_predictor_in_channels = model.roi_heads.mask_predictor.conv5_mask.in_channels
    # MaskRCNNPredictor takes (in_channels, dim_reduced, num_classes) positionally;
    # 256 is the reduced hidden dimension
    model.roi_heads.mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                                       256, num_classes)
    return model
def get_torchvision_maskrcnn(
    num_classes: int = 91,
    trainable_backbone_layers: int = 3,
    anchor_sizes: list = [32, 64, 128, 256, 512],
    anchor_aspect_ratios: list = [0.5, 1.0, 2.0],
    rpn_pre_nms_top_n_train: int = 2000,
    rpn_pre_nms_top_n_test: int = 1000,
    rpn_post_nms_top_n_train: int = 2000,
    rpn_post_nms_top_n_test: int = 1000,
    rpn_nms_thresh: float = 0.7,
    rpn_fg_iou_thresh: float = 0.7,
    rpn_bg_iou_thresh: float = 0.3,
    box_detections_per_img: int = 100,
    pretrained: bool = False,
):
    # prepare anchor params: one size tuple per FPN level, shared aspect ratios
    anchor_sizes = tuple((anchor_size,) for anchor_size in anchor_sizes)
    aspect_ratios = (tuple(anchor_aspect_ratios),) * len(anchor_sizes)
    rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

    # load an instance segmentation model pre-trained on COCO
    model = maskrcnn_resnet50_fpn(
        trainable_backbone_layers=trainable_backbone_layers,
        pretrained=pretrained,
        pretrained_backbone=pretrained,
        rpn_anchor_generator=rpn_anchor_generator,
        rpn_pre_nms_top_n_train=rpn_pre_nms_top_n_train,
        rpn_pre_nms_top_n_test=rpn_pre_nms_top_n_test,
        rpn_post_nms_top_n_train=rpn_post_nms_top_n_train,
        rpn_post_nms_top_n_test=rpn_post_nms_top_n_test,
        rpn_nms_thresh=rpn_nms_thresh,
        rpn_fg_iou_thresh=rpn_fg_iou_thresh,
        rpn_bg_iou_thresh=rpn_bg_iou_thresh,
        box_detections_per_img=box_detections_per_img,
    )

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer, num_classes)
    return model
def mcoco():
    """Common preparation routine: Obtain coco dataset handle for mask_r_cnn model"""
    # Dataset initialization:
    model_stump = ModelStump(
        model=maskrcnn_resnet50_fpn(pretrained=True),
        stump_head=TestCOCOConceptActivationDataset.LAYER_KEY)
    dataset: ConceptDataset = ConceptDataset(**default_coco_spec())
    coco = ActivationDatasetWrapper(act_map_gen=model_stump, dataset=dataset)
    yield coco
    TestCOCOConceptActivationDataset.cleanup(coco)
def detect_test(args):
    print("Loading detector...")
    maskrcnn = maskrcnn_resnet50_fpn(pretrained=True)
    if torch.cuda.is_available():
        maskrcnn.cuda()
    maskrcnn.eval()
    print("Loaded!\n")
    for f in tqdm(os.listdir(args.data + '/test_images/mistery_category')):
        if 'jpg' in f:
            data = data_transforms['detect'](
                pil_loader(args.data + '/test_images/mistery_category/' + f))
            data = data.view(1, data.size(0), data.size(1), data.size(2)).cuda()
            results = maskrcnn(data)
            for e, result in enumerate(results):
                boxes = result['boxes'].tolist()    # bounding boxes
                labels = result['labels'].tolist()  # labels
                scores = result['scores'].tolist()  # confidence of each bounding box
                # Keep only bird labels and boxes (label 16 in COCO)
                only_bird_boxes = np.array(
                    [boxes[i] for i in range(len(boxes)) if labels[i] == 16])
                only_birds_scores = np.array(
                    [scores[i] for i in range(len(boxes)) if labels[i] == 16])
                # if low confidence -> hard image
                if only_bird_boxes.size == 0 or only_birds_scores.max() < 0.85:
                    shutil.copy(args.data + '/test_images/mistery_category/' + f,
                                args.data + '/test_images/hard_test_images')
                else:
                    try:
                        i = np.argmax(only_birds_scores)
                        box = only_bird_boxes[i]
                        a, b, c, d = int(box[0]), int(box[1]), int(box[2]), int(box[3])
                        # Crop image on bird
                        cropped = data[e, :, b:d, a:c]
                        shutil.copy(args.data + '/test_images/mistery_category/' + f,
                                    args.data + '/test_images/easy_test_images')
                    except ValueError:
                        # Bounding box outside image (very rare)
                        pass
def __init__(self, num_classes=2, hidden_size=256):
    super().__init__()
    # load an instance segmentation model pre-trained on COCO
    self.model_ft = maskrcnn_resnet50_fpn(pretrained=True)
    # get the number of input features for the classifier
    in_features = self.model_ft.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    self.model_ft.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # now get the number of input features for the mask classifier
    in_features_mask = self.model_ft.roi_heads.mask_predictor.conv5_mask.in_channels
    # and replace the mask predictor with a new one
    self.model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, hidden_size, num_classes)
def connect(self, model_name='model'):
    # force the fbgemm (quantized, scripted) model regardless of the argument
    model_name = 'model_fbgemm'
    cached_file = self.get_checkpoint(model_urls[model_name])
    if model_name == 'model':
        model = maskrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)
        model.load_state_dict(torch.load(cached_file))
    else:
        # scripted model loading...
        model = torch.jit.load(cached_file)
    model.transform.max_size = 800
    model.transform.min_size = (640,)
    model.eval()
    return model
def __init__(self, calibration_file):
    """
    calibration_file: [str] path to calibration.txt file
    """
    self.maskrnn = maskrcnn_resnet50_fpn(pretrained=True)
    self.toTensor = transforms.ToTensor()
    _ = self.maskrnn.eval()
    # parse the camera intrinsics (fx, fy, cx, cy) from the calibration file
    with open(calibration_file, 'r') as f:
        calib = f.readlines()
    fx = float(calib[3].split()[0])
    fy = float(calib[3].split()[1])
    cx = float(calib[3].split()[2])
    cy = float(calib[3].split()[3])
    self.intrinsics = [fx, fy, cx, cy]
def __init__(self, n_channels=3, n_classes=21, softmax_out=False,
             resnet_type=101, pretrained=False):
    super(MaskRCNN, self).__init__()
    self.resnet_type = resnet_type
    self.n_channels = n_channels
    self.n_classes = n_classes
    self.pretrained = pretrained

    # Input conv is applied to convert the input to 3-channel depth
    self.inconv = None
    if n_channels != 3:
        self.inconv = FwdConv(n_channels, 3, kernel_size=1, padding=0)
    # Pre-trained model needs to be an identical network
    if pretrained:
        self.body = maskrcnn_resnet50_fpn(pretrained=pretrained,
                                          num_classes=91,
                                          min_size=512)
        # Reset output heads
        if n_classes != 91:
            self.body.roi_heads.box_predictor.cls_score = nn.Linear(
                in_features=1024, out_features=n_classes, bias=True)
            self.body.roi_heads.box_predictor.bbox_pred = nn.Linear(
                in_features=1024, out_features=4 * n_classes, bias=True)
            self.body.roi_heads.mask_predictor.mask_fcn_logits = nn.Conv2d(
                256, n_classes, kernel_size=(1, 1), stride=(1, 1))
    else:
        self.body = fasterrcnn_resnet50_fpn(pretrained=pretrained,
                                            num_classes=n_classes,
                                            min_size=512)
    # Softmax alternative
    self.has_softmax = softmax_out
    self.softmax = nn.Softmax2d() if softmax_out else None
def test_maskrcnn_resnet50_fpn_frozen_layers(self):
    # we know how many initial layers and parameters of the maskrcnn should
    # be frozen for each trainable_backbone_layers parameter value,
    # i.e. all 53 params are frozen if trainable_backbone_layers=0
    # and the first 24 params are frozen if trainable_backbone_layers=2
    expected_frozen_params = {0: 53, 1: 43, 2: 24, 3: 11, 4: 1, 5: 0}
    for train_layers, exp_froz_params in expected_frozen_params.items():
        model = maskrcnn_resnet50_fpn(pretrained=True,
                                      progress=False,
                                      num_classes=91,
                                      pretrained_backbone=False,
                                      trainable_backbone_layers=train_layers)
        # boolean list that is true if the parameter at that index is frozen
        is_frozen = [
            not parameter.requires_grad
            for _, parameter in model.named_parameters()
        ]
        # check that the expected initial number of layers in maskrcnn are frozen
        self.assertTrue(all(is_frozen[:exp_froz_params]))
def do_main(model, data=None, output_path=None, priorities=None,
            th_mask=0.5, th_scores=0.75, class_num=91):
    if data is not None:
        src_img = data
        if output_path is None:
            output_path = os.getcwd() + "temp_output.png"
    else:
        raise RuntimeError("invalid data input")
    if model is None:
        model = detection.maskrcnn_resnet50_fpn(num_classes=91, pretrained=True)
        model.eval()
        # in_features = model.roi_heads.box_predictor.cls_score.in_features
        # model.roi_heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(in_features, class_num)
    masks = []
    for idx in range(len(src_img)):
        # skip the highest-priority image, if priorities are given
        if priorities is not None and idx == priorities[0]:
            masks.append(None)
            continue
        # prepare ndarray with normalization and switch channel order
        img = torch.from_numpy(np.expand_dims(src_img[idx] / 255., 2)).permute(2, 0, 1).float()
        prediction = model([img])
        mask = get_mask_gray(prediction, src_img[idx], th_mask, th_scores)
        masks.append(mask)
    return masks
def __init__(self, categories):
    super().__init__()
    logging.info(f'creating model with categories: {categories}')
    # todo(will.brennan) - find a nicer way of saving the categories in the state dict...
    self._categories = nn.ParameterDict(
        {i: nn.Parameter(torch.Tensor(0)) for i in categories})
    num_categories = len(self._categories)
    self.model = detection.maskrcnn_resnet50_fpn(pretrained=True)

    logging.debug('changing num_categories for bbox predictor')
    in_features = self.model.roi_heads.box_predictor.cls_score.in_features
    self.model.roi_heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(
        in_features, num_categories)

    logging.debug('changing num_categories for mask predictor')
    in_features_mask = self.model.roi_heads.mask_predictor.conv5_mask.in_channels
    self.model.roi_heads.mask_predictor = detection.mask_rcnn.MaskRCNNPredictor(
        in_features_mask, 256, num_categories)
def main():
    anchor_generator = AnchorGenerator(
        sizes=tuple([(16, 24, 32, 48, 96) for _ in range(5)]),
        aspect_ratios=tuple([(0.5, 1.0, 2.0) for _ in range(5)]))
    rpnhead = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    model = maskrcnn_resnet50_fpn(num_classes=2,
                                  pretrained_backbone=True,
                                  max_size=MAX_SIZE,
                                  rpn_head=rpnhead,
                                  rpn_anchor_generator=anchor_generator,
                                  rpn_pre_nms_top_n_train=12000,
                                  rpn_pre_nms_top_n_test=6000,
                                  rpn_post_nms_top_n_train=2000,
                                  rpn_post_nms_top_n_test=300,
                                  rpn_fg_iou_thresh=0.5,
                                  rpn_bg_iou_thresh=0.3,
                                  rpn_positive_fraction=0.7,
                                  bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
                                  box_batch_size_per_image=32)
    model.load_state_dict(
        torch.load('saved_models' + os.sep + '0_deeplesion.pth', map_location='cpu'))

    # identical transforms for train/val/test
    data_transforms = {
        x: T.Compose([
            T.ToOriginalHU(INTENSITY_OFFSET),
            T.IntensityWindowing(WINDOWING),
            T.SpacingResize(NORM_SPACING, MAX_SIZE),
            T.ToTensor()
        ])
        for x in ['train', 'val', 'test']
    }
    image_datasets = {
        x: DeepLesion(DIR_IN + os.sep + x, GT_FN_DICT[x], data_transforms[x])
        for x in ['train', 'val', 'test']
    }
    dataloaders = {
        x: DataLoader(image_datasets[x],
                      batch_size=3,
                      shuffle=True,
                      num_workers=0,
                      collate_fn=BatchCollator)
        for x in ['train', 'val', 'test']
    }

    for batch_id, (inputs, targets) in enumerate(dataloaders['test']):
        outputs = test_model(model, inputs)
        outputs = remove_overlapping(outputs, 0.655)
        for image, target, output in zip(inputs, targets, outputs):
            # grayscale slice -> 3-channel float image for drawing
            img_copy = image.squeeze().numpy()
            img_copy = cv2.merge([img_copy.astype(float)] * 3)
            # draw ground-truth boxes (green) and masks (blue channel)
            for bbox, pseudo_mask in zip(target["boxes"], target["masks"]):
                bbox = np.int16(bbox.squeeze().numpy())
                mask = pseudo_mask.squeeze().numpy()
                cv2.rectangle(img_copy, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                              (0, 255, 0), 1)
                msk_idx = np.where(mask == 1)
                img_copy[msk_idx[0], msk_idx[1], 0] = 255
            # draw predictions (red) above the score threshold
            for predbox, predmask, score in zip(output['boxes'], output['masks'],
                                                output['scores']):
                if score < 0.655:
                    break
                predbox = predbox.numpy()
                predmask = predmask.squeeze().numpy()
                score = score.numpy()
                predmask = np.where(predmask > 0.5, 1, 0)
                cv2.rectangle(img_copy,
                              (int(predbox[0]), int(predbox[1])),
                              (int(predbox[2]), int(predbox[3])),
                              (0, 0, 255), 1)
                pmsk_idx = np.where(predmask == 1)
                img_copy[pmsk_idx[0], pmsk_idx[1], 2] = 255
                cv2.putText(img_copy, str(score),
                            (int(predbox[0]), int(predbox[1] - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1,
                            cv2.LINE_AA)
            # cv2.imshow(str(target['image_id']), img_copy)
            cv2.imwrite(
                'simple_test' + os.sep +
                str(target['image_id']).replace(os.sep, '_') + '_pred.jpg',
                img_copy * 255)
def __init__(self):
    self.model = maskrcnn_resnet50_fpn(pretrained=True)
    self.model.eval()
def __init__(self, config=None, torchvision_init=True, lidar=False):
    '''
    Handles everything
    - training, validation, testing
    - checkpoint loading and saving
    - logging | tensorboard summaries

    Accordingly everything is specified here
    - model
    - loss
    - optimizer
    - lr scheduling

    Arguments:
        torchvision_init: boolean
            - True: load densenet state dict from torchvision
            - False: load checkpoint; if no checkpoint just normal init
    '''
    self.logger = logging.getLogger('Agent')

    # model and config if lazy
    self.model = maskrcnn_resnet50_fpn(pretrained=True,
                                       progress=True,
                                       num_classes=91,  # has to be 91 if pretrained
                                       pretrained_backbone=True,
                                       trainable_backbone_layers=3)  # 0 being none and 5 all
    '''
    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)
    '''
    self.lidar = lidar
    if self.lidar:
        # add one channel to the first layer
        self.model.backbone.body.conv1 = nn.Conv2d(4, 64, kernel_size=(7, 7),
                                                   stride=(2, 2), padding=(3, 3),
                                                   bias=False)
        # replace the final layer with 4 classes: background, vehicle, pedestrian, cyclist
        self.model.roi_heads.mask_predictor.mask_fcn_logits = nn.Conv2d(
            256, 4, kernel_size=(1, 1), stride=(1, 1))

    # in case config is empty it is created in model
    if config is None:
        self.config = utils.get_config()
    else:
        self.config = config

    # dataloader
    self.data_loader = WaymoDataset_Loader(self.config)

    # pixel-wise binary cross-entropy loss
    self.loss = torch.nn.BCEWithLogitsLoss(reduction='none').cuda()

    # optimizer
    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=self.config.optimizer.learning_rate,
                                      betas=(self.config.optimizer.beta1,
                                             self.config.optimizer.beta2),
                                      eps=self.config.optimizer.eps,
                                      weight_decay=self.config.optimizer.weight_decay,
                                      amsgrad=self.config.optimizer.amsgrad)

    # learning rate decay scheduler
    if self.config.optimizer.lr_scheduler.want:
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer,
            step_size=self.config.optimizer.lr_scheduler.every_n_epochs,
            gamma=self.config.optimizer.lr_scheduler.gamma)

    # initialize counters; updated in load_checkpoint
    self.current_epoch = 0
    self.current_train_iteration = 0
    self.current_val_iteration = 0
    self.best_val_iou = 0

    # if cuda is available export model to gpu
    self.cuda = torch.cuda.is_available()
    if self.cuda:
        self.device = torch.device('cuda')
        torch.cuda.manual_seed_all(self.config.agent.seed)
        self.logger.info('Operation will be on *****GPU-CUDA***** ')
    else:
        self.device = torch.device('cpu')
        torch.manual_seed(self.config.agent.seed)
        self.logger.info('Operation will be on *****CPU***** ')

    self.model = self.model.to(self.device)
    self.loss = self.loss.to(self.device)

    if not torchvision_init:
        self.load_checkpoint()

    # Tensorboard writers
    Path(self.config.dir.current_run.summary).mkdir(exist_ok=True, parents=True)
    self.train_summary_writer = SummaryWriter(
        log_dir=self.config.dir.current_run.summary, comment='FasterRCNNResNet50')
    self.val_summary_writer = SummaryWriter(
        log_dir=self.config.dir.current_run.summary, comment='FasterRCNNResNet50')
plt.show()
# fig = plt.figure()
# for i in range(10):
#     fig.add_subplot(1, 10, i + 1)
#     plt.imshow(d[22551 + i * 42, 22551 + i * 42][0].permute((1, 2, 0)))
# plt.show()
# fig = plt.figure()
# for i in range(10):
#     fig.add_subplot(1, 10, i + 1)
#     plt.imshow(d[1178494 + i * 42, 1178494 + i * 42][0].permute((1, 2, 0)))
# plt.show()

#%%
from torchvision.models.detection import maskrcnn_resnet50_fpn

m = maskrcnn_resnet50_fpn(pretrained=True).eval().cuda(2)

#%%
min_length = 423
prestine_video_start = 22551
neural_textures_start = 1178494

from torchvision.transforms import ToTensor
from tqdm import tqdm
import torch


def get_lable_scores(dataset, start, end):
    label_scores = []
    i = 0
    for idx in tqdm(range(start, end)):
def evaluate(cls, env, model, r_idx, resnet, traj_data, args, lock,
             successes, failures, results):
    # reset model
    model.reset()

    # setup scene
    reward_type = 'dense'
    cls.setup_scene(env, traj_data, r_idx, args, reward_type=reward_type)

    # extract language features
    feat = model.featurize([(traj_data, False)], load_mask=False)

    # goal instr
    goal_instr = traj_data['turk_annotations']['anns'][r_idx]['task_desc']

    maskrcnn = maskrcnn_resnet50_fpn(num_classes=119)
    maskrcnn.eval()
    maskrcnn.load_state_dict(torch.load('weight_maskrcnn.pt'))
    maskrcnn = maskrcnn.cuda()

    prev_image = None
    prev_action = None
    nav_actions = ['MoveAhead_25', 'RotateLeft_90', 'RotateRight_90',
                   'LookDown_15', 'LookUp_15']

    prev_class = 0
    prev_center = torch.zeros(2)

    done, success = False, False
    fails = 0
    t = 0
    reward = 0
    while not done:
        # break if max_steps reached
        if t >= args.max_steps:
            break

        # extract visual features
        curr_image = Image.fromarray(np.uint8(env.last_event.frame))
        feat['frames'] = resnet.featurize([curr_image], batch=1).unsqueeze(0)

        # forward model
        m_out = model.step(feat)
        m_pred = model.extract_preds(m_out, [(traj_data, False)], feat,
                                     clean_special_tokens=False)
        m_pred = list(m_pred.values())[0]

        # action prediction
        action = m_pred['action_low']
        if (prev_image == curr_image and prev_action == action
                and prev_action in nav_actions and action in nav_actions
                and action == 'MoveAhead_25'):
            dist_action = m_out['out_action_low'][0][0].detach().cpu()
            idx_rotateR = model.vocab['action_low'].word2index('RotateRight_90')
            idx_rotateL = model.vocab['action_low'].word2index('RotateLeft_90')
            action = ('RotateLeft_90'
                      if dist_action[idx_rotateL] > dist_action[idx_rotateR]
                      else 'RotateRight_90')

        if action == cls.STOP_TOKEN:
            print("\tpredicted STOP")
            break

        # mask prediction
        mask = None
        if model.has_interaction(action):
            class_dist = m_pred['action_low_mask'][0]
            pred_class = np.argmax(class_dist)

            # mask generation
            with torch.no_grad():
                out = maskrcnn([to_tensor(curr_image).cuda()])[0]
                for k in out:
                    out[k] = out[k].detach().cpu()

            if sum(out['labels'] == pred_class) == 0:
                mask = np.zeros((constants.SCREEN_WIDTH, constants.SCREEN_HEIGHT))
            else:
                masks = out['masks'][out['labels'] == pred_class].detach().cpu()
                scores = out['scores'][out['labels'] == pred_class].detach().cpu()

                # Instance selection based on the minimum distance between the
                # prev. and cur. instance of the same class.
                if prev_class != pred_class:
                    scores, indices = scores.sort(descending=True)
                    masks = masks[indices]
                    prev_class = pred_class
                    prev_center = masks[0].squeeze(dim=0).nonzero().double().mean(dim=0)
                else:
                    cur_centers = torch.stack([
                        m.nonzero().double().mean(dim=0)
                        for m in masks.squeeze(dim=1)
                    ])
                    distances = ((cur_centers - prev_center) ** 2).sum(dim=1)
                    distances, indices = distances.sort()
                    masks = masks[indices]
                    prev_center = cur_centers[0]

                mask = np.squeeze(masks[0].numpy(), axis=0)

        # print action
        if args.debug:
            print(action)

        # use predicted action and mask (if available) to interact with the env
        t_success, _, _, err, _ = env.va_interact(action,
                                                  interact_mask=mask,
                                                  smooth_nav=args.smooth_nav,
                                                  debug=args.debug)
        if not t_success:
            fails += 1
            if fails >= args.max_fails:
                print("Interact API failed %d times" % fails +
                      "; latest error '%s'" % err)
                break

        # next time-step
        t_reward, t_done = env.get_transition_reward()
        reward += t_reward
        t += 1

        prev_image = curr_image
        prev_action = action

    # check if goal was satisfied
    goal_satisfied = env.get_goal_satisfied()
    if goal_satisfied:
        print("Goal Reached")
        success = True

    # goal_conditions
    pcs = env.get_goal_conditions_met()
    goal_condition_success_rate = pcs[0] / float(pcs[1])

    # SPL
    path_len_weight = len(traj_data['plan']['low_actions'])
    s_spl = (1 if goal_satisfied else 0) * min(1., path_len_weight / (float(t) + 1e-4))
    pc_spl = goal_condition_success_rate * min(1., path_len_weight / (float(t) + 1e-4))

    # path length weighted SPL
    plw_s_spl = s_spl * path_len_weight
    plw_pc_spl = pc_spl * path_len_weight

    # log success/fails
    lock.acquire()
    log_entry = {
        'trial': traj_data['task_id'],
        'type': traj_data['task_type'],
        'repeat_idx': int(r_idx),
        'goal_instr': goal_instr,
        'completed_goal_conditions': int(pcs[0]),
        'total_goal_conditions': int(pcs[1]),
        'goal_condition_success': float(goal_condition_success_rate),
        'success_spl': float(s_spl),
        'path_len_weighted_success_spl': float(plw_s_spl),
        'goal_condition_spl': float(pc_spl),
        'path_len_weighted_goal_condition_spl': float(plw_pc_spl),
        'path_len_weight': int(path_len_weight),
        'reward': float(reward)
    }
    if success:
        successes.append(log_entry)
    else:
        failures.append(log_entry)

    # overall results
    results['all'] = cls.get_metrics(successes, failures)

    print("-------------")
    print("SR: %d/%d = %.5f" % (results['all']['success']['num_successes'],
                                results['all']['success']['num_evals'],
                                results['all']['success']['success_rate']))
    print("PLW SR: %.5f" % (results['all']['path_length_weighted_success_rate']))
    print("GC: %d/%d = %.5f" %
          (results['all']['goal_condition_success']['completed_goal_conditions'],
           results['all']['goal_condition_success']['total_goal_conditions'],
           results['all']['goal_condition_success']['goal_condition_success_rate']))
    print("PLW GC: %.5f" %
          (results['all']['path_length_weighted_goal_condition_success_rate']))
    print("-------------")

    # task type specific results
    task_types = ['pick_and_place_simple', 'pick_clean_then_place_in_recep',
                  'pick_heat_then_place_in_recep', 'pick_cool_then_place_in_recep',
                  'pick_two_obj_and_place', 'look_at_obj_in_light',
                  'pick_and_place_with_movable_recep']
    for task_type in task_types:
        task_successes = [s for s in (list(successes)) if s['type'] == task_type]
        task_failures = [f for f in (list(failures)) if f['type'] == task_type]
        if len(task_successes) > 0 or len(task_failures) > 0:
            results[task_type] = cls.get_metrics(task_successes, task_failures)
        else:
            results[task_type] = {}

    lock.release()
def get_model(pre_trained, pretrained_backbone, numclasses):
    anchor_generator = AnchorGenerator(
        sizes=tuple([(16, 24, 32, 48, 96) for _ in range(5)]),
        aspect_ratios=tuple([(0.5, 1.0, 2.0) for _ in range(5)]))
    rpnhead = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    if pre_trained:
        # dl_model = maskrcnn_resnet50_fpn(pretrained=pre_trained, max_size=MAX_SIZE,
        #                                  rpn_head=rpnhead, rpn_anchor_generator=anchor_generator,
        #                                  rpn_pre_nms_top_n_train=12000, rpn_pre_nms_top_n_test=6000,
        #                                  rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=300,
        #                                  rpn_fg_iou_thresh=0.5, rpn_bg_iou_thresh=0.3,
        #                                  rpn_positive_fraction=0.7, bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
        #                                  box_batch_size_per_image=32)
        dl_model = maskrcnn_resnet50_fpn(pretrained=pre_trained,
                                         max_size=MAX_SIZE,
                                         rpn_pre_nms_top_n_train=12000,
                                         rpn_pre_nms_top_n_test=6000,
                                         rpn_post_nms_top_n_train=2000,
                                         rpn_post_nms_top_n_test=300,
                                         rpn_fg_iou_thresh=0.5,
                                         rpn_bg_iou_thresh=0.3,
                                         rpn_positive_fraction=0.7,
                                         bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
                                         box_batch_size_per_image=32)
        # Remove incompatible parameters:
        # newdict = removekey(dl_model.state_dict(), ['roi_heads.box_predictor.cls_score.bias',
        #                                             'roi_heads.box_predictor.cls_score.weight',
        #                                             'roi_heads.box_predictor.bbox_pred.bias',
        #                                             'roi_heads.box_predictor.bbox_pred.weight'])
        # dl_model.load_state_dict(newdict)

        # freeze the pre-trained weights; only the new heads below are trained
        for param in dl_model.parameters():
            param.requires_grad = False
        # replace the classifier with a new one that has a user-defined
        # num_classes: 1 class (lesion) + background
        num_classes = numclasses
        # get the number of input features for the classifier
        in_features = dl_model.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        dl_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
        # now get the number of input features for the mask classifier
        in_features_mask = dl_model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        dl_model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                              hidden_layer,
                                                              num_classes)
    else:
        dl_model = maskrcnn_resnet50_fpn(num_classes=numclasses,
                                         pretrained_backbone=pretrained_backbone,
                                         max_size=MAX_SIZE,
                                         rpn_head=rpnhead,
                                         rpn_anchor_generator=anchor_generator,
                                         rpn_pre_nms_top_n_train=12000,
                                         rpn_pre_nms_top_n_test=6000,
                                         rpn_post_nms_top_n_train=2000,
                                         rpn_post_nms_top_n_test=300,
                                         rpn_fg_iou_thresh=0.5,
                                         rpn_bg_iou_thresh=0.3,
                                         rpn_positive_fraction=0.7,
                                         bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
                                         box_batch_size_per_image=32)
    return dl_model
def __init__(self, backbone='fasterrcnn', use_pretrained=True, num_classes=91):
    super(Detection, self).__init__()
    if backbone == 'fasterrcnn':
        self.detection_backbone = fasterrcnn_resnet50_fpn(
            pretrained=use_pretrained, num_classes=num_classes).cuda()
    else:
        self.detection_backbone = maskrcnn_resnet50_fpn(
            pretrained=use_pretrained, num_classes=num_classes).cuda()
Used the PyTorch Mask R-CNN ResNet50 model to identify the child, then used the
predicted mask for binary image segmentation: child pixels are represented as '1'
and background pixels as '0'. From this we calculate the mask area and the
percentage of body pixels relative to total image pixels.
"""
import time

import numpy as np
from torchvision.models.detection import maskrcnn_resnet50_fpn

from imgseg.predict import predict

model = maskrcnn_resnet50_fpn(pretrained=True)


def predict_by_resize(image, factor=10):
    """Apply MaskRCNN on a downscaled image; by default the factor is 10x."""
    print("Resizing image by", factor, "x")
    newsize = (int(image.size[0] / factor), int(image.size[1] / factor))
    print("Resized Dimension", newsize)
    start_time = time.time()
    out = predict(image.resize(newsize), model)
    print("Time: %s s" % (time.time() - start_time))

    # Binary image segmentation: threshold the soft mask of the top detection
    threshold = 0.5
    masks = out['masks'][0][0]
    masks = masks > threshold
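The snippet above ends before the area computation that the docstring describes. A minimal sketch of how that could continue, assuming `masks` is the boolean torch tensor produced above (this continuation is an illustration, not part of the original source):

# Hypothetical continuation (not from the original source): compute the mask
# area and the percentage of body pixels from the boolean mask above,
# assuming `masks` is a torch.BoolTensor of shape (H, W).
body_pixels = masks.sum().item()    # number of '1' (child) pixels
total_pixels = masks.numel()        # total pixels in the downscaled image
body_percentage = 100.0 * body_pixels / total_pixels
print(f"Mask area: {body_pixels} px, body/total: {body_percentage:.2f}%")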
#####################################
# We will here describe the output of a Mask-RCNN model. The models in
# :ref:`object_det_inst_seg_pers_keypoint_det` all have a similar output
# format, but some of them may have extra info like keypoints for
# :func:`~torchvision.models.detection.keypointrcnn_resnet50_fpn`, and some
# of them may not have masks, like
# :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn`.

from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights

weights = MaskRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()

batch = transforms(batch_int)

model = maskrcnn_resnet50_fpn(weights=weights, progress=False)
model = model.eval()

output = model(batch)
print(output)

#####################################
# Let's break this down. For each image in the batch, the model outputs some
# detections (or instances). The number of detections varies for each input
# image. Each instance is described by its bounding box, its label, its score
# and its mask.
#
# The way the output is organized is as follows: the output is a list of length
# ``batch_size``. Each entry in the list corresponds to an input image, and it
# is a dict with keys 'boxes', 'labels', 'scores', and 'masks'. Each value
# associated to those keys has ``num_instances`` elements in it. In our case
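#####################################
# As a minimal illustration of that structure (added here, assuming the
# ``output`` computed above), the per-image dict can be unpacked like this;
# the 0.75 score threshold is an arbitrary choice for the example:

first_image_output = output[0]          # dict for the first image in the batch
boxes = first_image_output['boxes']     # (num_instances, 4) in xyxy format
labels = first_image_output['labels']   # (num_instances,) class ids
scores = first_image_output['scores']   # (num_instances,) confidences
masks = first_image_output['masks']     # (num_instances, 1, H, W) soft masks

keep = scores > 0.75                    # keep only confident instances
print(boxes[keep].shape, labels[keep].shape, masks[keep].shape)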
    for img, mask in zip(batch_int, all_classes_masks)
]
show(dogs_with_masks)

#####################################
# Instance segmentation models
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Instance segmentation models have a significantly different output from the
# semantic segmentation models. We will see here how to plot the masks for such
# models. Let's start by analyzing the output of a Mask-RCNN model. Note that
# these models don't require the images to be normalized, so we don't need to
# use the normalized batch.

from torchvision.models.detection import maskrcnn_resnet50_fpn

model = maskrcnn_resnet50_fpn(pretrained=True, progress=False)
model = model.eval()

output = model(batch)
print(output)

#####################################
# Let's break this down. For each image in the batch, the model outputs some
# detections (or instances). The number of detections varies for each input
# image. Each instance is described by its bounding box, its label, its score
# and its mask.
#
# The way the output is organized is as follows: the output is a list of length
# ``batch_size``. Each entry in the list corresponds to an input image, and it
# is a dict with keys 'boxes', 'labels', 'scores', and 'masks'. Each value
# associated to those keys has ``num_instances`` elements in it. In our case
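#####################################
# A short sketch (added for illustration, assuming the ``output`` above): the
# masks come out as (num_instances, 1, H, W) floats in [0, 1], so before
# drawing them with utilities such as
# ``torchvision.utils.draw_segmentation_masks`` they need to be thresholded
# into boolean masks; 0.5 is an arbitrary probability threshold here:

proba_threshold = 0.5
boolean_masks = output[0]['masks'] > proba_threshold  # (num_instances, 1, H, W) bools
print(boolean_masks.shape, boolean_masks.dtype)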
print(f'Memory after server started: {mem()}')

model_name = 'model_fbgemm_bool'
cached_file = load_model(model_urls[model_name])
print(f'Memory after weights loaded: {mem()}')

torch.set_grad_enabled(False)
# print('Supported engines: ', torch.backends.quantized.supported_engines)
torch._C._jit_set_profiling_executor(False)
torch._C._jit_set_profiling_mode(False)
torch.jit.optimized_execution(False)
# torch.backends.quantized.engine = 'qnnpack'

if model_name == 'model':
    from torchvision.models.detection import maskrcnn_resnet50_fpn
    checkpoint = torch.load(cached_file)
    if 'model' in checkpoint.keys():
        checkpoint = checkpoint['model']
    model = maskrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)
    sys.stderr.write('Torchvision model loading...\n')
    model.load_state_dict(checkpoint)
else:
    # scripted model loading...
    sys.stderr.write('Scripted model loading...\n')
    model = torch.jit.load(cached_file)

print(f'Memory after model loaded: {mem()}')
model.transform.max_size = 800
model.transform.min_size = (640, )
model.eval()

# model warm-up
'''
t = time.time()
with torch.jit.optimized_execution(True), torch.no_grad():
    for i in range(1):