def __init__(
    self,
    learning_rate: float = 0.0001,
    num_classes: int = 91,
    backbone: Optional[Union[str, torch.nn.Module]] = None,
    fpn: bool = True,
    pretrained: bool = False,
    pretrained_backbone: bool = True,
    trainable_backbone_layers: int = 3,
    **kwargs: Any,
):
    """Build a Faster R-CNN detection module.

    Args:
        learning_rate: optimiser learning rate.
        num_classes: number of detection classes (background included).
        backbone: name of a pretrained backbone CNN, an already-built
            ``torch.nn.Module``, or ``None`` for torchvision's default
            ResNet-50 FPN detector.
        fpn: if True, creates a Feature Pyramid Network on top of ResNet
            based CNNs.
        pretrained: if true, returns a model pre-trained on COCO train2017.
        pretrained_backbone: if true, returns a model with backbone
            pre-trained on Imagenet.
        trainable_backbone_layers: number of trainable resnet layers
            starting from final block.
    """
    if not _TORCHVISION_AVAILABLE:  # pragma: no cover
        raise ModuleNotFoundError(
            "You want to use `torchvision` which is not installed yet.")
    super().__init__()
    self.learning_rate = learning_rate
    self.num_classes = num_classes
    self.backbone = backbone

    if backbone is None:
        # Default path: torchvision's ResNet-50 FPN detector with the box
        # head swapped out so it predicts `num_classes` classes.
        detector = fasterrcnn_resnet50_fpn(
            pretrained=pretrained,
            pretrained_backbone=pretrained_backbone,
            trainable_backbone_layers=trainable_backbone_layers,
        )
        head_width = detector.roi_heads.box_predictor.cls_score.in_features
        detector.roi_heads.box_predictor = FastRCNNPredictor(
            head_width, self.num_classes)
        self.model = detector
        return

    # Custom backbone: either take the module as given, or build one from
    # its architecture name.
    if isinstance(self.backbone, torch.nn.Module):
        resolved_backbone = self.backbone
        if pretrained_backbone:
            import warnings
            warnings.warn(
                "You would need to load the pretrained state_dict yourself if you are "
                "providing backbone of type torch.nn.Module / pl.LightningModule."
            )
    else:
        resolved_backbone = create_fasterrcnn_backbone(
            self.backbone,
            fpn,
            pretrained_backbone,
            trainable_backbone_layers,
            **kwargs,
        )
    self.model = torchvision_FasterRCNN(
        resolved_backbone, num_classes=num_classes, **kwargs)
class Model:
    """Letter detector plus reading-order sorter.

    A Faster R-CNN (ResNet-50 FPN) is built and its trained weights are
    loaded from 'model-use.pth' at class-creation time, so all model state
    is shared class-level state.
    """

    # Index 0 must stay '__background__': torchvision detection models
    # reserve label 0 for the background class.
    CLASS_NAMES = ['__background__', 'A', 'B', 'C', 'D', 'X']
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn()
    num_classes = 6
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.load_state_dict(torch.load('model-use.pth'))
    model.eval()
    clf = KMeans(n_clusters=8)  # 8-cluster KMeans used to group detections into text rows

    def prediction(self, img, threshold):
        """Detect letters in the image stored at path ``img``.

        Returns ``(pred_boxes, pred_class, pred_score)`` for detections
        whose score is >= ``threshold``; scores are converted to integer
        percentages.

        Fixes over the previous revision: the threshold cut-off used
        ``list.index`` (which returns the wrong position when scores
        repeat) and raised IndexError when no detection cleared the
        threshold — it now enumerates positions and returns three empty
        lists in that case.
        """
        # Read the image and turn it into white strokes on a black background.
        img = cv2.imread(img)
        img[img > 180] = 255
        img = 255 - img
        img[img > 100] = 255
        img = Image.fromarray(img.astype('uint8')).convert('RGB')
        transform = transforms.Compose([transforms.ToTensor()])
        img = transform(img)
        img = img.to(self.device)
        pred = self.model([img])  # pass the image to the model
        pred_class = [self.CLASS_NAMES[i] for i in list(pred[0]['labels'].to("cpu").numpy())]
        pred_boxes = [[int(i[0]), int(i[1]), int(i[2]), int(i[3])]
                      for i in list(pred[0]['boxes'].to("cpu").detach().numpy())]
        pred_score = list(pred[0]['scores'].to("cpu").detach().numpy())
        # Scores come back sorted in descending order, so the detections
        # above the threshold form a prefix of all three lists.
        keep = [i for i, score in enumerate(pred_score) if score >= threshold]
        if not keep:
            return [], [], []
        pred_t = keep[-1]
        pred_boxes = pred_boxes[:pred_t + 1]
        pred_class = pred_class[:pred_t + 1]
        pred_score = pred_score[:pred_t + 1]
        for i in range(len(pred_score)):
            pred_score[i] = int(pred_score[i] * 100)  # confidence as integer percent
        return pred_boxes, pred_class, pred_score

    def getAns(self, img, threshold):
        """Detect letters and return them sorted in reading order.

        Rows are found by KMeans-clustering the detections' vertical
        centres; letters are then sorted left-to-right within each row.
        Detections labelled 'X' are discarded.
        """
        pred_boxes, pred_class, pred_score = self.prediction(img, threshold)
        letters = []
        for i in range(len(pred_boxes)):
            if pred_class[i] != 'X':
                letter = Letter(pred_boxes[i], pred_class[i], pred_score[i])
                letters.append(letter)
        if not letters:
            # Nothing to sort; avoids fitting KMeans on an empty array.
            return []
        # Coarse sort: primarily by y coordinate, then by x.
        letters.sort(key=lambda x: x.boxesn[0] + 2000 * x.boxesn[1])
        letters_y = np.array([(x.boxesn[1] + x.boxesn[3]) / 2 for x in letters]).reshape(-1, 1)
        # Cluster vertical centres to split the letters into rows, then
        # sort each row left-to-right.
        self.clf.fit(letters_y)
        row_labels = self.clf.predict(letters_y)
        last_row_label = -1
        begin = 0
        sorted_letters = []  # letters in final reading order
        row_labels = np.append(row_labels, 99)  # sentinel flushes the final row
        for i, row_label in enumerate(row_labels):
            if row_label != last_row_label:  # a new row starts here
                temp = letters[begin:i]
                temp.sort(key=lambda x: x.boxesn[0])
                sorted_letters = sorted_letters + temp
                begin = i
                last_row_label = row_label
        return sorted_letters
def train(folder):
    """Fine-tune a COCO-pretrained Faster R-CNN (ResNet-50 FPN) on the
    BSTL traffic-light dataset rooted at ``folder``.

    Saves a checkpoint to ``/tmp/tlight_<epoch>.pt`` every 10 epochs.

    Change over the previous revision: the large blocks of commented-out
    alternative-architecture code (mobilenet backbone, custom anchors /
    RoI pooler) have been removed; runtime behavior is unchanged.
    """
    from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)
    num_classes = 4  # (Red, Yellow, Green, Unknown)
    # Replace the pre-trained box head with one sized for our classes.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features,
                                                      num_classes)

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Hold out the last 50 samples (a separate test set is not used here).
    dataset = BSTLDataset(root=folder)
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=10,
                                              shuffle=True,
                                              num_workers=8,
                                              collate_fn=collate_fn)

    model.to(device)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)
    lr_scheduler = None  # constant LR; run_one_epoch accepts None

    num_epochs = 100
    loop = tqdm(total=(num_epochs), position=0)
    loss_hist = Averager()
    for epoch in range(num_epochs):
        loss_hist.reset()
        tloss = run_one_epoch(data_loader, optimizer, model, lr_scheduler,
                              device, loss_hist)
        # No held-out evaluation is performed; report 0.0 as test loss.
        eloss = 0.0
        loop.set_description(
            'epoch:{}, train loss:{:.4f}, test loss:{:.4f}'.format(
                epoch, tloss, eloss))
        loop.update(1)
        if epoch % 10 == 0:
            torch.save(model.state_dict(), '/tmp/tlight_' + str(epoch) + '.pt')
def pretrained_model(model_name, model_dict=pretrained_model_dict, num_classes=2):
    """Fetch the detector registered under ``model_name`` and give it a
    fresh box-prediction head with ``num_classes`` outputs.

    NOTE(review): the model is taken from (and mutated inside) the shared
    ``model_dict`` — repeated calls with the same name re-head the same
    cached instance; confirm that is intended.
    """
    chosen = model_dict[model_name]
    head_width = chosen.roi_heads.box_predictor.cls_score.in_features
    chosen.roi_heads.box_predictor = FastRCNNPredictor(head_width, num_classes)
    return chosen
# Data loaders: batch of 4 for training, batch of 1 for validation.
data_loader_train = torch.utils.data.DataLoader(
    dataset_train, batch_size=4, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)
data_loader_val = torch.utils.data.DataLoader(dataset_val,
                                              batch_size=1,
                                              shuffle=True,
                                              num_workers=4,
                                              collate_fn=utils.collate_fn)
# COCO-pretrained Faster R-CNN; swap the box head for our class list.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    pretrained=True)
# NOTE(review): the input width is read from bbox_pred rather than the
# usual cls_score; both layers share the same input size, so the value is
# the same — confirm this was deliberate.
num_ftrs = model.roi_heads.box_predictor.bbox_pred.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(num_ftrs,
                                                  len(class_names))
num_epochs = 5
model = model.to(device)
best_mAP = 0.0
# Learning-rate sweep: each LR continues training from the current
# weights (the model is NOT reset between sweep iterations).
for lr in learningRates:
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    best_lr_mAP = 0.0
    dataset_sizes = {'train': len(dataset_train), 'val': len(dataset_val)}
    best_model_wts = copy.deepcopy(model.state_dict())
    for epoch in range(num_epochs):  # loop body continues beyond this chunk
def main(args):
    """Evaluate a pretrained Faster R-CNN detector on the dataset splits
    named in the YAML config ``args.cfg_file``.

    The checkpoint to load is selected by ``args.load_model`` from the
    config's ``test.pretrained_models`` mapping (local path or URL).
    """
    utils.init_distributed_mode(args)

    print(args)

    # Opening YAML cfg config file
    with open(args.cfg_file, 'r') as stream:
        try:
            cfg_file = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # NOTE(review): the parse error is only printed; execution then
            # continues and fails below on an undefined cfg_file.
            print(exc)

    # Retrieving cfg
    test_cfg = cfg_file['test']
    model_cfg = cfg_file['model']
    data_cfg = cfg_file['dataset']

    # Setting device
    device = torch.device(model_cfg['device'])

    # Retrieving pretrained model: the name must be one of the checkpoints
    # declared in the config.
    available_pretrained_models = test_cfg['pretrained_models']
    pretrained_model_name = args.load_model
    assert pretrained_model_name in available_pretrained_models.keys(), \
        "Pretrained model {} not available".format(pretrained_model_name)
    checkpoint_path = available_pretrained_models[pretrained_model_name]

    # Creating model: architecture is inferred from the checkpoint name
    # ("50" -> ResNet-50 backbone, otherwise ResNet-101).
    print("Creating model")
    if "50" in pretrained_model_name:
        model = fasterrcnn_resnet50_fpn(
            pretrained=False,
            pretrained_backbone=False,
            box_detections_per_img=model_cfg["max_dets_per_image"],
            box_score_thresh=model_cfg["det_thresh"],
            box_nms_thresh=model_cfg["nms"],
            model_dir=model_cfg["cache_folder"],
        )
    else:
        model = fasterrcnn_resnet101_fpn(
            pretrained=False,
            pretrained_backbone=False,
            box_detections_per_img=model_cfg["max_dets_per_image"],
            box_score_thresh=model_cfg["det_thresh"],
            box_nms_thresh=model_cfg["nms"],
            model_dir=model_cfg["cache_folder"],
        )

    # Loading weights: non-COCO checkpoints were trained with a 2-class
    # head, so the head must be replaced before loading the state dict.
    if not "coco" in pretrained_model_name:
        num_classes = 1 + 1  # num classes + background
        # Getting number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # Replacing the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
    # Checkpoint may be a URL or a local file path.
    if checkpoint_path.startswith('http://') or checkpoint_path.startswith(
            'https://'):
        checkpoint = torch.hub.load_state_dict_from_url(
            checkpoint_path,
            map_location='cpu',
            model_dir=model_cfg["cache_folder"])
    else:
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
    # Some checkpoints wrap the weights under a 'model' key.
    if 'model' in checkpoint.keys():
        checkpoint = checkpoint['model']
    model.load_state_dict(checkpoint)

    # Putting model to device and setting eval mode
    model.to(device)
    model.eval()

    # Retrieving phase and some data parameters
    phase = test_cfg['phase']
    assert phase == "test" or phase == "val", "Not valid phase"
    data_root = data_cfg['root']
    datasets_names = data_cfg[phase]

    # Creating dataset(s) and dataloader(s); validation can be limited to a
    # percentage of the split.
    percentage = None
    if phase == "val":
        percentage = test_cfg['percentage_val']
    for dataset_name, dataset_cfg in datasets_names.items():
        # Creating dataset
        dataset = CustomYoloAnnotatedDataset(data_root,
                                             {dataset_name: dataset_cfg},
                                             transforms=get_transform(),
                                             phase=phase,
                                             percentage=percentage)
        dataloader = DataLoader(dataset,
                                batch_size=test_cfg['batch_size'],
                                shuffle=False,
                                num_workers=test_cfg['num_workers'],
                                collate_fn=dataset.standard_collate_fn)

        # Evaluate; split name is the extension-like suffix of the cfg entry.
        evaluate(test_cfg,
                 model,
                 dataloader,
                 dataset_name,
                 split=dataset_cfg.rsplit(".", 1)[1],
                 args=args)

    print('DONE!')
def main(args):
    """Continual-learning experiment on Penn-Fudan pedestrians: detection
    only (Faster R-CNN) or detection + segmentation (Mask R-CNN),
    trained with a naive Avalanche strategy over 5 experiences.
    """
    # --- CONFIG
    device = torch.device(f"cuda:{args.cuda}" if torch.cuda.is_available()
                          and args.cuda >= 0 else "cpu")
    # ---------

    # --- TRANSFORMATIONS
    train_transform = ToTensor()
    test_transform = ToTensor()
    # ---------

    # --- SCENARIO CREATION
    torch.random.manual_seed(1234)  # fixed seed so splits are reproducible
    n_exps = 5
    benchmark = split_penn_fudan(
        n_experiences=n_exps,
        train_transform=train_transform,
        eval_transform=test_transform,
    )
    # ---------

    # MODEL CREATION
    num_classes = benchmark.n_classes + 1  # N classes + background
    if args.detection_only:
        # Ingore the segmentation task
        # load a model pre-trained on COCO
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
            pretrained=True)
        # Replace the classifier with a new one, that has "num_classes" outputs
        # 1) Get number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # 2) Replace the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
    else:
        # Detection + Segmentation
        model = torchvision.models.detection.maskrcnn_resnet50_fpn(
            pretrained=True)
        # Replace the classifier with a new one, that has "num_classes" outputs
        # 1) Get number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # 2) Replace the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
        # now get the number of input features for the mask classifier
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)
    model = model.to(device)

    # Define the optimizer and the scheduler (linear LR warm-up over at
    # most 1000 iterations of the first experience).
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)
    train_mb_size = 5
    warmup_factor = 1.0 / 1000
    warmup_iters = min(
        1000, len(benchmark.train_stream[0].dataset) // train_mb_size - 1)
    lr_scheduler = torch.optim.lr_scheduler.LinearLR(
        optimizer, start_factor=warmup_factor, total_iters=warmup_iters)

    # CREATE THE STRATEGY INSTANCE (NAIVE)
    cl_strategy = ObjectDetectionTemplate(
        model=model,
        optimizer=optimizer,
        train_mb_size=train_mb_size,
        train_epochs=1,
        eval_mb_size=train_mb_size,
        device=device,
        plugins=[
            # Warm-up steps per iteration, and only during the first epoch
            # of the first experience.
            LRSchedulerPlugin(
                lr_scheduler,
                step_granularity="iteration",
                first_exp_only=True,
                first_epoch_only=True,
            )
        ],
        evaluator=EvaluationPlugin(
            timing_metrics(epoch=True),
            loss_metrics(epoch_running=True),
            make_penn_fudan_metrics(detection_only=args.detection_only),
            loggers=[InteractiveLogger()],
        ),
    )

    # TRAINING LOOP: train on each experience, then evaluate on the whole
    # test stream.
    print("Starting experiment...")
    for i, experience in enumerate(benchmark.train_stream):
        print("Start of experience: ", experience.current_experience)
        print("Train dataset contains", len(experience.dataset), "instances")

        cl_strategy.train(experience, num_workers=4)
        print("Training completed")

        cl_strategy.eval(benchmark.test_stream, num_workers=4)
        print("Evaluation completed")
def train(base_dir, n_splits=5, n_epochs=40, batch_size=16, train_folds=None,
          model_name='faster-rcnn-baseline', eval_per_n_epochs=10, seed=15501,
          verbose=True):
    """
    Train frcnn baseline.

    Largely inspired by:
    https://www.kaggle.com/pestipeti/pytorch-starter-fasterrcnn-train

    train_folds expects a list/tuple of ints; if given, only those folds
    are trained.

    Fixes over the previous revision:
    * the "training only on folds" message passed the ``log_message``
      function itself instead of the message string;
    * the NaN guards used ``continue`` inside the per-sample inspection
      loops, which never skipped the offending batch — a flag now does;
      integer targets are skipped since ``torch.isnan`` requires floats;
    * ``epoch+1 % eval_per_n_epochs == 0`` parsed as
      ``epoch + (1 % n) == 0`` and so evaluation never ran; it now runs
      every ``eval_per_n_epochs`` epochs;
    * validation predictions are ordered by *descending* confidence before
      AP computation (``np.argsort`` alone sorts ascending).
    """
    np.random.seed(seed)

    # --- output directories and logging ------------------------------------
    data_dir = os.path.join(base_dir, 'data')
    train_imgs_dir = os.path.join(data_dir, 'train')
    test_imgs_dir = os.path.join(data_dir, 'test')
    models_out_dir = os.path.join(base_dir, 'artifacts', model_name, 'models')
    os.makedirs(models_out_dir, exist_ok=True)
    preds_out_dir = os.path.join(base_dir, 'artifacts', model_name,
                                 'predictions')
    os.makedirs(preds_out_dir, exist_ok=True)
    log_file = os.path.join(base_dir, 'artifacts', model_name, 'train.log')
    open(log_file, 'a').close()  # create empty file.
    logger = logging.getLogger(model_name)
    logger.addHandler(logging.FileHandler(log_file))
    logger.setLevel(logging.INFO)

    # --- fold construction (grouped by image so boxes of one image never
    # straddle the train/val boundary) --------------------------------------
    train_df, test_df = get_train_test_df(data_dir)
    kf = GroupKFold(n_splits)
    split = kf.split(X=train_df[['image_id']],
                     y=train_df[['x', 'y', 'w', 'h']],
                     groups=train_df['image_id'])
    if isinstance(train_folds, (list, tuple)):
        split = [fold for i, fold in enumerate(split) if i in train_folds]
        info = f'Training only on folds {train_folds}.'
        # FIX: previously passed log_message itself instead of the message.
        log_message(info, logger, verbose)
    else:
        train_folds = range(n_splits)

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    for split_n, (train_idx, val_idx) in zip(train_folds, split):
        info = f'Training fold {split_n} beginning.'
        log_message(info, logger, verbose)
        fold_train = train_df.iloc[train_idx].copy()
        fold_val = train_df.iloc[val_idx].copy()
        train_dataset = WheatDataset(fold_train, train_imgs_dir,
                                     get_train_transform())
        val_dataset = WheatDataset(fold_val, train_imgs_dir,
                                   get_valid_transform())
        test_dataset = WheatDataset(test_df, test_imgs_dir,
                                    get_test_transform(), train=False)

        # load pretrained faster-rcnn with resnet50 backbone
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        # update pre-trained head (wheat + background)
        num_classes = 2
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

        train_data_loader = DataLoader(train_dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=4,
                                       collate_fn=collate_fn)
        val_data_loader = DataLoader(val_dataset, batch_size=8, shuffle=False,
                                     num_workers=4, collate_fn=collate_fn)
        test_data_loader = DataLoader(test_dataset, batch_size=4,
                                      shuffle=False, num_workers=4,
                                      drop_last=False, collate_fn=collate_fn)

        model.to(device)
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9,
                                    weight_decay=0.0005)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 3, gamma=0.75)
        loss_hist = LossAverager()
        tstart = time.time()

        for epoch in range(1, n_epochs + 1):
            info = f'Training epoch #{epoch}.'
            log_message(info, logger, verbose)
            loss_hist.reset()
            model.train()
            it = 1
            for images, targets, _ in train_data_loader:
                # FIX: skip the whole batch when any sample contains NaNs;
                # the previous bare `continue` only advanced the inner
                # inspection loop and the batch was trained on anyway.
                batch_has_nan = False
                for im, targ in zip(images, targets):
                    if torch.isnan(im).any():
                        info = f'ERROR: NaN in input image. Epoch {epoch}, iteration {it}.'
                        log_message(info, logger, verbose, err=True)
                        batch_has_nan = True
                    for key, t_val in targ.items():
                        # torch.isnan only supports floating-point tensors.
                        if t_val.is_floating_point() and torch.isnan(t_val).any():
                            info = f'ERROR: NaN in target {key}. Epoch {epoch}, iteration {it}.'
                            log_message(info, logger, verbose, err=True)
                            batch_has_nan = True
                if batch_has_nan:
                    continue

                images = list(image.to(device) for image in images)
                targets = [{k: v.long().to(device) for k, v in t.items()}
                           for t in targets]

                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                loss_value = losses.item()
                if not math.isfinite(loss_value):
                    info = f'Loss {loss_value} is not finite. Epoch {epoch}, iteration {it}.'
                    log_message(info, logger, verbose, err=True)
                    optimizer.zero_grad()
                    continue

                loss_hist.send(loss_value)
                optimizer.zero_grad()
                losses.backward()
                # torch.nn.utils.clip_grad_value_(model.parameters(), 2)
                optimizer.step()
                if it % 20 == 0:
                    info = f'Iteration #{it} loss: {loss_value}'
                    log_message(info, logger, verbose)
                it += 1
            lr_scheduler.step()
            tepoch = time.time() - tstart
            info = f'Epoch #{epoch} completed after {tepoch // 60} minutes {round(tepoch % 60)} seconds. Loss: {loss_hist.value}.'
            log_message(info, logger, verbose)

            # FIX: `epoch+1 % eval_per_n_epochs == 0` was a precedence bug
            # that made this block unreachable.
            if epoch % eval_per_n_epochs == 0:
                # may want to add this to eval.py... somehow?
                thresholds = np.linspace(0.5, 0.75, 6)
                precisions_by_thresh = []
                model.eval()
                for images, targets, _ in val_data_loader:
                    images = list(image.to(device) for image in images)
                    outputs = model(images)
                    for targ, out in zip(targets, outputs):
                        gt = targ['boxes'].cpu().numpy().astype(np.int32)
                        scores = out['scores'].data.cpu().numpy()
                        # predictions ordered by descending confidence
                        # (FIX: argsort alone orders ascending).
                        preds = out['boxes'].data.cpu().numpy()[np.argsort(scores)[::-1]]
                        ap_by_thresh = calculate_image_precision_by_threshold(
                            gt, preds, thresholds=thresholds,
                            form='pascal_voc')
                        precisions_by_thresh.extend(ap_by_thresh)
                mean_precisions_by_thresh = pd.DataFrame(
                    precisions_by_thresh, columns=['thresh', 'ap'])
                mean_precisions_by_thresh = mean_precisions_by_thresh.groupby(
                    'thresh')['ap'].mean().reset_index()
                mean_ap = mean_precisions_by_thresh['ap'].mean()
                for thresh, ap in zip(mean_precisions_by_thresh['thresh'],
                                      mean_precisions_by_thresh['ap']):
                    info = f'Epoch #{epoch} - AP at IOU threshold {thresh}: {ap}.'
                    log_message(info, logger, verbose)
                info = f'Epoch #{epoch} - Mean AP across all thresholds: {mean_ap}.'
                log_message(info, logger, verbose)

        # save model.
        torch.save(model.state_dict(),
                   os.path.join(models_out_dir, f'trained_fold_{split_n}.pth'))

        # Out-of-fold predictions on the validation split (xywh + score).
        model.eval()
        detection_threshold = 0.1
        res = []
        for images, _, image_ids in val_data_loader:
            images = list(image.to(device) for image in images)
            outputs = model(images)
            for output, image_id in zip(outputs, image_ids):
                boxes = output['boxes'].data.cpu().numpy()
                scores = output['scores'].data.cpu().numpy()
                boxes = boxes[scores >= detection_threshold].astype(np.int32)
                scores = scores[scores >= detection_threshold]
                boxes[:, 2] = boxes[:, 2] - boxes[:, 0]  # xyxy -> xywh
                boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
                for out in np.hstack([boxes, scores.reshape(-1, 1)]):
                    res.append([image_id] + list(out))
        df_res = pd.DataFrame(res,
                              columns=['image_id', 'x', 'y', 'w', 'h', 'score'])
        df_res.to_csv(os.path.join(preds_out_dir,
                                   f'oof_pred_fold_{split_n}.csv'),
                      index=False)

        # Test-set predictions for this fold (same format).
        detection_threshold = 0.1
        res = []
        for images, image_ids in test_data_loader:
            images = list(image.to(device) for image in images)
            outputs = model(images)
            for output, image_id in zip(outputs, image_ids):
                boxes = output['boxes'].data.cpu().numpy()
                scores = output['scores'].data.cpu().numpy()
                boxes = boxes[scores >= detection_threshold].astype(np.int32)
                scores = scores[scores >= detection_threshold]
                boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
                boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
                for out in np.hstack([boxes, scores.reshape(-1, 1)]):
                    res.append([image_id] + list(out))
        df_res = pd.DataFrame(res,
                              columns=['image_id', 'x', 'y', 'w', 'h', 'score'])
        df_res.to_csv(os.path.join(preds_out_dir,
                                   f'test_pred_fold_{split_n}.csv'),
                      index=False)
test_loader = PascalDetectionDataset(test_pascal, PATH_PASCAL + PASCAL_IMG, PATH_PASCAL + PASCAL_XML, transform_detection_pascal) ##Hyperparameters for detection num_archi_features = len(archi_features) num_classes_detection = num_archi_features + 1 # num_archi_features + background num_styles = len(styles) ##Build detection model if args.weight == "bbox_level": from utils.pytorch_utils import fasterrcnn_resnet50_fpn_custom detector = fasterrcnn_resnet50_fpn_custom(True) in_features = detector.roi_heads.box_predictor.cls_score.in_features detector.roi_heads.box_predictor = FastRCNNPredictor( in_features, num_classes_detection) else: detector = models.detection.fasterrcnn_resnet50_fpn(True) in_features = detector.roi_heads.box_predictor.cls_score.in_features detector.roi_heads.box_predictor = FastRCNNPredictor( in_features, num_classes_detection) if args.exp_weights == 'exponential': is_exponential = True elif args.exp_weights == 'linear': is_exponential = False else: print("Unrecognized type of weighting, defaulted to linear") is_exponential = False detector.cuda()
def train(batch_size, checkpoint_freq, num_epochs):
    """Fine-tune a COCO-pretrained Mask R-CNN on the PennFudan pedestrian
    dataset, with SageMaker Debugger (``smd``) hooks around train/eval.

    Fixes over the previous revision:
    * ``checkpoint_freq`` was accepted but never used, and a checkpoint
      was written every epoch to the literal filename 'model_{}.pth'
      (placeholder never substituted); checkpoints are now written every
      ``checkpoint_freq`` epochs with the epoch number filled in;
    * the warm-up scheduler created in epoch 0 overwrote the StepLR
      scheduler, which was then also stepped once per *iteration*;
      warm-up is now a separate local stepped only during epoch 0, and
      StepLR steps once per epoch as intended.
    """
    num_classes = 2  # pedestrian + background
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True, )
    # Replace the box-classification head for our class count.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # Replace the mask-prediction head as well.
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    model = torch.nn.DataParallel(model)
    model.to('cuda')

    # Hold out the last 50 samples for evaluation.
    dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
    dataset_test = PennFudanDataset('PennFudanPed', get_transform(train=False))
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=batch_size, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9,
                                weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3,
                                                   gamma=0.1)
    hook = smd.Hook.create_from_json_file()

    for epoch in range(num_epochs):
        hook.set_mode(modes.TRAIN)
        model.train()
        metric_logger = utils.MetricLogger(delimiter=" ")
        metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1,
                                                          fmt='{value:.6f}'))

        # Linear LR warm-up, only during the first epoch; kept separate
        # from the epoch-level StepLR (FIX: it used to replace it).
        warmup_scheduler = None
        if epoch == 0:
            warmup_factor = 1. / 1000
            warmup_iters = min(1000, len(data_loader) - 1)
            warmup_scheduler = utils.warmup_lr_scheduler(optimizer,
                                                         warmup_iters,
                                                         warmup_factor)

        for iteration, (images, targets) in enumerate(data_loader):
            images = list(image.to('cuda') for image in images)
            targets = [{k: v.to('cuda') for k, v in t.items()}
                       for t in targets]
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            # Reduced losses are for logging only; backprop uses `losses`.
            loss_dict_reduced = utils.reduce_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            if warmup_scheduler is not None:
                warmup_scheduler.step()
            metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
            metric_logger.update(lr=optimizer.param_groups[0]["lr"])

        # Periodic checkpoint (FIX: filename placeholder is now filled and
        # the save frequency honours checkpoint_freq).
        if epoch % checkpoint_freq == 0:
            utils.save_on_master({
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }, 'model_{}.pth'.format(epoch))
        lr_scheduler.step()
        hook.set_mode(modes.EVAL)
        evaluate(model, data_loader_test, device='cuda')
def build_and_set_model(self):
    """Build the Mask R-CNN (with a monkey-patched, optionally focal,
    mask loss), replace both prediction heads for 2 classes, and create
    the SGD optimizer plus StepLR scheduler on ``self``.

    Sets: self.device, self.model_ft, self.optimizer, self.lr_scheduler.
    """
    # create mask rcnn model
    num_classes = 2  # background + foreground
    self.device = torch.device(
        "cpu")  # TODO check if cuda is supported, or we just use cpu
    # more details at https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
    # finetuning
    # load a model pre-trained on COCO, num_classes=91, cannot change.... as the pretrained model won't load
    self.model_ft = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=True, image_mean=self.img_mean, image_std=self.img_std)
    # Experiment log of earlier focal-loss settings (kept for reference):
    # FL = FocalLoss(gamma=2, alpha=0.75)  # Version 5, gamma=2, alpha=0.75, 0.8034...
    # FL = FocalLoss(gamma=1, alpha=0.75, magnifier=3)  # + early stop,2 stop at 7, (data split) version 7 0.8026
    # FL = FocalLoss(gamma=1, alpha=0.5, magnifier=3)  # version 8 0.8031, 6 epoch
    # FL = FocalLoss(gamma=0.5, alpha=0.5, magnifier=1)  # command line submission, 4 epochs cv+aug
    FL = FocalLoss(
        gamma=0.5, alpha=0.5, magnifier=1
    )  # changed lr decay 2/0.15 + patience=3, do not use focal loss...
    # Wrap the focal loss so it matches the mask-loss callback signature.
    FL_wrapped = functools.partial(maskrcnn_loss_focal, focal_loss_func=FL)
    # FL_wrapped = None  # changed lr decay 2/0.15, do not use focal loss... 0.8025
    # Monkey-patch the model's RoI heads to use the customized mask loss.
    RoIHeads_loss_customized.set_customized_loss(
        self.model_ft.roi_heads, maskrcnn_loss_customized=FL_wrapped)
    RoIHeads_loss_customized.update_forward_func(self.model_ft.roi_heads)
    # get number of input features for the classifier
    in_features = self.model_ft.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    self.model_ft.roi_heads.box_predictor = FastRCNNPredictor(
        in_features, num_classes)
    # change mask prediction head, only predict background and the
    # pneumonia-like foreground part
    in_features_mask = self.model_ft.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    self.model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, hidden_layer, num_classes)
    # GPU
    # my_trace()  # test about to
    self.model_ft.to(self.device)
    # self.logger.debug(f"model info:\n{self.model_ft}")
    # for param in self.model_ft.parameters():
    #     param.requires_grad = True
    params = [p for p in self.model_ft.parameters() if p.requires_grad]
    start_learning_rate = 0.001
    # When resuming a debug run, start from a much smaller LR.
    try:
        if self._debug_continue_training:  # monkey patch
            start_learning_rate = 0.00001
    except Exception:
        pass
    self.optimizer = torch.optim.SGD(params,
                                     lr=start_learning_rate,
                                     momentum=0.9,
                                     weight_decay=0.0005)
    self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
        self.optimizer,
        step_size=4,  # after changed to 3, 0.8042 (improved from 0.8033)
        # step_size 4, with little aug, 0.8037 (Version 11)
        gamma=0.1,
    )
def detection_fasterrcnn(img_path, finetune=False):
    """Run a COCO-pretrained Faster R-CNN (ResNet-50 FPN) on one image,
    draw all detections scoring >= 0.5, count detected persons, and show
    the annotated image in an OpenCV window.

    Args:
        img_path: path of the image file to load with OpenCV.
        finetune: when True, swap in a fresh 2-class (person + background)
            box head before running inference.
    """
    torch.cuda.empty_cache()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load a model pre-trained pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)
    if finetune:
        # replace the classifier with a new one, that has
        # num_classes which is user-defined
        num_classes = 2  # 1 class (person) + background
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
    model = model.to(device)
    model.eval()
    print(model)

    # Load the image, then convert BGR -> RGB, HWC -> CHW, scale to [0, 1].
    img = cv2.imread(img_path)
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    chw = np.transpose(rgb, (2, 0, 1)).astype(np.float32) / 255.0
    batch = torch.from_numpy(chw).to(device).unsqueeze(0)

    with torch.no_grad():
        out = model(batch)

    boxes = out[0]["boxes"].data.cpu().numpy()
    scores = out[0]["scores"].data.cpu().numpy()
    labels = out[0]["labels"].data.cpu().tolist()
    # NOTE(review): mapping kept from the original; the pretrained model
    # emits COCO label ids, so these names may not match — confirm.
    category = {
        0: 'background',
        1: 'person',
        2: 'traffic light',
        3: 'train',
        4: 'traffic sign',
        5: 'rider',
        6: 'car',
        7: 'bike',
        8: 'motor',
        9: 'truck',
        10: 'bus'
    }
    boxes = boxes[scores >= 0.5].astype(np.int32)

    # Draw every confident box; annotate and count the person detections.
    pnum = 0
    for i, box in enumerate(boxes):
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (255, 0, 0),
                      thickness=2)
        if labels[i] == 1:
            draw_texts(img, 'person ' + str(round(scores[i], 3)),
                       offset_x=box[0], offset_y=box[1])
            pnum += 1
    draw_texts(img, 'people: ' + str(pnum), offset_x=10, offset_y=20,
               color=(0, 255, 0))

    cv2.imshow("result", img)
    cv2.waitKey(0)
def train(opt):
    """Train Faster R-CNN (ResNet-50 FPN) on the Waymo dataset.

    Args:
        opt: parsed command-line options; reads batch_size, num_epochs,
            cam_view, pretrained_model, freeze_layers, log_path, saved_path,
            test_interval, es_min_delta and es_patience.
    """
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {"batch_size": opt.batch_size * num_gpus,
                       "shuffle": True,
                       "drop_last": True,
                       "collate_fn": collate_fn,
                       "num_workers": 12}
    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False,
                   "collate_fn": collate_fn,
                   "num_workers": 12}

    training_set = WaymoDataset(
        cameras=[opt.cam_view], scope='training',
        transform=transforms.Compose([Normalizer(), Resizer()]),
        mod='fast_rcnn')
    training_generator = DataLoader(training_set, **training_params)
    test_set = WaymoDataset(
        cameras=[opt.cam_view], scope='validation',
        transform=transforms.Compose([Normalizer(), Resizer()]),
        mod='fast_rcnn')
    test_generator = DataLoader(test_set, **test_params)

    print(f'Using pretrained model? {opt.pretrained_model}')
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=opt.pretrained_model)
    # num_classes is dataset-defined; swap the COCO box head for one of the
    # right size, then restore the previously trained Waymo weights.
    num_classes = training_set.num_classes()
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    model.load_state_dict(
        torch.load('trained_models/fasterrcnn_resnet50_waymo.pth'))

    # only if we use the pretrained model
    if opt.pretrained_model:
        transfer_learning(model, opt.freeze_layers)

    # Choosing the device: cpu or gpu
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    if torch.cuda.is_available():
        model = model.cuda()
        model = nn.DataParallel(model)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9,
                                weight_decay=0.0005)
    # learning rate decays 10x every 3 epochs
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3,
                                                gamma=0.1)

    def _to_device(images, targets):
        # FIX: Python lists have no .cuda(); the original crashed with
        # `model(images.cuda(), targets.cuda())`. Move each tensor instead.
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) if torch.is_tensor(v) else v
                    for k, v in t.items()} for t in targets]
        return images, targets

    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for iter, data in enumerate(progress_bar):
            optimizer.zero_grad()
            images, targets = _to_device(list(data[0]),
                                         [dict(t) for t in data[1]])
            losses = model(images, targets)
            cls_loss, reg_loss = losses['loss_classifier'], losses['loss_box_reg']
            cls_loss = cls_loss.mean()
            reg_loss = reg_loss.mean()
            loss = cls_loss + reg_loss
            if loss == 0:
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
            optimizer.step()
            epoch_loss.append(float(loss))
            total_loss = np.mean(epoch_loss)
            if iter % 5 == 0:
                print(f'Total loss at iteration {iter}: {total_loss}')
            progress_bar.set_description(
                'Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'.format(
                    epoch + 1, opt.num_epochs, iter + 1, num_iter_per_epoch,
                    cls_loss, reg_loss, loss, total_loss))
            # Periodic checkpoint every 200 iterations
            if iter % 200 == 0:
                print(f"Saving model at :{opt.saved_path}/fasterrcnn_resnet50_waymo.pth")
                torch.save(model.state_dict(),
                           os.path.join(opt.saved_path, "fasterrcnn_resnet50_waymo.pth"))
        # FIX: StepLR.step() takes no metric argument (the original passed the
        # epoch loss, which StepLR does not accept/use).
        scheduler.step()

        if epoch % opt.test_interval == 0:
            loss_regression_ls = []
            loss_classification_ls = []
            for iter, data in enumerate(test_generator):
                with torch.no_grad():
                    images, targets = _to_device(list(data[0]),
                                                 [dict(t) for t in data[1]])
                    losses = model(images, targets)
                    cls_loss, reg_loss = losses['loss_classifier'], losses['loss_box_reg']
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()
                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))
            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss
            # FIX: the original string literal was split across physical lines
            # (a SyntaxError); re-joined into one format string.
            print('Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'.format(
                epoch + 1, opt.num_epochs, cls_loss, reg_loss, loss))

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(model.state_dict(),
                           os.path.join(opt.saved_path, "fasterrcnn_resnet50_waymo.pth"))
                dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                if torch.cuda.is_available():
                    dummy_input = dummy_input.cuda()
                export_model = model.module if isinstance(model, nn.DataParallel) else model
                # NOTE(review): set_swish()/backbone_net look copied from an
                # EfficientDet pipeline; torchvision's FasterRCNN has no
                # `backbone_net`, so guard to avoid an AttributeError at the
                # best-checkpoint export.
                if hasattr(export_model, 'backbone_net'):
                    export_model.backbone_net.model.set_swish(memory_efficient=False)
                    torch.onnx.export(export_model, dummy_input,
                                      os.path.join(opt.saved_path, "fasterrcnn_resnet50_waymo.onnx"),
                                      verbose=False)
                    export_model.backbone_net.model.set_swish(memory_efficient=True)

        # Early stopping
        if epoch - best_epoch > opt.es_patience > 0:
            print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, loss))
            break
def __init__(self, config, device):
    """Build the joint ESRGAN-EESN super-resolution + Faster R-CNN model.

    Sets up generator, discriminator and detector networks, their losses,
    optimizers and LR schedulers from the experiment configuration.

    Args:
        config: experiment configuration dict; expects the keys 'network_G',
            'network_D', 'train', 'optimizer' (with 'args') and 'lr_scheduler'.
        device: torch device the networks and losses are moved to.
    """
    super(ESRGAN_EESN_FRCNN_Model, self).__init__(config, device)
    # Cache frequently used config sub-sections.
    self.configG = config['network_G']
    self.configD = config['network_D']
    self.configT = config['train']
    self.configO = config['optimizer']['args']
    self.configS = config['lr_scheduler']
    self.config = config
    self.device = device
    # Generator
    self.netG = model.ESRGAN_EESN(in_nc=self.configG['in_nc'],
                                  out_nc=self.configG['out_nc'],
                                  nf=self.configG['nf'],
                                  nb=self.configG['nb'])
    self.netG = self.netG.to(self.device)
    self.netG = DataParallel(self.netG)
    # Discriminator
    self.netD = model.Discriminator_VGG_128(in_nc=self.configD['in_nc'],
                                            nf=self.configD['nf'])
    self.netD = self.netD.to(self.device)
    self.netD = DataParallel(self.netD)
    # FRCNN_model: COCO-pretrained detector with a 2-class box head.
    self.netFRCNN = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)
    num_classes = 2  # car and background
    in_features = self.netFRCNN.roi_heads.box_predictor.cls_score.in_features
    self.netFRCNN.roi_heads.box_predictor = FastRCNNPredictor(
        in_features, num_classes)
    self.netFRCNN.to(self.device)

    # All three networks start in training mode.
    self.netG.train()
    self.netD.train()
    self.netFRCNN.train()
    # print(self.configT['pixel_weight'])

    # G CharbonnierLoss for final output SR and GT HR
    self.cri_charbonnier = CharbonnierLoss().to(device)
    # G pixel loss (disabled when pixel_weight == 0)
    if self.configT['pixel_weight'] > 0.0:
        l_pix_type = self.configT['pixel_criterion']
        if l_pix_type == 'l1':
            self.cri_pix = nn.L1Loss().to(self.device)
        elif l_pix_type == 'l2':
            self.cri_pix = nn.MSELoss().to(self.device)
        else:
            raise NotImplementedError(
                'Loss type [{:s}] not recognized.'.format(l_pix_type))
        self.l_pix_w = self.configT['pixel_weight']
    else:
        self.cri_pix = None

    # G feature (perceptual) loss (disabled when feature_weight == 0)
    # print(self.configT['feature_weight']+1)
    if self.configT['feature_weight'] > 0:
        l_fea_type = self.configT['feature_criterion']
        if l_fea_type == 'l1':
            self.cri_fea = nn.L1Loss().to(self.device)
        elif l_fea_type == 'l2':
            self.cri_fea = nn.MSELoss().to(self.device)
        else:
            raise NotImplementedError(
                'Loss type [{:s}] not recognized.'.format(l_fea_type))
        self.l_fea_w = self.configT['feature_weight']
    else:
        self.cri_fea = None
    if self.cri_fea:  # load VGG perceptual loss
        self.netF = model.VGGFeatureExtractor(feature_layer=34,
                                              use_input_norm=True,
                                              device=self.device)
        self.netF = self.netF.to(self.device)
        self.netF = DataParallel(self.netF)
        self.netF.eval()

    # GD gan loss
    self.cri_gan = GANLoss(self.configT['gan_type'], 1.0, 0.0).to(self.device)
    self.l_gan_w = self.configT['gan_weight']
    # D_update_ratio and D_init_iters (fall back to 1 / 0 when unset)
    self.D_update_ratio = self.configT['D_update_ratio'] if self.configT[
        'D_update_ratio'] else 1
    self.D_init_iters = self.configT['D_init_iters'] if self.configT[
        'D_init_iters'] else 0

    # optimizers
    # G
    wd_G = self.configO['weight_decay_G'] if self.configO[
        'weight_decay_G'] else 0
    optim_params = []
    for k, v in self.netG.named_parameters(
    ):  # can optimize for a part of the model
        if v.requires_grad:
            optim_params.append(v)
    self.optimizer_G = torch.optim.Adam(optim_params,
                                        lr=self.configO['lr_G'],
                                        weight_decay=wd_G,
                                        betas=(self.configO['beta1_G'],
                                               self.configO['beta2_G']))
    self.optimizers.append(self.optimizer_G)

    # D
    wd_D = self.configO['weight_decay_D'] if self.configO[
        'weight_decay_D'] else 0
    self.optimizer_D = torch.optim.Adam(self.netD.parameters(),
                                        lr=self.configO['lr_D'],
                                        weight_decay=wd_D,
                                        betas=(self.configO['beta1_D'],
                                               self.configO['beta2_D']))
    self.optimizers.append(self.optimizer_D)

    # FRCNN -- use weight decay
    FRCNN_params = [
        p for p in self.netFRCNN.parameters() if p.requires_grad
    ]
    self.optimizer_FRCNN = torch.optim.SGD(FRCNN_params,
                                           lr=0.005,
                                           momentum=0.9,
                                           weight_decay=0.0005)
    self.optimizers.append(self.optimizer_FRCNN)

    # schedulers: one scheduler per optimizer, type chosen by config
    if self.configS['type'] == 'MultiStepLR':
        for optimizer in self.optimizers:
            self.schedulers.append(
                lr_scheduler.MultiStepLR_Restart(
                    optimizer,
                    self.configS['args']['lr_steps'],
                    restarts=self.configS['args']['restarts'],
                    weights=self.configS['args']['restart_weights'],
                    gamma=self.configS['args']['lr_gamma'],
                    clear_state=False))
    elif self.configS['type'] == 'CosineAnnealingLR_Restart':
        for optimizer in self.optimizers:
            self.schedulers.append(
                lr_scheduler.CosineAnnealingLR_Restart(
                    optimizer,
                    self.configS['args']['T_period'],
                    eta_min=self.configS['args']['eta_min'],
                    restarts=self.configS['args']['restarts'],
                    weights=self.configS['args']['restart_weights']))
    else:
        raise NotImplementedError(
            'MultiStepLR learning rate scheme is enough.')
    print(self.configS['args']['restarts'])
    self.log_dict = OrderedDict()

    self.print_network()  # print network
    self.load()  # load G and D if needed
def initialize_model():
    """Build a Faster R-CNN (ResNet-101 FPN) detector with a 2-class box head.

    Returns:
        The detector with its classification head replaced by a
        ``FastRCNNPredictor`` sized for 2 classes, with no pretrained weights.
    """
    detector = fasterrcnn_resnet101_fpn(pretrained=False)
    head_in_dim = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_dim, 2)
    return detector
def main(args):
    """Run per-frame detection/segmentation on a video and display results.

    Args:
        args: parsed CLI namespace; reads cuda_device, model_path,
            video_path and score_thr.
    """
    device = torch.device(args.cuda_device)
    # device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    PATH = args.model_path
    transform1 = transforms.Compose([
        transforms.ToTensor(),  # range [0, 255] -> [0.0, 1.0]
    ])
    num_classes = int(configs["number_of_class"]) + 1  # class + background

    # Build the architecture selected by the config file.
    if configs["model_name"] == "mask_rcnn":
        model = torchvision.models.detection.maskrcnn_resnet50_fpn(
            pretrained=True)
    elif configs["model_name"] == "faster_rcnn":
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
            pretrained=True)
    # Replace the pre-trained box head with one sized for our classes.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    if configs["model_name"] == "mask_rcnn":
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)

    checkpoint = torch.load(PATH)
    model.load_state_dict(checkpoint)
    model.apply(deactivate_batchnorm)
    model.to(device)
    model.eval()

    cap = cv2.VideoCapture(args.video_path)
    try:
        while True:
            success, image = cap.read()
            if not success:
                break
            image1 = Image.fromarray(cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB))
            image_tensor = transform1(image1)
            image_tensor = image_tensor.unsqueeze(0).to(device)
            # FIX: inference under no_grad — the original accumulated autograd
            # state on every frame.
            with torch.no_grad():
                predictions = model(image_tensor)
            # FIX: Faster R-CNN outputs have no "masks" key; the original
            # raised KeyError on that path.
            masks = predictions[0].get("masks")
            scores = predictions[0]["scores"]
            class_id = predictions[0]["labels"]
            boxes = predictions[0]["boxes"]
            color = (0, 255, 0)  # FIX: was only defined on the mask_rcnn path
            for i in range(len(boxes)):
                score = scores.data[i]
                if score < args.score_thr:
                    continue
                bb_box = boxes.data[i]
                if configs["model_name"] == "mask_rcnn":
                    mask = masks[i].cpu().detach().numpy()
                    mask[mask >= 0.6] = 1
                    mask = np.reshape(mask,
                                      (mask.shape[1], mask.shape[2],
                                       mask.shape[0])).astype(np.uint8)
                    contours, hierarchy = cv2.findContours(
                        mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                    cv2.drawContours(image, contours, 0, color, 4)
                    mask_copy = np.reshape(
                        mask, (mask.shape[0], mask.shape[1])).astype(np.uint8)
                    image = apply_mask(image, mask_copy, color)
                # FIX: cv2 needs plain ints, not tensor elements; corners
                # normalized to (x1, y1)-(x2, y2) — same rectangle as before.
                x1, y1, x2, y2 = (int(v) for v in bb_box)
                cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
            # cv2.imwrite("result.png", image)
            show_image = cv2.resize(image.copy(), (720, 480))
            cv2.imshow('image', show_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # FIX: release capture/window resources (missing in the original).
        cap.release()
        cv2.destroyAllWindows()
# Training epochs num_epochs = opt.num_epochs # Number of images in a batch batch_size = opt.batch_size # Image size max_size = opt.max_size min_size = opt.min_size # Initial model model = fasterrcnn_resnet50_fpn(pretrained=True, min_size=min_size, max_size=max_size) in_features = model.roi_heads.box_predictor.cls_score.in_features model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2) # load a pre-trained model for classification and return # only the features if neck_name == 'fpn': out_channels = 256 backbone = resnet_fpn_backbone(backbone_name, pretrained=True) backbone.out_channels = out_channels model.backbone = backbone elif neck_name == 'gfpn': out_channels = 256 backbone = GroupedPyramidFeatures(backbone_name=backbone_name, out_features=out_channels, pretrained=True) backbone.out_channels = out_channels model.backbone = backbone
def __init__(self, train_data, mode='sgcls', require_overlap_det=True,
             use_bias=False, test_bias=False, backbone='vgg16',
             RELS_PER_IMG=1024, min_size=None, max_size=None,
             edge_model='motifs'):
    """
    Base class for an SGG model
    :param train_data: dataset providing ind_to_classes / ind_to_predicates
    :param mode: (sgcls, predcls, or sgdet)
    :param require_overlap_det: Whether two objects must intersect
    :param use_bias: if True, attach a FrequencyBias built from train_data
    :param test_bias: flag stored for use at test time
    :param backbone: 'resnet50' or 'vgg16' — selects the detector variant
    :param RELS_PER_IMG: max relations per image (stored)
    :param min_size: detector transform min image size (backbone default if None)
    :param max_size: detector transform max image size (backbone default if None)
    :param edge_model: edge-feature model name passed to UnionBoxesAndFeats
    """
    super(RelModelBase, self).__init__()
    self.classes = train_data.ind_to_classes
    self.rel_classes = train_data.ind_to_predicates
    self.mode = mode
    self.backbone = backbone
    self.RELS_PER_IMG = RELS_PER_IMG
    # ROI pooling output size and feature-map stride used by the edge model.
    self.pool_sz = 7
    self.stride = 16
    self.use_bias = use_bias
    self.test_bias = test_bias
    # Overlap requirement only applies in detection mode.
    self.require_overlap = require_overlap_det and self.mode == 'sgdet'

    if self.backbone == 'resnet50':
        self.obj_dim = 1024
        self.fmap_sz = 21
        if min_size is None:
            min_size = 1333
        if max_size is None:
            max_size = 1333
        print('\nLoading COCO pretrained model maskrcnn_resnet50_fpn...\n')
        # See https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
        self.detector = torchvision.models.detection.maskrcnn_resnet50_fpn(
            pretrained=True,
            min_size=min_size,
            max_size=max_size,
            box_detections_per_img=50,
            box_score_thresh=0.2)
        in_features = self.detector.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        self.detector.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, len(self.classes))
        # Mask head is not needed for scene-graph generation.
        self.detector.roi_heads.mask_predictor = None
        # Keep copies of the ROI pooler and box head for feature extraction.
        layers = list(self.detector.roi_heads.children())[:2]
        self.roi_fmap_obj = copy.deepcopy(layers[1])
        self.roi_fmap = copy.deepcopy(layers[1])
        self.roi_pool = copy.deepcopy(layers[0])
    elif self.backbone == 'vgg16':
        self.obj_dim = 4096
        self.fmap_sz = 38
        if min_size is None:
            min_size = IM_SCALE
        if max_size is None:
            max_size = IM_SCALE
        # Build a FasterRCNN around VGG16 features with custom anchors.
        vgg = load_vgg(use_dropout=False,
                       use_relu=False,
                       use_linear=True,
                       pretrained=False)
        vgg.features.out_channels = 512
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                           aspect_ratios=((0.5, 1.0, 2.0), ))
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(
            featmap_names=['0'],
            output_size=self.pool_sz,
            sampling_ratio=2)
        self.detector = FasterRCNN(vgg.features,
                                   min_size=min_size,
                                   max_size=max_size,
                                   rpn_anchor_generator=anchor_generator,
                                   box_head=TwoMLPHead(
                                       vgg.features.out_channels *
                                       self.pool_sz**2, self.obj_dim),
                                   box_predictor=FastRCNNPredictor(
                                       self.obj_dim,
                                       len(train_data.ind_to_classes)),
                                   box_roi_pool=roi_pooler,
                                   box_detections_per_img=50,
                                   box_score_thresh=0.2)
        self.roi_fmap = nn.Sequential(nn.Flatten(), vgg.classifier)
        self.roi_fmap_obj = load_vgg(pretrained=False).classifier
        self.roi_pool = copy.deepcopy(
            list(self.detector.roi_heads.children())[0])
    else:
        raise NotImplementedError(self.backbone)

    # Edge features operate on the backbone's output channels.
    self.edge_dim = self.detector.backbone.out_channels
    self.union_boxes = UnionBoxesAndFeats(pooling_size=self.pool_sz,
                                          stride=self.stride,
                                          dim=self.edge_dim,
                                          edge_model=edge_model)
    if self.use_bias:
        self.freq_bias = FrequencyBias(train_data)
def __init__(self, backbone, num_classes=None,
             # transform parameters
             min_size=800, max_size=1333,
             image_mean=None, image_std=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.05, box_nms_thresh=0.5,
             box_detections_per_img=100,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
             box_batch_size_per_image=512, box_positive_fraction=0.25,
             bbox_reg_weights=None):
    """Assemble a Faster R-CNN from a backbone, RPN and ROI heads.

    Any component left as None is built with default settings; `num_classes`
    and `box_predictor` are mutually exclusive (exactly one must be given).

    :raises ValueError: if the backbone lacks `out_channels`, or if both /
        neither of `num_classes` and `box_predictor` are provided.
    """
    # The backbone must advertise its output channel count.
    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")

    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

    # Exactly one of num_classes / box_predictor must be supplied.
    if num_classes is not None:
        if box_predictor is not None:
            raise ValueError("num_classes should be None when box_predictor is specified")
    else:
        if box_predictor is None:
            raise ValueError("num_classes should not be None when box_predictor "
                             "is not specified")

    out_channels = backbone.out_channels

    # Default RPN: one anchor size per FPN level, three aspect ratios each.
    if rpn_anchor_generator is None:
        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(
            anchor_sizes, aspect_ratios
        )
    if rpn_head is None:
        rpn_head = RPNHead(
            out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
        )

    # Separate proposal budgets for train and test.
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)

    # Default box branch: multi-scale ROI align over the FPN levels,
    # a two-layer MLP head and a linear class/box predictor.
    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3'],
            output_size=7,
            sampling_ratio=2)

    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(
            out_channels * resolution ** 2,
            representation_size)

    if box_predictor is None:
        representation_size = 1024
        box_predictor = FastRCNNPredictor(
            representation_size,
            num_classes)

    roi_heads = RoIHeads(
        # Box
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img)

    # ImageNet normalization statistics by default.
    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

    super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)
num_classes = 2
# Two identical, randomly initialized Faster R-CNN detectors (no COCO or
# ImageNet weights), each for a binary (object vs background) task.
fine_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes, pretrained_backbone=False)
coarse_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes, pretrained_backbone=False)
# get number of input features for the classifier
fine_in_features = fine_model.roi_heads.box_predictor.cls_score.in_features
coarse_in_features = coarse_model.roi_heads.box_predictor.cls_score.in_features
# NOTE(review): the constructor above already built a num_classes=2 head, so
# replacing it again is redundant (though harmless) — confirm intent.
fine_model.roi_heads.box_predictor = FastRCNNPredictor(fine_in_features, num_classes)
coarse_model.roi_heads.box_predictor = FastRCNNPredictor(coarse_in_features, num_classes)
# Make every parameter of both models trainable.
for fine_p, coarse_p in zip(fine_model.parameters(), coarse_model.parameters()):
    fine_p.requires_grad = True
    coarse_p.requires_grad = True
fine_model.to(device)
coarse_model.to(device)
# Optimizer: one SGD optimizer per model over its trainable parameters.
fine_params = [p for p in fine_model.parameters() if p.requires_grad]
coarse_params = [p for p in coarse_model.parameters() if p.requires_grad]
fine_optim = torch.optim.SGD(fine_params, lr=0.005, momentum=0.9, weight_decay=0.0005)
coarse_optim = torch.optim.SGD(coarse_params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# videosti = "C:/Users/elleh/Downloads/IMG_0412.mp4" videosti = '../../../SwimData/SwimCodes/temp/A-H/H.mp4' #define the device device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') #define classnames classNames = ["A","B","C","D","E","F","G","H","False"] #Define the object detector model as objectDetector objectDetector = models.detection.fasterrcnn_resnet50_fpn() num_classes = 2 in_features = objectDetector.roi_heads.box_predictor.cls_score.in_features objectDetector.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) objectDetector.load_state_dict(torch.load("../../../SwimData/SwimCodes/objectDetection/models/RCNN_13nov.pth", map_location=device)) objectDetector.eval() objectDetector.to(device) # #Define the classifier # classifier = models.vgg19(pretrained=False,progress=False) # classifier.classifier[6] = nn.Linear(in_features=4096,out_features=len(classNames),bias=True) # classifier.load_state_dict(torch.load("../../../SwimData/SwimCodes/classification3/models/5_0.9612403100775194.pth", # map_location=device)) # classifier = classifier.to(device) # classtrans = transforms.Compose([ transforms.Resize((256,256)), # transforms.ToTensor(), # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
pin_memory=args.pin_memory) testloader = data.DataLoader(testset, args.batch_size) if args.detection: print("Initialize Training Mode: {}".format(args.mode)) if args.mode == 'mtcnn': # model #mtcnn = MTCNN(image_size=224, keep_all=True, device=device) model = InceptionResnetV1(pretrained='vggface2', classify=True, num_classes=3).to(device) elif args.mode == 'faster_rcnn': # model model = fasterrcnn_resnet50_fpn(pretrained=True).to(device) in_feat = model.roi_heads.box_predictor.cls_score.in_features model.roi_heads.box_predictor = FastRCNNPredictor(in_feat, 4).to(device) else: print( "Error: Training Mode {} is not defined for detection dataset!" .format(args.mode)) else: # set mode to transfer learning, if layer number of mobilenet is given if args.layer is not None: args.mode = 'transfer' # model print("Initialize Training Mode: {}".format(args.mode)) if args.mode == 'from_scratch': model = models.mobilenet_v2(pretrained=False).features.to(device) model.classifier = nn.Sequential( nn.Dropout(p=0.2, inplace=False), nn.Flatten(),
def _get_instance_segmentation_model(num_classes):
    """Return a COCO-pretrained Faster R-CNN with a ``num_classes`` box head.

    NOTE: despite the name, this builds a plain detection model — no mask
    predictor is attached.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    feat_dim = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(feat_dim, num_classes)
    return detector
transforms=train_transform) # 收集batch data的函数 def collate_fn(batch): return tuple(zip(*batch)) train_loader = DataLoader(train_set, batch_size=batch_size, collate_fn=collate_fn) # step 2: model model = torchvision.models.detection.fasterrcnn_resnet50_fpn( pretrained=True) in_features = model.roi_heads.box_predictor.cls_score.in_features model.roi_heads.box_predictor = FastRCNNPredictor( in_features, num_classes) # replace the pre-trained head with a new one model.to(device) # step 3: loss # in lib/python3.6/site-packages/torchvision/models/detection/roi_heads.py # def fastrcnn_loss(class_logits, box_regression, labels, regression_targets) # step 4: optimizer scheduler params = [p for p in model.parameters() if p.requires_grad] optimizer = torch.optim.SGD(params, lr=LR, momentum=0.9, weight_decay=0.0005) lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
def build_model(cls, args, task):
    """Build a new model instance.

    Assembles a Faster R-CNN-style model from a ResNet-FPN backbone, an RPN
    and ROI heads. Components provided by `task` take precedence; any left
    as None are constructed with defaults from `args`.

    :param args: architecture hyperparameters (backbone name, RPN/box
        thresholds, NMS budgets, transform sizes and normalization stats).
    :param task: supplies optional prebuilt components
        (rpn_anchor_generator, rpn_head, box_roi_pool, box_predictor,
        box_head) and num_classes.
    :raises ValueError: if the backbone lacks `out_channels`, or if
        num_classes and box_predictor are inconsistently specified.
    """
    # make sure that all args are properly defaulted (in case there are any new ones)
    base_architecture(args)

    rpn_anchor_generator = task.rpn_anchor_generator
    rpn_head = task.rpn_head
    box_roi_pool = task.box_roi_pool
    box_predictor = task.box_predictor
    box_head = task.box_head

    # setup backbone
    backbone = resnet_fpn_backbone(args.backbone, args.backbone_pretrained)

    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)"
        )

    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

    # num_classes and box_predictor are mutually exclusive.
    if task.num_classes > 0:
        if box_predictor is not None:
            raise ValueError("num_classes should be -1 when box_predictor is specified")
    else:
        if box_predictor is None:
            raise ValueError("num_classes should be > 0 when box_predictor is not specified")

    out_channels = backbone.out_channels

    # Default RPN: one anchor size per FPN level, three aspect ratios each.
    if rpn_anchor_generator is None:
        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(
            out_channels,
            rpn_anchor_generator.num_anchors_per_location()[0],
        )

    # Separate proposal budgets for train and test.
    rpn_pre_nms_top_n = dict(training=args.rpn_pre_nms_top_n_train,
                             testing=args.rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=args.rpn_post_nms_top_n_train,
                              testing=args.rpn_post_nms_top_n_test)

    rpn = RPN(
        rpn_anchor_generator,
        rpn_head,
        args.rpn_fg_iou_thresh,
        args.rpn_bg_iou_thresh,
        args.rpn_batch_size_per_image,
        args.rpn_positive_fraction,
        rpn_pre_nms_top_n,
        rpn_post_nms_top_n,
        args.rpn_nms_thresh,
    )

    if box_roi_pool is None:
        # NOTE(review): recent torchvision expects string featmap_names
        # (['0', '1', '2', '3']); integer names here suggest an older pinned
        # version — confirm against the project's torchvision release.
        box_roi_pool = MultiScaleRoIAlign(
            featmap_names=[0, 1, 2, 3],
            output_size=7,
            sampling_ratio=2,
        )
    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(
            out_channels * resolution ** 2,
            representation_size,
        )
    if box_predictor is None:
        representation_size = 1024
        box_predictor = FastRCNNPredictor(
            representation_size,
            task.num_classes,
        )

    roi_heads = RegionOfInterestHeads(
        # Box
        box_roi_pool,
        box_head,
        box_predictor,
        args.box_fg_iou_thresh,
        args.box_bg_iou_thresh,
        args.box_batch_size_per_image,
        args.box_positive_fraction,
        args.bbox_reg_weights,
        args.box_score_thresh,
        args.box_nms_thresh,
        args.box_detections_per_img,
    )

    # ImageNet normalization statistics by default.
    if args.image_mean is None:
        args.image_mean = [0.485, 0.456, 0.406]
    if args.image_std is None:
        args.image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(
        args.min_size,
        args.max_size,
        args.image_mean,
        args.image_std,
    )

    return cls(backbone, rpn, roi_heads, transform)
def main(
    experiment_name: str,
    gpus: Union[int, List[int], str] = "auto",
    nproc_per_node: Union[int, str] = "auto",
    dataset_root: str = "./dataset",
    log_dir: str = "./log",
    model: str = "fasterrcnn_resnet50_fpn",
    epochs: int = 13,
    batch_size: int = 4,
    lr: float = 0.01,
    download: bool = False,
    image_size: int = 256,
    resume_from: Union[str, None] = None,
) -> None:
    """
    Args:
        experiment_name: the name of each run
        gpus: can be "auto", "none", a single gpu device id, or a tuple of ids
        nproc_per_node: number of processes per node, or "auto"
        dataset_root: dataset root directory for VOC2012 Dataset
        log_dir: where to put all the logs
        epochs: number of epochs to train
        model: model to use, possible options are "fasterrcnn_resnet50_fpn",
            "fasterrcnn_mobilenet_v3_large_fpn", "fasterrcnn_mobilenet_v3_large_320_fpn"
        batch_size: batch size
        lr: initial learning rate
        download: whether to automatically download dataset
        image_size: image size for training and validation
        resume_from: path of checkpoint to resume from
    """
    if model not in AVAILABLE_MODELS:
        raise RuntimeError(f"Invalid model name: {model}")

    # Normalize `gpus` to a tuple of device ids and set CUDA_VISIBLE_DEVICES.
    # NOTE(review): a comma string like "0,1" (or a list) is never converted
    # to a tuple here, so len(gpus) below would count characters/list items —
    # confirm the accepted input forms with callers.
    if isinstance(gpus, int):
        gpus = (gpus, )
    if isinstance(gpus, tuple):
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
            [str(gpu) for gpu in gpus])
    elif gpus == "auto":
        gpus = tuple(range(torch.cuda.device_count()))
    elif gpus == "none":
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        gpus = tuple()

    ngpu = len(gpus)
    backend = "nccl" if ngpu > 0 else "gloo"
    if nproc_per_node == "auto":
        nproc_per_node = ngpu if ngpu > 0 else max(
            multiprocessing.cpu_count() // 2, 1)

    # to prevent multiple downloads of the pretrained checkpoint, create the model in the main process
    model = getattr(detection, model)(pretrained=True)
    if model.__class__.__name__ == "FasterRCNN":
        # Replace the COCO box head with a 21-class (VOC) predictor.
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 21)
    elif model.__class__.__name__ == "RetinaNet":
        # Replace the classification head to output 21 classes.
        head = RetinaNetClassificationHead(
            model.backbone.out_channels,
            model.anchor_generator.num_anchors_per_location()[0],
            num_classes=21)
        model.head.classification_head = head

    with idist.Parallel(backend=backend,
                        nproc_per_node=nproc_per_node) as parallel:
        parallel.run(
            run,
            "cuda" if ngpu > 0 else "cpu",
            experiment_name,
            gpus,
            dataset_root,
            log_dir,
            model,
            epochs,
            batch_size,
            lr,
            download,
            image_size,
            resume_from,
        )
def main():
    """Fine-tune a COCO-pretrained Faster R-CNN on the porpoise dataset and save it."""
    # Prefer the GPU when one is available.
    dev = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Two dataset views over the same images, differing only in transforms.
    full_train = porpoise_dataset(DATA_PATH, TRANSFORM_TRAIN)
    full_val = porpoise_dataset(DATA_PATH, TRANSFORM_VAL)

    # Random split driven by TRAIN_SPLIT.
    # NOTE(review): with TRAIN_SPLIT close to 1 the *training* subset gets the
    # smaller share (indices[:-n]) — confirm TRAIN_SPLIT encodes the validation
    # fraction, as the original "90/10" comment suggests.
    n_tail = int(len(full_train) * TRAIN_SPLIT)
    order = torch.randperm(len(full_train)).tolist()
    train_subset = torch.utils.data.Subset(full_train, order[:-n_tail])
    val_subset = torch.utils.data.Subset(full_val, order[-n_tail:])

    loader_train = data.DataLoader(
        train_subset, batch_size=BATCH_SIZE, shuffle=True,
        num_workers=NUM_WORKERS, pin_memory=True, collate_fn=utils.collate_fn)
    loader_val = data.DataLoader(
        val_subset, batch_size=BATCH_SIZE, shuffle=False,
        num_workers=NUM_WORKERS, pin_memory=True, collate_fn=utils.collate_fn)

    # Custom (smaller) anchor sizes than the torchvision defaults.
    sizes = ((16, ), (32, ), (64, ), (128, ), (256, ))
    ratios = ((0.5, 1.0, 2.0), ) * len(sizes)
    anchors = AnchorGenerator(sizes, ratios)

    # COCO-pretrained model; replace the box head with a two-class
    # (porpoise + background) predictor.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True, rpn_anchor_generator=anchors)
    head_in = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(head_in, 2)
    model.to(dev)

    # Optimize only the parameters that require gradients.
    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(trainable, lr=0.005, momentum=0.9,
                                weight_decay=0.0005)

    # Train for 10 epochs, validating after each one.
    for epoch in range(10):
        # One pass over the training data, logging every 10 iterations.
        train_one_epoch(model, optimizer, loader_train, dev, epoch,
                        print_freq=10)
        evaluate(model, loader_val, device=dev)

    torch.save(model, DATA_PATH + "/model_16_10e_2905")
def __init__(self, out_channels, num_classes, input_mode, acf_head,
             fg_iou_thresh=0.5, bg_iou_thresh=0.5, batch_size_per_image=512,
             positive_fraction=0.25, bbox_reg_weights=None,
             box_score_thresh=0.05, box_nms_thresh=0.5,
             box_detections_per_img=100):
    """Build the detection, segmentation and keypoint heads of RoIHeadsExtend.

    Args:
        out_channels: channel count of the backbone feature maps.
        num_classes: number of classes including background.
        input_mode: input modality flag; ``config.INPUT_RGBD`` additionally
            enables the context-attention block.
        acf_head: which axis head to build — 'endpoints', 'scatters' or
            'norm_vector'.
        fg_iou_thresh / bg_iou_thresh: IoU thresholds for matching proposals
            to ground truth as foreground / background.
        batch_size_per_image: proposals sampled per image for training.
        positive_fraction: fraction of sampled proposals that are positive.
        bbox_reg_weights: box-coder weights; defaults to (10, 10, 5, 5).
        box_score_thresh / box_nms_thresh / box_detections_per_img:
            inference-time filtering parameters.

    Raises:
        ValueError: if ``acf_head`` is not one of the recognized names.
    """
    super().__init__()
    self.in_channels = out_channels
    self.input_mode = input_mode
    self.score_thresh = box_score_thresh
    self.nms_thresh = box_nms_thresh
    self.detections_per_img = box_detections_per_img
    self.fg_iou_thresh = fg_iou_thresh
    self.bg_iou_thresh = bg_iou_thresh
    self.batch_size_per_image = batch_size_per_image
    self.positive_fraction = positive_fraction
    self.num_classes = num_classes

    # Detection
    self.box_similarity = box_ops.box_iou
    # assign ground-truth boxes for each proposal
    self.proposal_matcher = det_utils.Matcher(
        fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)
    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image, positive_fraction)
    if bbox_reg_weights is None:
        bbox_reg_weights = (10., 10., 5., 5.)
    self.box_coder = det_utils.BoxCoder(bbox_reg_weights)
    self.box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                           output_size=7,
                                           sampling_ratio=2)
    representation_size = 1024
    resolution = self.box_roi_pool.output_size[0]
    self.box_head = TwoMLPHead(out_channels * resolution**2,
                               representation_size)
    self.box_predictor = FastRCNNPredictor(representation_size, num_classes)

    # Segmentation
    self.shared_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                              output_size=14,
                                              sampling_ratio=2)
    resolution = self.shared_roi_pool.output_size[0]
    mask_layers = (256, 256, 256, 256, 256, 256, 256, 256)
    mask_dilation = 1
    self.mask_head = MaskRCNNHeads(out_channels, mask_layers, mask_dilation)
    mask_predictor_in_channels = 256  # == mask_layers[-1]
    mask_dim_reduced = 256
    self.mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                            mask_dim_reduced, num_classes)
    # Part-affinity-field branch predicts a 2-channel field per foreground class.
    self.with_paf_branch = True
    if self.with_paf_branch:
        self.paf_head = MaskRCNNHeads(out_channels, mask_layers,
                                      mask_dilation)
        self.paf_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                               mask_dim_reduced,
                                               2 * (num_classes - 1))
    if self.input_mode == config.INPUT_RGBD:
        self.attention_block = ContextBlock(256, 2)

    self.global_feature_dim = 256
    # Select exactly one axis-estimation head based on `acf_head`.
    self.with_3d_keypoints = True
    self.with_axis_keypoints = False
    self.regress_axis = False
    self.estimate_norm_vector = False
    if acf_head == 'endpoints':
        self.with_axis_keypoints = True
    elif acf_head == 'scatters':
        self.regress_axis = True
    elif acf_head == 'norm_vector':
        self.estimate_norm_vector = True
    else:
        # Fail loudly with a catchable exception instead of print() + exit(),
        # which would kill the interpreter from inside a constructor.
        raise ValueError(
            f"Invalid acf_head {acf_head!r}; expected 'endpoints', "
            "'scatters' or 'norm_vector'")

    keypoint_layers = (256, ) * 4
    self.keypoint_dim_reduced = keypoint_layers[-1]
    if self.with_3d_keypoints:
        self.vote_keypoint_head = Vote_Kpoints_head(
            self.global_feature_dim, keypoint_layers, "conv2d")
        self.vote_keypoint_predictor = Vote_Kpoints_Predictor(
            self.keypoint_dim_reduced, 3 * (num_classes - 1))
    if self.with_axis_keypoints:
        self.orientation_keypoint_head = Vote_Kpoints_head(
            self.global_feature_dim, keypoint_layers, "conv2d")
        self.orientation_keypoint_predictor = Vote_Kpoints_Predictor(
            self.keypoint_dim_reduced, 6 * (num_classes - 1))
    if self.regress_axis:
        self.axis_head = Vote_Kpoints_head(self.global_feature_dim,
                                           keypoint_layers, "conv2d")
        self.axis_predictor = Vote_Kpoints_Predictor(
            self.keypoint_dim_reduced, 4 * (num_classes - 1))
    if self.estimate_norm_vector:
        self.norm_vector_head = Vote_Kpoints_head(
            self.global_feature_dim, keypoint_layers, "conv2d")
        self.norm_vector_predictor = Vote_Kpoints_Predictor(
            self.keypoint_dim_reduced, 3 * (num_classes - 1))
        # Tail of a Dataset __getitem__ (def is above this view): apply the
        # paired image/target transforms, if any were supplied.
        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        # Dataset length = number of image entries collected at construction.
        return len(self.imgs)


import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Script-level example: take a COCO-pretrained Faster R-CNN and replace its
# box head with a 2-class (object + background) predictor.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def get_model_instance_segmentation(num_classes):
    """Return a COCO-pretrained Mask R-CNN with both heads resized.

    Replaces the box predictor and the mask predictor so the model outputs
    ``num_classes`` classes (including background) instead of COCO's 91.
    """
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    # Box head: keep the feature size, swap the final classifier.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # Mask head: read the channel count feeding conv5_mask, rebuild predictor.
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model
def main(score_th=0.25):
    """Evaluate a classifier + detector ensemble on the CVTE-CXR test set.

    For each image, the final score is the max of the SANet classification
    probability and the best Mask R-CNN box score; reports AUROC, sensitivity
    and specificity at threshold ``score_th``.

    Args:
        score_th: probability threshold for the binary decision.
    """
    N_CLASSES = len(CLASS_NAMES_Vin)
    torch.backends.cudnn.benchmark = True

    # Classification pre-trained model.
    CKPT_PATH = '/data/pycode/CXRAD/ckpt/SANet.pkl'
    cls_model = SANet(num_classes=N_CLASSES)
    if os.path.exists(CKPT_PATH):
        checkpoint = torch.load(CKPT_PATH)
        cls_model.load_state_dict(checkpoint)  #strict=False
        print(
            "=> Loaded well-trained SANet model checkpoint of Vin-CXR dataset: "
            + CKPT_PATH)
    cls_model = cls_model.cuda()
    cls_model.eval()

    # Detection pre-trained model: COCO-pretrained Mask R-CNN with both heads
    # resized to N_CLASSES.
    od_model = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=True)
    in_features = od_model.roi_heads.box_predictor.cls_score.in_features
    od_model.roi_heads.box_predictor = FastRCNNPredictor(
        in_features, N_CLASSES)
    in_features_mask = od_model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    od_model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, hidden_layer, N_CLASSES)
    CKPT_PATH = '/data/pycode/CXRAD/ckpt/Maskrcnn.pkl'
    if os.path.exists(CKPT_PATH):
        checkpoint = torch.load(CKPT_PATH)
        od_model.load_state_dict(checkpoint)  #strict=False
        print(
            "=> Loaded well-trained Maskrcnn model checkpoint of Vin-CXR dataset: "
            + CKPT_PATH)
    od_model = od_model.cuda()
    od_model.eval()

    # CVTE-CXR dataset locations.
    cvte_csv_file = '/data/pycode/CXRAD/dataset/cvte_test.txt'  # testing file path
    cvte_image_dir = '/data/fjsdata/CVTEDR/images/'  # image path

    # Run both models over every test image.
    images = pd.read_csv(cvte_csv_file, sep=',', header=None).values
    gt, pred, box = [], [], []
    for image in images:
        gt.append(image[1])
        img = cvte_image_dir + image[0]
        image = Image.open(img).convert('RGB')
        image = torch.unsqueeze(transform_seq(image), 0)
        # NOTE: torch.autograd.Variable is deprecated; it is a no-op wrapper
        # on modern PyTorch and kept here for checkpoint-era compatibility.
        var_image = torch.autograd.Variable(image).cuda()
        # Classification score (1 - normal-class probability).
        var_output = cls_model(var_image)  #forward
        prob_cls = 1 - var_output[0].data.cpu()[0].numpy()
        # Detection result: keep the highest-scoring box, if any.
        var_output = od_model(var_image)  #dict
        boxes = var_output[0]['boxes'].data.cpu().numpy()
        scores = var_output[0]['scores'].data.cpu().numpy()
        if len(scores) > 0:
            ind = np.argmax(scores)
            pred.append(max([prob_cls, scores[ind]]))
            box.append(boxes[ind])
        else:
            # No detections: fall back to the classifier score and a dummy box.
            pred.append(prob_cls)
            box.append([0, 0, 1, 1])
        sys.stdout.write('\r image process: = {}'.format(len(pred)))
        sys.stdout.flush()

    # Evaluation.
    gt_np = np.array(gt)
    pred_np = np.array(pred)
    box = np.array(box)  # fixed: was np.arrya(box) — AttributeError at runtime
    assert gt_np.shape == pred_np.shape
    # AUROC over raw scores.
    AUROCs = roc_auc_score(gt_np, pred_np)
    print('AUROC = {:.4f}'.format(AUROCs))
    # Sensitivity and specificity at the decision threshold.
    pred_np = np.where(pred_np > score_th, 1, 0)
    tn, fp, fn, tp = confusion_matrix(gt_np, pred_np).ravel()
    sen = tp / (tp + fn)
    spe = tn / (tn + fp)
    print('\r\rSen = {:.4f} and Spe = {:.4f}'.format(sen, spe))