def create_model(num_classes, device):
    # Note: the backbone defaults to FrozenBatchNorm2d, i.e. the BN parameters are not
    # updated. This guards against degraded results when batch_size is very small
    # (with limited GPU memory, keep the default FrozenBatchNorm2d).
    # With enough GPU memory for a large batch_size, norm_layer can be a plain BatchNorm2d.
    # trainable_layers selects from ['layer4', 'layer3', 'layer2', 'layer1', 'conv1']; 5 trains all of them.
    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d,
                                     trainable_layers=3)
    # When training on your own dataset, do not change the 91 here; change the
    # num_classes argument that is passed in instead.
    model = FasterRCNN(backbone=backbone, num_classes=91)
    # Load COCO pre-trained weights
    # https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
    weights_dict = torch.load("./backbone/fasterrcnn_resnet50_fpn_coco.pth", map_location=device)
    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
    if len(missing_keys) != 0 or len(unexpected_keys) != 0:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model
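# A minimal smoke test for the fine-tuning model above (a sketch, not part of the
# original script): num_classes=21 assumes PASCAL VOC's 20 foreground classes plus
# background, and it assumes the COCO checkpoint referenced above has already been
# downloaded to ./backbone/. The random tensor stands in for a real image.
if __name__ == "__main__":
    model = create_model(num_classes=21, device=torch.device("cpu"))
    model.eval()
    with torch.no_grad():
        predictions = model([torch.rand(3, 600, 800)])
    print(predictions[0]["boxes"].shape, predictions[0]["scores"].shape)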
def create_model(num_classes, device):
    # https://download.pytorch.org/models/vgg16-397923af.pth
    # To use mobilenetv2 instead, download its pre-trained weights, comment out the
    # three vgg16 lines below, and uncomment the two mobilenetv2 lines.
    vgg_feature = vgg(model_name="vgg16", weights_path="./backbone/vgg16.pth").features
    backbone = torch.nn.Sequential(*list(vgg_feature._modules.values())[:-1])  # drop the final maxpool layer
    backbone.out_channels = 512

    # https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    # backbone = MobileNetV2(weights_path="./backbone/mobilenet_v2.pth").features
    # backbone.out_channels = 1280  # number of channels in the backbone's output feature map

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                                        aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps to pool RoIs from
                                                    output_size=[7, 7],   # spatial size of the pooled output
                                                    sampling_ratio=2)     # sampling ratio

    model = FasterRCNN(backbone=backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
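# The anchor generator above defines a single feature level with 5 sizes and
# 3 aspect ratios, i.e. 5 x 3 = 15 anchors per spatial location; the RPN head's
# objectness and regression branches are sized from this count. A quick check
# (a sketch, assuming the repo's AnchorsGenerator mirrors torchvision's
# AnchorGenerator API, including num_anchors_per_location()):
if __name__ == "__main__":
    gen = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                           aspect_ratios=((0.5, 1.0, 2.0),))
    print(gen.num_anchors_per_location())  # expected: [15]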
def create_model(num_classes):
    # If frozen BN was not used during training, do not use it at prediction time either.

    # mobileNetv2 + faster_RCNN
    # backbone = MobileNetV2().features
    # backbone.out_channels = 1280
    #
    # anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
    #                                     aspect_ratios=((0.5, 1.0, 2.0),))
    #
    # roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
    #                                                 output_size=[7, 7],
    #                                                 sampling_ratio=2)
    #
    # model = FasterRCNN(backbone=backbone,
    #                    num_classes=num_classes,
    #                    rpn_anchor_generator=anchor_generator,
    #                    box_roi_pool=roi_pooler)

    # resNet50 + fpn + faster_RCNN
    # Note: norm_layer here must match the one used in the training script.
    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d)
    model = FasterRCNN(backbone=backbone, num_classes=num_classes, rpn_score_thresh=0.5)

    return model
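# A hedged inference sketch for the predictor above: load trained weights saved by
# the training script and run a forward pass. The weights path and the 21-class
# count (20 VOC classes + background) are illustrative assumptions.
if __name__ == "__main__":
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = create_model(num_classes=21)
    weights_dict = torch.load("./save_weights/model.pth", map_location=device)  # hypothetical path
    model.load_state_dict(weights_dict["model"] if "model" in weights_dict else weights_dict)
    model.to(device)
    model.eval()
    with torch.no_grad():
        img = torch.rand(3, 600, 800, device=device)  # stand-in for a real, transformed image
        predictions = model([img])
    print(predictions[0]["boxes"], predictions[0]["labels"], predictions[0]["scores"])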
def create_model(num_classes):
    # https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    backbone = MobileNetV2(weights_path="./backbone/mobilenet_v2.pth").features
    backbone.out_channels = 1280  # number of channels in the backbone's output feature map

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                                        aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps to pool RoIs from
                                                    output_size=[7, 7],   # spatial size of the pooled output
                                                    sampling_ratio=2)     # sampling ratio

    model = FasterRCNN(backbone=backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
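# Sanity check (a sketch) that MobileNetV2's feature extractor really emits 1280
# channels, matching the out_channels attribute FasterRCNN reads when building the
# RPN head. MobileNetV2 here is the repo's own class; random weights suffice for a
# shape check, and with total stride 32 a 224x224 input yields a 7x7 feature map.
if __name__ == "__main__":
    feats = MobileNetV2().features
    with torch.no_grad():
        out = feats(torch.rand(1, 3, 224, 224))
    print(out.shape)  # expected: torch.Size([1, 1280, 7, 7])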
def create_model(num_classes):
    vgg_feature = vgg(model_name="vgg16").features
    backbone = torch.nn.Sequential(*list(vgg_feature._modules.values())[:-1])  # drop the final maxpool layer
    backbone.out_channels = 512

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                                        aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps to pool RoIs from
                                                    output_size=[7, 7],   # spatial size of the pooled output
                                                    sampling_ratio=2)     # sampling ratio

    model = FasterRCNN(backbone=backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
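# A quick sanity check (sketch) that the truncated VGG trunk emits 512 channels, as
# the out_channels attribute above declares. Uses the repo's vgg() helper; dropping
# the last maxpool leaves a total stride of 16, so a 224x224 input should produce a
# 14x14 feature map.
if __name__ == "__main__":
    trunk = torch.nn.Sequential(*list(vgg(model_name="vgg16").features._modules.values())[:-1])
    with torch.no_grad():
        out = trunk(torch.rand(1, 3, 224, 224))
    print(out.shape)  # expected: torch.Size([1, 512, 14, 14])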
def create_model(num_classes):
    import torchvision
    from torchvision.models.feature_extraction import create_feature_extractor

    # vgg16
    backbone = torchvision.models.vgg16_bn(pretrained=False)
    # print(backbone)
    backbone = create_feature_extractor(backbone, return_nodes={"features.42": "0"})
    # out = backbone(torch.rand(1, 3, 224, 224))
    # print(out["0"].shape)
    backbone.out_channels = 512

    # resnet50 backbone
    # backbone = torchvision.models.resnet50(pretrained=False)
    # # print(backbone)
    # backbone = create_feature_extractor(backbone, return_nodes={"layer3": "0"})
    # # out = backbone(torch.rand(1, 3, 224, 224))
    # # print(out["0"].shape)
    # backbone.out_channels = 1024

    # EfficientNetB0
    # backbone = torchvision.models.efficientnet_b0(pretrained=False)
    # # print(backbone)
    # backbone = create_feature_extractor(backbone, return_nodes={"features.5": "0"})
    # # out = backbone(torch.rand(1, 3, 224, 224))
    # # print(out["0"].shape)
    # backbone.out_channels = 112

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                                        aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps to apply RoIAlign pooling on
                                                    output_size=[7, 7],   # spatial size of the RoIAlign output
                                                    sampling_ratio=2)     # sampling ratio

    model = FasterRCNN(backbone=backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
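# When swapping in a different backbone, the key passed to return_nodes must match a
# node in the model's traced graph. torchvision's get_graph_node_names lists the
# valid names; a sketch for vgg16_bn (the tail of eval_nodes shows why "features.42"
# is the last ReLU output before the final maxpool):
if __name__ == "__main__":
    import torchvision
    from torchvision.models.feature_extraction import get_graph_node_names
    train_nodes, eval_nodes = get_graph_node_names(torchvision.models.vgg16_bn(pretrained=False))
    print(eval_nodes[-10:])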
def create_model(num_classes):
    # https://download.pytorch.org/models/vgg16-397923af.pth
    # To use vgg16 (not recommended; results are poor), download its pre-trained
    # weights, uncomment the two lines below, and comment out the two mobilenetv2 lines.
    # backbone = vgg(model_name="vgg16", weights_path="./backbone/vgg16.pth").features
    # backbone.out_channels = 512

    # https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    backbone = MobileNetV2(weights_path="./backbone/mobilenet_v2.pth").features
    backbone.out_channels = 1280  # number of channels in the backbone's output feature map

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                                        aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps to pool RoIs from
                                                    output_size=[7, 7],   # spatial size of the pooled output
                                                    sampling_ratio=2)     # sampling ratio

    model = FasterRCNN(backbone=backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
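# A small standalone demonstration (sketch) of what MultiScaleRoIAlign produces: for
# N boxes it pools an [N, C, 7, 7] tensor from the feature map named '0'. The
# feature map and image sizes below are illustrative; the 1/32 scale is inferred by
# torchvision from the ratio of feature-map size to image size.
if __name__ == "__main__":
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=[7, 7],
                                                sampling_ratio=2)
    features = {'0': torch.rand(1, 1280, 25, 38)}
    boxes = [torch.tensor([[0.0, 0.0, 100.0, 100.0], [50.0, 50.0, 200.0, 200.0]])]
    image_sizes = [(800, 1216)]
    out = pooler(features, boxes, image_sizes)
    print(out.shape)  # expected: torch.Size([2, 1280, 7, 7])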
def main(parser_data):
    device = torch.device(parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    data_transform = {"val": transforms.Compose([transforms.ToTensor()])}

    # read class_indict
    label_json_path = './pascal_voc_classes.json'
    assert os.path.exists(label_json_path), "json file {} does not exist.".format(label_json_path)
    with open(label_json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    category_index = {v: k for k, v in class_dict.items()}

    VOC_root = parser_data.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit does not exist in path: '{}'.".format(VOC_root))

    # Note: collate_fn is custom here because each sample contains both an image and
    # its targets, which the default collate cannot batch together.
    batch_size = parser_data.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    val_data_set = VOC2012DataSet(VOC_root, data_transform["val"], "val.txt")
    val_data_set_loader = torch.utils.data.DataLoader(val_data_set,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=nw,
                                                      pin_memory=True,
                                                      collate_fn=val_data_set.collate_fn)

    # create model: num_classes equals background + 20 classes
    # Note: norm_layer here must match the one used in the training script.
    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d)
    model = FasterRCNN(backbone=backbone, num_classes=parser_data.num_classes + 1)

    # Load your own trained weights
    weights_path = parser_data.weights
    assert os.path.exists(weights_path), "not found {} file.".format(weights_path)
    weights_dict = torch.load(weights_path, map_location=device)
    model.load_state_dict(weights_dict['model'])
    # print(model)

    model.to(device)

    # evaluate on the test dataset
    coco = get_coco_api_from_dataset(val_data_set)
    iou_types = ["bbox"]
    coco_evaluator = CocoEvaluator(coco, iou_types)
    cpu_device = torch.device("cpu")

    model.eval()
    with torch.no_grad():
        for image, targets in tqdm(val_data_set_loader, desc="validation..."):
            # move images to the specified device
            image = list(img.to(device) for img in image)

            # inference
            outputs = model(image)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            coco_evaluator.update(res)

    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    coco_eval = coco_evaluator.coco_eval["bbox"]
    # calculate COCO info for all classes
    coco_stats, print_coco = summarize(coco_eval)

    # calculate voc info for every class (IoU=0.5)
    voc_map_info_list = []
    for i in range(len(category_index)):
        stats, _ = summarize(coco_eval, catId=i)
        voc_map_info_list.append(" {:15}: {}".format(category_index[i + 1], stats[1]))

    print_voc = "\n".join(voc_map_info_list)
    print(print_voc)

    # save validation results to a txt file
    with open("record_mAP.txt", "w") as f:
        record_lines = ["COCO results:",
                        print_coco,
                        "",
                        "mAP(IoU=0.5) for each category:",
                        print_voc]
        f.write("\n".join(record_lines))
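# A hedged sketch of the argument parser this main() expects; the exact flags and
# defaults in the original script may differ, but the attribute names match what
# main() reads from parser_data.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="faster-rcnn validation")
    parser.add_argument('--device', default='cuda', help='device, e.g. cuda or cpu')
    parser.add_argument('--num-classes', type=int, default=20, help='number of foreground classes (without background)')
    parser.add_argument('--data-path', default='./', help='directory containing VOCdevkit')
    parser.add_argument('--weights', default='./save_weights/model.pth', help='path to trained weights')
    parser.add_argument('--batch-size', type=int, default=1, help='validation batch size')
    args = parser.parse_args()
    main(args)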
def main(parser_data):
    device = torch.device(parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    data_transform = {"val": transforms.Compose([transforms.ToTensor()])}

    # read class_indict
    label_json_path = './coco80_indices.json'
    assert os.path.exists(label_json_path), "json file {} does not exist.".format(label_json_path)
    with open(label_json_path, 'r') as json_file:
        category_index = json.load(json_file)

    coco_root = parser_data.data_path

    # Note: collate_fn is custom here because each sample contains both an image and
    # its targets, which the default collate cannot batch together.
    batch_size = parser_data.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    val_dataset = CocoDetection(coco_root, "val", data_transform["val"])
    val_dataset_loader = torch.utils.data.DataLoader(val_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     pin_memory=True,
                                                     num_workers=nw,
                                                     collate_fn=val_dataset.collate_fn)

    # create model
    vgg_feature = vgg(model_name="vgg16", weights_path="./backbone/vgg16.pth").features
    backbone = torch.nn.Sequential(*list(vgg_feature._modules.values())[:-1])  # drop the final maxpool layer
    backbone.out_channels = 512

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                                        aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps to pool RoIs from
                                                    output_size=[7, 7],   # spatial size of the pooled output
                                                    sampling_ratio=2)     # sampling ratio

    # num_classes equals 80 classes + background
    model = FasterRCNN(backbone=backbone,
                       num_classes=parser_data.num_classes + 1,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    # Load your own trained weights
    weights_path = parser_data.weights
    assert os.path.exists(weights_path), "not found {} file.".format(weights_path)
    weights_dict = torch.load(weights_path, map_location=device)
    model.load_state_dict(weights_dict['model'])
    # print(model)

    model.to(device)

    # evaluate on the val dataset
    cpu_device = torch.device("cpu")
    coco91to80 = val_dataset.coco91to80
    coco80to91 = {str(v): k for k, v in coco91to80.items()}
    results = []

    model.eval()
    with torch.no_grad():
        for image, targets in tqdm(val_dataset_loader, desc="validation..."):
            # move images to the specified device
            image = list(img.to(device) for img in image)

            # inference
            outputs = model(image)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]

            # iterate over each image's predictions
            for target, output in zip(targets, outputs):
                if len(output) == 0:
                    continue

                img_id = int(target["image_id"])
                per_image_boxes = output["boxes"]
                # coco_eval expects each box as [x_min, y_min, w, h], while the model
                # predicts [x_min, y_min, x_max, y_max], so convert the format here
                # (see the standalone check after this function).
                per_image_boxes[:, 2:] -= per_image_boxes[:, :2]
                per_image_classes = output["labels"]
                per_image_scores = output["scores"]

                # iterate over each detected object
                for object_score, object_class, object_box in zip(per_image_scores, per_image_classes, per_image_boxes):
                    object_score = float(object_score)
                    # map the class id back to the coco91 label space
                    coco80_class = int(object_class)
                    coco91_class = int(coco80to91[str(coco80_class)])
                    # We recommend rounding coordinates to the nearest tenth of a pixel
                    # to reduce resulting JSON file size.
                    object_box = [round(b, 2) for b in object_box.tolist()]

                    res = {"image_id": img_id,
                           "category_id": coco91_class,
                           "bbox": object_box,
                           "score": round(object_score, 3)}
                    results.append(res)

    # write predict results into json file
    json_str = json.dumps(results, indent=4)
    with open('predict_tmp.json', 'w') as json_file:
        json_file.write(json_str)

    # accumulate predictions from all images
    coco_true = val_dataset.coco
    coco_pre = coco_true.loadRes('predict_tmp.json')

    coco_evaluator = COCOeval(cocoGt=coco_true, cocoDt=coco_pre, iouType="bbox")
    coco_evaluator.evaluate()
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    # calculate COCO info for all classes
    coco_stats, print_coco = summarize(coco_evaluator)

    # calculate voc info for every class (IoU=0.5)
    voc_map_info_list = []
    for i in range(len(category_index)):
        stats, _ = summarize(coco_evaluator, catId=i)
        voc_map_info_list.append(" {:15}: {}".format(category_index[str(i + 1)], stats[1]))

    print_voc = "\n".join(voc_map_info_list)
    print(print_voc)

    # save validation results to a txt file
    with open("record_mAP.txt", "w") as f:
        record_lines = ["COCO results:",
                        print_coco,
                        "",
                        "mAP(IoU=0.5) for each category:",
                        print_voc]
        f.write("\n".join(record_lines))
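# A standalone check (illustrative) of the in-place slice arithmetic used above to
# convert boxes from [x_min, y_min, x_max, y_max] to COCO's [x_min, y_min, w, h]:
if __name__ == "__main__":
    boxes = torch.tensor([[10.0, 20.0, 110.0, 70.0]])
    boxes[:, 2:] -= boxes[:, :2]
    print(boxes)  # tensor([[10., 20., 100., 50.]]) -> width 100, height 50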