Example #1
def create_model(num_classes, device):
    # Note: the backbone here defaults to FrozenBatchNorm2d, i.e. the BN parameters are not updated.
    # This guards against worse results caused by a batch_size that is too small (if GPU memory is limited, keep the default FrozenBatchNorm2d).
    # If GPU memory is large enough for a big batch_size, norm_layer can be set to the regular BatchNorm2d.
    # trainable_layers is counted over ['layer4', 'layer3', 'layer2', 'layer1', 'conv1']; 5 means train them all.
    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d,
                                     trainable_layers=3)
    # When training on your own dataset, do not change the 91 here; change the num_classes argument that is passed in.
    model = FasterRCNN(backbone=backbone, num_classes=91)
    # load pre-trained model weights
    # https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
    weights_dict = torch.load("./backbone/fasterrcnn_resnet50_fpn_coco.pth",
                              map_location=device)
    missing_keys, unexpected_keys = model.load_state_dict(weights_dict,
                                                          strict=False)
    if len(missing_keys) != 0 or len(unexpected_keys) != 0:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model
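A minimal usage sketch for the factory above (the 21-class value assumes PASCAL VOC's 20 foreground classes plus background; adjust it for your dataset):

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# 20 foreground classes + 1 background class
model = create_model(num_classes=21, device=device)
model.to(device)
model.train()  # in train mode the forward pass expects (images, targets)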
Example #2
def create_model(num_classes, device):
    # https://download.pytorch.org/models/vgg16-397923af.pth
    # To use mobilenetv2 instead, download its pre-trained weights, comment out the three lines below, and uncomment the two lines for the mobilenetv2 model.
    vgg_feature = vgg(model_name="vgg16",
                      weights_path="./backbone/vgg16.pth").features
    backbone = torch.nn.Sequential(*list(
        vgg_feature._modules.values())[:-1])  # drop the final maxpool layer from features
    backbone.out_channels = 512

    # https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    # backbone = MobileNetV2(weights_path="./backbone/mobilenet_v2.pth").features
    # backbone.out_channels = 1280  # channels of the backbone's output feature map

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512), ),
                                        aspect_ratios=((0.5, 1.0, 2.0), ))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],  # feature maps on which roi pooling is performed
        output_size=[7, 7],  # output size of the roi pooled feature map
        sampling_ratio=2)  # sampling ratio

    model = FasterRCNN(backbone=backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
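As a quick sanity check (a sketch, not part of the original snippet, assuming the weight file referenced above is in place), the assembled model can be run on a dummy image in eval mode:

model = create_model(num_classes=21, device=torch.device("cpu"))
model.eval()
with torch.no_grad():
    dummy = [torch.rand(3, 600, 800)]  # a list of C x H x W tensors with values in [0, 1]
    predictions = model(dummy)
print(predictions[0]["boxes"].shape)  # predicted boxes for the first image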
Example #3
def create_model(num_classes):

    # If frozen BN was not used during training, do not use it at prediction time either.
    # mobileNetv2+faster_RCNN
    # backbone = MobileNetV2().features
    # backbone.out_channels = 1280
    #
    # anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
    #                                     aspect_ratios=((0.5, 1.0, 2.0),))
    #
    # roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
    #                                                 output_size=[7, 7],
    #                                                 sampling_ratio=2)
    #
    # model = FasterRCNN(backbone=backbone,
    #                    num_classes=num_classes,
    #                    rpn_anchor_generator=anchor_generator,
    #                    box_roi_pool=roi_pooler)

    # resNet50+fpn+faster_RCNN
    # Note: norm_layer here must match the training script.
    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d)
    model = FasterRCNN(backbone=backbone,
                       num_classes=num_classes,
                       rpn_score_thresh=0.5)

    return model
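This variant only builds the inference-time architecture; restoring your own trained weights would look roughly like the sketch below (the "model" key mirrors the checkpoint layout used in Example #8, the path is hypothetical, and 21 = 20 classes + background is just an example):

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = create_model(num_classes=21)
weights_dict = torch.load("./save_weights/model.pth", map_location=device)  # hypothetical path
model.load_state_dict(weights_dict["model"])
model.to(device)
model.eval()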
Example #4
def create_model(num_classes):
    # https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    backbone = MobileNetV2(weights_path="./backbone/mobilenet_v2.pth").features
    backbone.out_channels = 1280  # channels of the backbone's output feature map

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                                        aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps on which roi pooling is performed
                                                    output_size=[7, 7],   # output size of the roi pooled feature map
                                                    sampling_ratio=2)  # sampling ratio

    model = FasterRCNN(backbone=backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
Example #5
def create_model(num_classes):
    vgg_feature = vgg(model_name="vgg16").features
    backbone = torch.nn.Sequential(*list(
        vgg_feature._modules.values())[:-1])  # drop the final maxpool layer from features
    backbone.out_channels = 512

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512), ),
                                        aspect_ratios=((0.5, 1.0, 2.0), ))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],  # feature maps on which roi pooling is performed
        output_size=[7, 7],  # output size of the roi pooled feature map
        sampling_ratio=2)  # sampling ratio

    model = FasterRCNN(backbone=backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
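For reference, with a single feature map the tuple-of-tuples arguments above produce len(sizes[0]) * len(aspect_ratios[0]) anchors at every spatial location; a small sketch of the arithmetic:

sizes = ((32, 64, 128, 256, 512),)
aspect_ratios = ((0.5, 1.0, 2.0),)
anchors_per_location = len(sizes[0]) * len(aspect_ratios[0])
print(anchors_per_location)  # 15 anchors at each feature-map cell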
Example #6
def create_model(num_classes):
    import torchvision
    from torchvision.models.feature_extraction import create_feature_extractor

    # vgg16
    backbone = torchvision.models.vgg16_bn(pretrained=False)
    # print(backbone)
    backbone = create_feature_extractor(backbone, return_nodes={"features.42": "0"})
    # out = backbone(torch.rand(1, 3, 224, 224))
    # print(out["0"].shape)
    backbone.out_channels = 512

    # resnet50 backbone
    # backbone = torchvision.models.resnet50(pretrained=False)
    # # print(backbone)
    # backbone = create_feature_extractor(backbone, return_nodes={"layer3": "0"})
    # # out = backbone(torch.rand(1, 3, 224, 224))
    # # print(out["0"].shape)
    # backbone.out_channels = 1024

    # EfficientNetB0
    # backbone = torchvision.models.efficientnet_b0(pretrained=False)
    # # print(backbone)
    # backbone = create_feature_extractor(backbone, return_nodes={"features.5": "0"})
    # # out = backbone(torch.rand(1, 3, 224, 224))
    # # print(out["0"].shape)
    # backbone.out_channels = 112

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                                        aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps on which RoIAlign pooling is performed
                                                    output_size=[7, 7],  # output size of the RoIAlign pooled feature map
                                                    sampling_ratio=2)  # sampling ratio

    model = FasterRCNN(backbone=backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
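To pick a valid return_nodes entry for a different backbone, torchvision ships a companion helper that lists every traceable node; a short discovery sketch (node names vary from model to model):

import torchvision
from torchvision.models.feature_extraction import get_graph_node_names

m = torchvision.models.vgg16_bn(pretrained=False)
train_nodes, eval_nodes = get_graph_node_names(m)
print(eval_nodes[-5:])  # inspect the last few node names, e.g. "features.42"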
Example #7
def create_model(num_classes):
    # https://download.pytorch.org/models/vgg16-397923af.pth
    # To use vgg16 (not recommended; results are poor), download its pre-trained weights and uncomment the two lines below, then comment out the two lines for the mobilenetv2 model.
    # backbone = vgg(model_name="vgg16", weights_path="./backbone/vgg16.pth").features
    # backbone.out_channels = 512

    # https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    backbone = MobileNetV2(weights_path="./backbone/mobilenet_v2.pth").features
    backbone.out_channels = 1280  # channels of the backbone's output feature map

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
                                        aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps on which roi pooling is performed
                                                    output_size=[7, 7],   # output size of the roi pooled feature map
                                                    sampling_ratio=2)  # sampling ratio

    model = FasterRCNN(backbone=backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    return model
Example #8
def main(parser_data):
    device = torch.device(
        parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    data_transform = {"val": transforms.Compose([transforms.ToTensor()])}

    # read class_indict
    label_json_path = './pascal_voc_classes.json'
    assert os.path.exists(
        label_json_path), "json file {} does not exist.".format(
            label_json_path)
    with open(label_json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    category_index = {v: k for k, v in class_dict.items()}

    VOC_root = parser_data.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError(
            "VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # Note: collate_fn is custom here because each sample contains both image and targets, so the default batching cannot be used.
    batch_size = parser_data.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    val_data_set = VOC2012DataSet(VOC_root, data_transform["val"], "val.txt")
    val_data_set_loader = torch.utils.data.DataLoader(
        val_data_set,
        batch_size=batch_size,
        shuffle=False,
        num_workers=nw,
        pin_memory=True,
        collate_fn=val_data_set.collate_fn)

    # create model; num_classes equals background + 20 classes
    # Note: norm_layer here must match the training script.
    backbone = resnet50_fpn_backbone(norm_layer=torch.nn.BatchNorm2d)
    model = FasterRCNN(backbone=backbone,
                       num_classes=parser_data.num_classes + 1)

    # load your own trained model weights
    weights_path = parser_data.weights
    assert os.path.exists(weights_path), "weights file {} not found.".format(
        weights_path)
    weights_dict = torch.load(weights_path, map_location=device)
    model.load_state_dict(weights_dict['model'])
    # print(model)

    model.to(device)

    # evaluate on the validation dataset
    coco = get_coco_api_from_dataset(val_data_set)
    iou_types = ["bbox"]
    coco_evaluator = CocoEvaluator(coco, iou_types)
    cpu_device = torch.device("cpu")

    model.eval()
    with torch.no_grad():
        for image, targets in tqdm(val_data_set_loader, desc="validation..."):
            # move images to the specified device
            image = list(img.to(device) for img in image)

            # inference
            outputs = model(image)

            outputs = [{k: v.to(cpu_device)
                        for k, v in t.items()} for t in outputs]
            res = {
                target["image_id"].item(): output
                for target, output in zip(targets, outputs)
            }
            coco_evaluator.update(res)

    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    coco_eval = coco_evaluator.coco_eval["bbox"]
    # calculate COCO info for all classes
    coco_stats, print_coco = summarize(coco_eval)

    # calculate VOC info for every class (IoU=0.5)
    voc_map_info_list = []
    for i in range(len(category_index)):
        stats, _ = summarize(coco_eval, catId=i)
        voc_map_info_list.append(" {:15}: {}".format(category_index[i + 1],
                                                     stats[1]))

    print_voc = "\n".join(voc_map_info_list)
    print(print_voc)

    # save validation results to a txt file
    with open("record_mAP.txt", "w") as f:
        record_lines = [
            "COCO results:", print_coco, "", "mAP(IoU=0.5) for each category:",
            print_voc
        ]
        f.write("\n".join(record_lines))
Example #9
def main(parser_data):
    device = torch.device(
        parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    data_transform = {"val": transforms.Compose([transforms.ToTensor()])}

    # read class_indict
    label_json_path = './coco80_indices.json'
    assert os.path.exists(
        label_json_path), "json file {} does not exist.".format(
            label_json_path)
    with open(label_json_path, 'r') as json_file:
        category_index = json.load(json_file)

    coco_root = parser_data.data_path

    # Note: collate_fn is custom here because each sample contains both image and targets, so the default batching cannot be used.
    batch_size = parser_data.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    val_dataset = CocoDetection(coco_root, "val", data_transform["val"])
    val_dataset_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        pin_memory=True,
        num_workers=nw,
        collate_fn=val_dataset.collate_fn)

    # create model
    vgg_feature = vgg(model_name="vgg16",
                      weights_path="./backbone/vgg16.pth").features
    backbone = torch.nn.Sequential(*list(
        vgg_feature._modules.values())[:-1])  # drop the final maxpool layer from features
    backbone.out_channels = 512

    anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512), ),
                                        aspect_ratios=((0.5, 1.0, 2.0), ))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],  # feature maps on which roi pooling is performed
        output_size=[7, 7],  # output size of the roi pooled feature map
        sampling_ratio=2)  # sampling ratio

    # num_classes equals 80 classes + background
    model = FasterRCNN(backbone=backbone,
                       num_classes=parser_data.num_classes + 1,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    # load your own trained model weights
    weights_path = parser_data.weights
    assert os.path.exists(weights_path), "weights file {} not found.".format(
        weights_path)
    weights_dict = torch.load(weights_path, map_location=device)
    model.load_state_dict(weights_dict['model'])
    # print(model)

    model.to(device)

    # evaluate on the val dataset
    cpu_device = torch.device("cpu")
    coco91to80 = val_dataset.coco91to80
    coco80to91 = dict([(str(v), k) for k, v in coco91to80.items()])
    results = []

    model.eval()
    with torch.no_grad():
        for image, targets in tqdm(val_dataset_loader, desc="validation..."):
            # move images to the specified device
            image = list(img.to(device) for img in image)

            # inference
            outputs = model(image)

            outputs = [{k: v.to(cpu_device)
                        for k, v in t.items()} for t in outputs]

            # iterate over each image's predictions
            for target, output in zip(targets, outputs):
                if len(output["boxes"]) == 0:
                    continue

                img_id = int(target["image_id"])
                per_image_boxes = output["boxes"]
                # coco_eval expects each box in the format [x_min, y_min, w, h],
                # while our predicted boxes are [x_min, y_min, x_max, y_max], so convert the format
                per_image_boxes[:, 2:] -= per_image_boxes[:, :2]
                per_image_classes = output["labels"]
                per_image_scores = output["scores"]

                # iterate over each detected object
                for object_score, object_class, object_box in zip(
                        per_image_scores, per_image_classes, per_image_boxes):
                    object_score = float(object_score)
                    # map the class index back to the coco91 label set
                    coco80_class = int(object_class)
                    coco91_class = int(coco80to91[str(coco80_class)])
                    # We recommend rounding coordinates to the nearest tenth of a pixel
                    # to reduce resulting JSON file size.
                    object_box = [round(b, 2) for b in object_box.tolist()]

                    res = {
                        "image_id": img_id,
                        "category_id": coco91_class,
                        "bbox": object_box,
                        "score": round(object_score, 3)
                    }
                    results.append(res)

    # write predicted results to a json file
    json_str = json.dumps(results, indent=4)
    with open('predict_tmp.json', 'w') as json_file:
        json_file.write(json_str)

    # load ground-truth annotations and the predicted results
    coco_true = val_dataset.coco
    coco_pre = coco_true.loadRes('predict_tmp.json')

    coco_evaluator = COCOeval(cocoGt=coco_true,
                              cocoDt=coco_pre,
                              iouType="bbox")
    coco_evaluator.evaluate()
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    # calculate COCO info for all classes
    coco_stats, print_coco = summarize(coco_evaluator)

    # calculate VOC info for every class (IoU=0.5)
    voc_map_info_list = []
    for i in range(len(category_index)):
        stats, _ = summarize(coco_evaluator, catId=i)
        voc_map_info_list.append(" {:15}: {}".format(
            category_index[str(i + 1)], stats[1]))

    print_voc = "\n".join(voc_map_info_list)
    print(print_voc)

    # save validation results to a txt file
    with open("record_mAP.txt", "w") as f:
        record_lines = [
            "COCO results:", print_coco, "", "mAP(IoU=0.5) for each category:",
            print_voc
        ]
        f.write("\n".join(record_lines))