Example #1
    def __init__(self, anchors=None, o_sz=63, g_sz=127):
        super(SiamMask, self).__init__()
        self.anchors = anchors  # anchor_cfg
        self.anchor_num = len(self.anchors["ratios"]) * len(self.anchors["scales"])
        self.anchor = Anchors(anchors)
        self.features = None
        self.rpn_model = None
        self.mask_model = None
        self.o_sz = o_sz
        self.g_sz = g_sz
        self.upSample = nn.UpsamplingBilinear2d(size=[g_sz, g_sz])

        self.all_anchors = None
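
For context, anchor_num above is just the size of the ratio/scale grid. A tiny self-contained check with a made-up anchor_cfg (the real keys and values come from the tracker configuration):

anchor_cfg = {"ratios": [0.33, 0.5, 1, 2, 3], "scales": [8], "stride": 8}
anchor_num = len(anchor_cfg["ratios"]) * len(anchor_cfg["scales"])
assert anchor_num == 5  # 5 ratios x 1 scale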
Example #2
    def __init__(self, num_classes, block, layers):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if block == BasicBlock:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv2.out_channels,
                self.layer3[layers[2] - 1].conv2.out_channels,
                self.layer4[layers[3] - 1].conv2.out_channels
            ]
        elif block == Bottleneck:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv3.out_channels,
                self.layer3[layers[2] - 1].conv3.out_channels,
                self.layer4[layers[3] - 1].conv3.out_channels
            ]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256,
                                                       num_classes=num_classes)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()
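
The bias initialisation above is the focal-loss prior trick from RetinaNet: filling the classification bias with -log((1 - prior) / prior) makes the network's initial sigmoid output equal prior (0.01), which keeps early training stable. A quick self-contained check:

import math

prior = 0.01
bias = -math.log((1.0 - prior) / prior)
p0 = 1.0 / (1.0 + math.exp(-bias))  # sigmoid of the initial bias
assert abs(p0 - prior) < 1e-12      # initial foreground probability == prior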
Example #3
 def __init__(self, backbone, return_layers, anchor_nums=3):
     super(RetinaFace, self).__init__()
     # if backbone_name == 'resnet50':
     #     self.backbone = resnet.resnet50(pretrained)
     # self.backbone = resnet.__dict__[backbone_name](pretrained=pretrained)
     # self.return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}
     assert backbone, 'Backbone cannot be None!'
     assert len(
         return_layers) > 0, 'There must be at least one return layer'
     self.body = _utils.IntermediateLayerGetter(backbone, return_layers)
     in_channels_stage2 = 256
     # in_channels_stage2 = 64
     in_channels_list = [
         #in_channels_stage2,
         in_channels_stage2 * 2,
         in_channels_stage2 * 4,
         in_channels_stage2 * 8,
     ]
     out_channels = 256
     self.fpn = FeaturePyramidNetwork(in_channels_list, out_channels)
     # self.ClassHead = ClassHead()
     # self.BboxHead = BboxHead()
     # self.LandmarkHead = LandmarkHead()
     self.ClassHead = self._make_class_head()
     self.BboxHead = self._make_bbox_head()
     self.LandmarkHead = self._make_landmark_head()
     self.anchors = Anchors()
     self.regressBoxes = RegressionTransform()
     self.losslayer = losses.LossLayer()
Example #4
    def __init__(self, num_classes, block, layers, groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 dropout1=0.25, dropout2=0.25, alpha=0.25, gamma=2.0,
                 loss_with_no_bboxes=False, no_bboxes_alpha=0.5, no_bboxes_gamma=2.0):
        # Changed to ResNeXt (customized by Yu Han Huang)
        self.inplanes = 64
        super(ResNet, self).__init__()
        #add self.dilation, width_per_group, replace_stride_with_dilation (customized by Yu Han Huang)
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        #add dilate=replace_stride_with_dilation (customized by Yu Han Huang)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])
        #add C2 layer_size to fpn_sizes (customized by Yu Han Huang)
        if block == BasicBlock:
            fpn_sizes = [self.layer1[layers[0]-1].conv2.out_channels, self.layer2[layers[1]-1].conv2.out_channels,
             self.layer3[layers[2]-1].conv2.out_channels, self.layer4[layers[3]-1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer1[layers[0]-1].conv3.out_channels, self.layer2[layers[1]-1].conv3.out_channels,
             self.layer3[layers[2]-1].conv3.out_channels, self.layer4[layers[3]-1].conv3.out_channels]
        #add fpn_sizes[0] into PyramidFeatures (customized by Yu Han Huang)
        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2], fpn_sizes[3])
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes, dropout1=dropout1, dropout2=dropout2)
        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        # add arguments alpha, gamma, loss_with_no_bboxes, no_bboxes_alpha, no_bboxes_gamma (customized by Yu Han Huang)
        self.focalLoss = losses.FocalLoss(alpha=alpha, gamma=gamma, loss_with_no_bboxes=loss_with_no_bboxes, no_bboxes_alpha=no_bboxes_alpha, no_bboxes_gamma=no_bboxes_gamma)
                
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        prior = 0.01
        
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0-prior)/prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
        self.freeze_bn()
Example #5
    def __init__(self, num_classes, block, pretrained=False, phi=0):
        self.inplanes = w_bifpn[phi]
        super(EfficientDet, self).__init__()
        efficientnet = EfficientNet.from_pretrained(f'efficientnet-b{phi}')
        blocks = []
        count = 0
        fpn_sizes = []
        for block in efficientnet._blocks:
            blocks.append(block)
            if block._depthwise_conv.stride == [2, 2]:
                count += 1
                fpn_sizes.append(block._project_conv.out_channels)
                if len(fpn_sizes) >= 4:
                    break

        self.efficientnet = nn.Sequential(efficientnet._conv_stem,
                                          efficientnet._bn0, *blocks)
        num_layers = min(phi + 2, 8)
        self.fpn = BiFPN(fpn_sizes[1:],
                         feature_size=w_bifpn[phi],
                         num_layers=num_layers)

        d_class = 3 + (phi // 3)
        self.regressionModel = RegressionModel(w_bifpn[phi],
                                               feature_size=w_bifpn[phi],
                                               d_class=d_class)
        self.classificationModel = ClassificationModel(
            w_bifpn[phi],
            feature_size=w_bifpn[phi],
            d_class=d_class,
            num_classes=num_classes)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss().cuda()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()
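
The constructor above assumes a module-level width table w_bifpn indexed by the compound coefficient phi. A sketch with plausible values (the D0-D6 BiFPN widths from the EfficientDet paper; the repo's actual table may differ):

w_bifpn = [64, 88, 112, 160, 224, 288, 384]  # hypothetical; the real table lives elsewhere in the repo
phi = 0
assert w_bifpn[phi] == 64     # BiFPN/head width for D0
assert min(phi + 2, 8) == 2   # num_layers: BiFPN repeats, as computed above
assert 3 + (phi // 3) == 3    # d_class: head depth, as computed above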
Example #6
    def __init__(self, num_classes, block, layers, normalization='batch_norm'):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.normalization = normalization


        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        if normalization == 'batch_norm':
            self.bn1 = nn.BatchNorm2d(64)
        else:
            self.bn1 = nn.GroupNorm(num_groups=8, num_channels=64)  # Note: GroupNorm does not reuse the preloaded ImageNet weights the way BatchNorm does
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if block == BasicBlock:
            fpn_sizes = [self.layer2[layers[1]-1].conv2.out_channels, self.layer3[layers[2]-1].conv2.out_channels, self.layer4[layers[3]-1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer2[layers[1]-1].conv3.out_channels, self.layer3[layers[2]-1].conv3.out_channels, self.layer4[layers[3]-1].conv3.out_channels]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()
        
        self.focalLoss = losses.FocalLoss()
                
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.GroupNorm):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            # elif :
                
                # raise NotImplementedError('Not Implemented: Contact @Vishnu')

        prior = 0.01
        
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0-prior)/prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()
Example #7
def main():

    uuidList = ["e2c56db5dffb48d2b060d0f5a71096e0"]
    dev_id = 0
    # Please give a number below 40: we assume packets from the same batch of scans arrive at roughly the same time.
    # If the number is high, motion becomes significant and the RSSI values are less accurate.
    numberOfBeaconsToWait = 2000
    commissioningFileName = "/home/pi/rssi_positioning/commissionning.dat"

    host = '192.168.1.6'
    port = 5000
    reporter = Reporter(host, port)

    logger = Logger()

    try:
        anchors = Anchors(commissioningFileName)  #in anchors.py
        anchors.show_debug()
        scanner = Scanner(uuidList, dev_id, numberOfBeaconsToWait)
        i = 0
        while True:
            timestamp, returnedList = scanner.scan()
            dataPoint = DataPoint(uuidList, anchors, timestamp, returnedList)
            solver = Solver(dataPoint)
            edmSolver = EDMSolver(dataPoint)
            edmSolver.run()
            logger.logDataPoint(dataPoint)
            result = (solver.result + edmSolver.result) / 2
            # reporter.report(solver.result)
            # reporter.report(edmSolver.result)
            reporter.report(result)
            i += 1
            print("Running loop number: ", i)

    except KeyboardInterrupt:
        print("\nWarning: keyboard interrupt detected, quitting...")

    finally:
        #clean up
        print("Program done.")
Example #8
 def update(self, newparam=None, anchors=None):
     if newparam:
         for key, value in newparam.items():
             setattr(self, key, value)
     if anchors is not None:
         if isinstance(anchors, dict):
             anchors = Anchors(anchors)
         if isinstance(anchors, Anchors):
             self.total_stride = anchors.stride
             self.ratios = anchors.ratios
             self.scales = anchors.scales
             self.round_dight = anchors.round_dight
     self.renew()
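
The first half of update is a plain dict-to-attribute merge via setattr; the second half copies geometry fields off an Anchors instance. The merge pattern in isolation (a generic, self-contained sketch; the attribute names are made up):

class _Cfg:
    penalty_k = 0.055
    window_influence = 0.42

    def update(self, newparam=None):
        if newparam:
            for key, value in newparam.items():
                setattr(self, key, value)

cfg = _Cfg()
cfg.update({'penalty_k': 0.04})
assert cfg.penalty_k == 0.04 and cfg.window_influence == 0.42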
Example #9
    def __init__(self, num_classes, backbone_network, fpn_sizes):
        """[summary]

        Args:
            num_classes ([int]): [description]
            backbone_network ([str]): [description]
            fpn_sizes ([list]): [number of channels
                                    in each backbone feature map]
        """
        self.inplanes = 64
        super(RetinaNet, self).__init__()
        # fpn_sizes = [160, 272, 448]
        # fpn_sizes = [56, 160, 448]
        # for b4
        # fpn_sizes = [160, 272, 448]

        # for b0
        # fpn_sizes = [112,192,1280]
        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256,
                                                       num_classes=num_classes)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()

        self.efficientnet = backbone_network
Example #10
def generate_anchor(cfg, score_size):
    anchors = Anchors(cfg)
    anchor = anchors.anchors
    x1, y1, x2, y2 = anchor[:, 0], anchor[:, 1], anchor[:, 2], anchor[:, 3]
    anchor = np.stack([(x1+x2)*0.5, (y1+y2)*0.5, x2-x1, y2-y1], 1)

    total_stride = anchors.stride
    anchor_num = anchor.shape[0]

    anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4))
    ori = - (score_size // 2) * total_stride
    xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)],
                         [ori + total_stride * dy for dy in range(score_size)])
    xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \
             np.tile(yy.flatten(), (anchor_num, 1)).flatten()
    anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32)
    return anchor
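
The returned array holds one (cx, cy, w, h) row per anchor per score-map cell, i.e. shape (anchor_num * score_size**2, 4), with centres laid out symmetrically around the map centre. The grid construction in isolation (a sketch with made-up stride and size values):

import numpy as np

score_size, total_stride = 25, 8
ori = -(score_size // 2) * total_stride
xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)],
                     [ori + total_stride * dy for dy in range(score_size)])
assert xx.min() == -96 and xx.max() == 96  # centres are symmetric about 0
assert xx.size == score_size * score_size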
Example #11
    def __init__(self, config):
        super(Retina, self).__init__()

        self.anchors_per_grid_cell = len(config.anchor_ratios) * len(
            config.anchor_scales)
        self.classes = config.classes
        self.num_classes = len(self.classes) + 1

        self._backbone = resnet101(pretrained=True)

        names, layers = zip(*list(
            self._backbone.named_children())[:-2])  # leave off avgpool and fc

        self.backbone = []

        i = 0
        while i < len(names):
            j = i + 1
            while j < len(names) and not (names[j].startswith('layer')):
                j += 1
            self.backbone.append(torch.nn.Sequential(*layers[i:j]))
            i = j

        self.conv6 = torch.nn.Conv2d(2048, 256, 3, stride=2, padding=1)
        self.conv7 = torch.nn.Conv2d(256, 256, 3, stride=2, padding=1)
        self.conv5 = torch.nn.Conv2d(2048, 256, 3, padding=1)
        self.conv4 = torch.nn.Conv2d(1024, 256, 1)
        self.conv3 = torch.nn.Conv2d(512, 256, 1)
        self.conv2 = torch.nn.Conv2d(256, 256, 1)

        self.loc = self.mk_subnet(4, include_sigmoid=False)
        self.conf = self.mk_subnet(self.num_classes, include_sigmoid=False)

        self.anchors = Anchors(config)

        self.detect = Detect(self.num_classes, 200, 0.01, 0.45, self.anchors)
        self.config = config
Example #12
    def __init__(self, num_classes, phi):
        feature_size = feature_sizes[phi]
        super(EfficientDet, self).__init__()

        self.backbone = geffnets[phi](pretrained=True,
                                      drop_rate=0.25,
                                      drop_connect_rate=0.2)

        # Get backbone feature sizes.
        fpn_sizes = [40, 80, 192]

        self.fpn = [
            PyramidFeatures(fpn_sizes, feature_size=feature_size,
                            index=index).cuda()
            for index in range(min(2 + phi, 8))
        ]

        self.regressionModel = RegressionModel(phi, feature_size=feature_size)
        self.classificationModel = ClassificationModel(
            phi, feature_size=feature_size, num_classes=num_classes)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()

        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
Example #13
    def __init__(self):
        super(Network, self).__init__()

        # stage 1 networks
        self.template_feature_extractor_global = TemplateFeatExtractGlobal()
        self.image_feature_extractor = ImageFeatExtract()
        self.template_feature_extractor = TemplateFeatExtract()
        self.correlation_model = CorrelationModel(640)

        # detection networks
        self.anchors = Anchors(pyramid_levels=[4],
                               ratios=[0.5, 1, 2],
                               sizes=[30],
                               scales=[1, 2, 3, 4, 5, 6, 7, 8])
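        # 3 ratios x 8 scales = 24 anchors per location, matching num_anchors=24 below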
        self.classification = ClassificationModel(512, num_anchors=24)
        self.regression = RegressionModel(512, num_anchors=24)

        # weight init
        prior = 0.01
        self.classification.output.weight.data.fill_(0)
        self.classification.output.bias.data.fill_(-math.log((1.0 - prior) /
                                                             prior))
        self.regression.output.weight.data.fill_(0)
        self.regression.output.bias.data.fill_(0)

        self.correlation_model.corr_conv_heatmap.weight.data.fill_(0)
        self.correlation_model.corr_conv_heatmap.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.correlation_model.seg_final.weight.data.fill_(0)
        self.correlation_model.seg_final.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        # utils
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
Example #14
    def __init__(self, cfg, anchor_cfg, num_epoch=1):
        super(DataSets, self).__init__()
        global logger
        logger = logging.getLogger('global')

        # anchors
        self.anchors = Anchors(anchor_cfg)

        # size
        self.template_size = 127
        self.search_size = 255
        self.size = 17
        self.base_size = 0
        self.crop_size = 0

        if 'template_size' in cfg:
            self.template_size = cfg['template_size']
        if 'search_size' in cfg:
            self.search_size = cfg['search_size']
        if 'base_size' in cfg:
            self.base_size = cfg['base_size']
        if 'size' in cfg:
            self.size = cfg['size']

        # if (self.search_size - self.template_size) / self.anchors.stride + 1 + self.base_size != self.size:
        #     raise Exception("size not match!")  # TODO: calculate size online

        if 'crop_size' in cfg:
            self.crop_size = cfg['crop_size']
        self.template_small = False
        if 'template_small' in cfg and cfg['template_small']:
            self.template_small = True

        self.anchors.generate_all_anchors(im_c=self.search_size//2, size=self.size)

        if 'anchor_target' not in cfg:
            cfg['anchor_target'] = {}
        self.anchor_target = AnchorTargetLayer(cfg['anchor_target'])

        # data sets
        if 'datasets' not in cfg:
            raise Exception('DataSets needs "{}"'.format('datasets'))

        self.all_data = []
        start = 0
        self.num = 0
        for name in cfg['datasets']:
            dataset = cfg['datasets'][name]
            dataset['mark'] = name
            dataset['start'] = start

            dataset = SubDataSet(dataset)
            dataset.log()
            self.all_data.append(dataset)

            start += dataset.num  # real video number
            self.num += dataset.num_use  # the number used for subset shuffle

        # data augmentation
        aug_cfg = cfg['augmentation']
        self.template_aug = Augmentation(aug_cfg['template'])
        self.search_aug = Augmentation(aug_cfg['search'])
        self.gray = aug_cfg['gray']
        self.neg = aug_cfg['neg']
        self.inner_neg = 0 if 'inner_neg' not in aug_cfg else aug_cfg['inner_neg']

        self.pick = None  # list to save id for each img
        if 'num' in cfg:  # number used in training for all dataset
            self.num = int(cfg['num'])
        self.num *= num_epoch
        self.shuffle()

        self.infos = {
                'template': self.template_size,
                'search': self.search_size,
                'template_small': self.template_small,
                'gray': self.gray,
                'neg': self.neg,
                'inner_neg': self.inner_neg,
                'crop_size': self.crop_size,
                'anchor_target': self.anchor_target.__dict__,
                'num': self.num // num_epoch
                }
        logger.info('dataset information: \n{}'.format(json.dumps(self.infos, indent=4)))
Example #15
class SiamMask(nn.Module):
    def __init__(self, anchors=None, o_sz=63, g_sz=127):
        super(SiamMask, self).__init__()
        self.anchors = anchors  # anchor_cfg
        self.anchor_num = len(self.anchors["ratios"]) * len(
            self.anchors["scales"])
        self.anchor = Anchors(anchors)
        self.features = None
        self.rpn_model = None
        self.mask_model = None
        self.o_sz = o_sz
        self.g_sz = g_sz
        self.upSample = nn.UpsamplingBilinear2d(size=[g_sz, g_sz])

        self.all_anchors = None

    def set_all_anchors(self, image_center, size):
        # cx,cy,w,h
        if not self.anchor.generate_all_anchors(image_center, size):
            return
        all_anchors = self.anchor.all_anchors[1]  # cx, cy, w, h
        self.all_anchors = torch.from_numpy(all_anchors).float().cuda()
        self.all_anchors = [self.all_anchors[i] for i in range(4)]

    def feature_extractor(self, x):
        return self.features(x)

    def rpn(self, template, search):
        pred_cls, pred_loc = self.rpn_model(template, search)
        return pred_cls, pred_loc

    def mask(self, template, search):
        pred_mask = self.mask_model(template, search)
        return pred_mask

    def _add_rpn_loss(self, label_cls, label_loc, label_loc_weight, label_mask,
                      label_mask_weight, rpn_pred_cls, rpn_pred_loc,
                      rpn_pred_mask):
        rpn_loss_cls = select_cross_entropy_loss(rpn_pred_cls, label_cls)

        rpn_loss_loc = weight_l1_loss(rpn_pred_loc, label_loc,
                                      label_loc_weight)

        rpn_loss_mask, iou_m, iou_5, iou_7 = select_mask_logistic_loss(
            rpn_pred_mask, label_mask, label_mask_weight)

        return rpn_loss_cls, rpn_loss_loc, rpn_loss_mask, iou_m, iou_5, iou_7

    def run(self, template, search, softmax=False):
        """
        run network
        """
        template_feature = self.feature_extractor(template)
        search_feature = self.feature_extractor(search)
        rpn_pred_cls, rpn_pred_loc = self.rpn(template_feature, search_feature)
        rpn_pred_mask = self.mask(template_feature,
                                  search_feature)  # (b, 63*63, w, h)

        if softmax:
            rpn_pred_cls = self.softmax(rpn_pred_cls)
        return rpn_pred_cls, rpn_pred_loc, rpn_pred_mask, template_feature, search_feature

    def softmax(self, cls):
        b, a2, h, w = cls.size()
        cls = cls.view(b, 2, a2 // 2, h, w)
        cls = cls.permute(0, 2, 3, 4, 1).contiguous()
        cls = F.log_softmax(cls, dim=4)
        return cls

    def forward(self, input):
        """
        :param input: dict of input with keys of:
                'template': [b, 3, h1, w1], input template image.
                'search': [b, 3, h2, w2], input search image.
                'label_cls':[b, max_num_gts, 5] or None(self.training==False),
                                     each gt contains x1,y1,x2,y2,class.
        :return: dict of loss, predict, accuracy
        """
        template = input['template']
        search = input['search']
        if self.training:
            label_cls = input['label_cls']
            label_loc = input['label_loc']
            label_loc_weight = input['label_loc_weight']
            label_mask = input['label_mask']
            label_mask_weight = input['label_mask_weight']

        rpn_pred_cls, rpn_pred_loc, rpn_pred_mask, template_feature, search_feature = \
            self.run(template, search, softmax=self.training)

        outputs = dict()

        outputs['predict'] = [
            rpn_pred_loc, rpn_pred_cls, rpn_pred_mask, template_feature,
            search_feature
        ]

        if self.training:
            rpn_loss_cls, rpn_loss_loc, rpn_loss_mask, iou_acc_mean, iou_acc_5, iou_acc_7 = \
                self._add_rpn_loss(label_cls, label_loc, label_loc_weight, label_mask, label_mask_weight,
                                   rpn_pred_cls, rpn_pred_loc, rpn_pred_mask)
            outputs['losses'] = [rpn_loss_cls, rpn_loss_loc, rpn_loss_mask]
            outputs['accuracy'] = [iou_acc_mean, iou_acc_5, iou_acc_7]

        return outputs

    def template(self, z):
        self.zf = self.feature_extractor(z)
        cls_kernel, loc_kernel = self.rpn_model.template(self.zf)
        return cls_kernel, loc_kernel

    def track(self, x, cls_kernel=None, loc_kernel=None, softmax=False):
        xf = self.feature_extractor(x)
        rpn_pred_cls, rpn_pred_loc = self.rpn_model.track(
            xf, cls_kernel, loc_kernel)
        if softmax:
            rpn_pred_cls = self.softmax(rpn_pred_cls)
        return rpn_pred_cls, rpn_pred_loc
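
The softmax helper in this example reshapes the 2*k score channels into (background, foreground) pairs per anchor before applying log-softmax. A shape-level sketch, assuming k = 5 anchors per location:

import torch
import torch.nn.functional as F

b, k, h, w = 2, 5, 25, 25
cls = torch.randn(b, 2 * k, h, w)  # raw RPN scores
cls = cls.view(b, 2, k, h, w).permute(0, 2, 3, 4, 1).contiguous()
cls = F.log_softmax(cls, dim=4)    # normalise over the 2 classes
assert cls.shape == (b, k, h, w, 2)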
Example #16
    def __init__(self, num_class, block, layers):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Sequential(
            OrderedDict([('Conv1',
                          nn.Conv2d(3,
                                    64,
                                    kernel_size=7,
                                    stride=2,
                                    padding=3,
                                    bias=False)), ('BN', nn.BatchNorm2d(64)),
                         ('Relu', nn.ReLU(inplace=True)),
                         ('Maxpooling',
                          nn.MaxPool2d(kernel_size=3, stride=2, padding=1))]))

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # BasicBlock and Bottleneck both expose `channels` here, so a single
        # branch covers both block types.
        if block in (BasicBlock, Bottleneck):
            fpn_sizes = [
                self.layer2[layers[1] - 1].channels,
                self.layer3[layers[2] - 1].channels,
                self.layer4[layers[3] - 1].channels
            ]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regression = Regression(256)
        self.classification = Classification(256, num_classes=num_class)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01

        # self.classification.output.weight.data.fill_(0)
        self.classification.output.bias.data.fill_(-torch.log(
            (torch.tensor(1.0 - prior).float()) / prior))

        # self.regression.output.weight.data.fill_(0)
        self.regression.output.bias.data.fill_(0)

        self.freeze_bn()
Example #17
def main(args):
    use_cuda = args.use_cuda
    if not os.path.exists(cfgs.model_dir):
        os.makedirs(cfgs.model_dir)
    #*******************************************************************************create logger
    log_dir = args.log_dir
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logger = logging.getLogger()
    log_name = time.strftime('%F-%T', time.localtime()).replace(':',
                                                                '-') + '.log'
    log_path = os.path.join(log_dir, log_name)
    hdlr = logging.FileHandler(log_path)
    logger.addHandler(hdlr)
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.DEBUG)
    #*****************************************************************************Create the data loaders
    dataset_train, dataloader_val = dataset_factory()
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=4,
                                  collate_fn=detection_collate,
                                  batch_size=args.batch_size,
                                  shuffle=True)
    # dataloader_val = DataLoader(dataset_val,num_workers=1,batch_size=1)
    #  dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)
    #*********************************************************************************load model
    if torch.cuda.is_available() and use_cuda:
        device = torch.device('cuda')
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_list
    else:
        device = torch.device('cpu')

    retinanet = RetinaMask(9, 'train').to(device)
    if args.model_path:
        model_weights = torch.load(args.model_path, map_location=device)
        #model_weights = rename_dict(model_weights)
        retinanet.load_state_dict(model_weights, strict=True)
        logger.info("load weightes success: {}".format(args.model_path))
    BoxDetector = RetinanetDetector()
    #****************************************************************** load anchor
    get_anchors = Anchors()
    anchors = get_anchors(cfgs.IMGHeight, cfgs.IMGWidth)
    if use_cuda:
        anchors = anchors.cuda().float()
    print("anchors:", anchors.size())
    #*******************************************************************create loss
    # focalLoss = FocalLoss()
    criterion = MultiBoxLoss(use_gpu=use_cuda)
    if len(args.gpu_list.split(',')) > 0:
        retinanet = torch.nn.DataParallel(retinanet)
    # retinanet.train()
    # optimizer = optim.Adam(retinanet.parameters(), lr=args.lr,weight_decay=args.weight_decay)
    optimizer = optim.SGD(retinanet.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)
    loss_reg = collections.deque(maxlen=500)
    loss_cls = collections.deque(maxlen=500)
    # retinanet.module.freeze_bn()
    logger.info('Num training images: {}'.format(dataset_train.__len__()))
    rgb_mean = np.array([0.485, 0.456, 0.406])[np.newaxis,
                                               np.newaxis, :].astype('float32')
    rgb_std = np.array([0.229, 0.224, 0.225])[np.newaxis,
                                              np.newaxis, :].astype('float32')
    step = 0
    tmp_max = 0.0
    for epoch_num in range(args.start_iter, args.epochs):
        retinanet.train()
        #retinanet.module.freeze_bn()
        lr = poly_lr_scheduler(optimizer,
                               args.lr,
                               epoch_num,
                               max_iter=args.epochs,
                               power=0.8)
        for idx, (img_batch, gt_batch) in enumerate(dataloader_train):
            save_fg = 0
            step += 1
            if use_cuda:
                img_batch = img_batch.cuda()
            '''
            images = img_batch.numpy()
            targets = gt_batch
            priors = anchors
            conf_t = test_anchor(targets,priors)
            for i in range(args.batch_size):
                tmp_img = np.transpose(images[i],(1,2,0))
                # tmp_img = tmp_img + rgb_mean
                # tmp_img = tmp_img * 255
                tmp_img *= rgb_std
                tmp_img += rgb_mean
                tmp_img *=255
                tmp_img = np.array(tmp_img,dtype=np.uint8)
                tmp_img = cv2.cvtColor(tmp_img,cv2.COLOR_RGB2BGR)
                h,w = tmp_img.shape[:2]
                if len(targets[i])>0:
                    gt = targets[i]
                    for j in range(gt.shape[0]):
                        x1,y1 = int(gt[j,0]),int(gt[j,1])
                        x2,y2 = int(gt[j,2]),int(gt[j,3])
                        # print('pred',x1,y1,x2,y2,gt[j,4],w,h)
                        if x2 >x1 and y2 >y1:
                            cv2.rectangle(tmp_img,(x1,y1),(x2,y2),(0,0,255))
                for j in range(priors.size(0)):
                    if conf_t[i,j] >0:
                        box = priors[j].cpu().numpy()
                        # print(box)
                        x1,y1 = box[0],box[1]
                        x2,y2 = box[2],box[3]
                        x1,y1 = int(x1),int(y1)
                        x2,y2 = int(x2),int(y2)
                        cv2.rectangle(tmp_img,(x1,y1),(x2,y2),(255,0,0))
                cv2.imwrite('train_match4.jpg',tmp_img)
                cv2.imshow('src',tmp_img)
                cv2.waitKey(0)
            '''
            classification, regression, _ = retinanet(img_batch)
            #print("begin to cal loss")
            classification_loss, regression_loss = criterion(
                [classification, regression, anchors], gt_batch)
            # classification_loss = classification_loss.mean()
            # regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss
            if bool(loss == 0):
                continue
            optimizer.zero_grad()
            loss.backward()
            #torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()
            loss_hist.append(float(loss.item()))
            loss_cls.append(float(classification_loss.item()))
            loss_reg.append(float(regression_loss.item()))
            if step % 500 == 0:
                logger.info(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f} | cls_mean:{:.6f} | reg_mean:{:.6f} | lr: {:.6f}'
                    .format(epoch_num, step, classification_loss.item(),
                            regression_loss.item(), np.mean(loss_hist),
                            np.mean(loss_cls), np.mean(loss_reg), lr))
            if step % 3000 == 0:
                #     mmap = test_net(retinanet,BoxDetector,anchors,dataloader_val,use_cuda,'train',args)
                save_fg = 1
            # if mmap > tmp_max:
            #     tmp_max = mmap
            #     save_fg = 1
            if save_fg:
                sfile = 'retina_' + args.dataset + '_best.pth'
                spath = os.path.join(cfgs.model_dir, sfile)
                if len(args.gpu_list.split(',')) > 0:
                    torch.save(retinanet.module.state_dict(), spath)
                else:
                    torch.save(retinanet.state_dict(), spath)
                logger.info("*****************save weightes******,%d" % step)
Example #18
    def __init__(self, num_classes, block, layers):
        super(ResNet, self).__init__()
        self.inplanes = 64

        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # self.relu = nn.ReLU(inplace=True)
        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block,
                                       planes=64,
                                       blocks=layers[0],
                                       stride=1)
        self.layer2 = self._make_layer(block,
                                       planes=128,
                                       blocks=layers[1],
                                       stride=2)
        self.layer3 = self._make_layer(block,
                                       planes=256,
                                       blocks=layers[2],
                                       stride=2)
        self.layer4 = self._make_layer(block,
                                       planes=512,
                                       blocks=layers[3],
                                       stride=2)

        if block == BasicBlock:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv2.out_channels,
                self.layer3[layers[2] - 1].conv2.out_channels,
                self.layer4[layers[3] - 1].conv2.out_channels
            ]
        elif block == Bottleneck:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv3.out_channels,
                self.layer3[layers[2] - 1].conv3.out_channels,
                self.layer4[layers[3] - 1].conv3.out_channels
            ]

        # if block == BasicBlock:
        #     fpn_sizes = [self.layer1[layers[1]-1].conv2.out_channels, self.layer2[layers[1]-1].conv2.out_channels, self.layer3[layers[2]-1].conv2.out_channels, self.layer4[layers[3]-1].conv2.out_channels]
        # elif block == Bottleneck:
        #     fpn_sizes = [self.layer1[layers[1]-1].conv2.out_channels, self.layer2[layers[1]-1].conv3.out_channels, self.layer3[layers[2]-1].conv3.out_channels, self.layer4[layers[3]-1].conv3.out_channels]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256,
                                                       num_classes=num_classes)
        self.siameseNetwork = SiameseNetwork()

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()

        self.cropBoxes = utils.CropBoxes()

        # pooler = Pooler(
        #     output_size=(6, 6),
        #     scales=(1.0/8, 1.0/16, 1.0/32,), #1.0/64, 1.0/128),
        #     sampling_ratio=0,
        #     canonical_level=4,
        # )
        # self.pooler = pooler

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01

        self.classificationModel.conv5.weight.data.fill_(0)
        self.classificationModel.conv5.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.conv5.weight.data.fill_(0)
        self.regressionModel.conv5.bias.data.fill_(0)

        self.freeze_bn()
Example #19
    def __init__(self, num_classes, block, layers, num_anchors=3):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if block == BasicBlock:
            fpn_sizes = [
                self.layer1[layers[0] - 1].conv2.out_channels,
                self.layer2[layers[1] - 1].conv2.out_channels,
                self.layer3[layers[2] - 1].conv2.out_channels,
                self.layer4[layers[3] - 1].conv2.out_channels
            ]
        elif block == Bottleneck:
            fpn_sizes = [
                self.layer1[layers[0] - 1].conv3.out_channels,
                self.layer2[layers[1] - 1].conv3.out_channels,
                self.layer3[layers[2] - 1].conv3.out_channels,
                self.layer4[layers[3] - 1].conv3.out_channels
            ]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2],
                                   fpn_sizes[3])

        self.context = self._make_contextlayer()

        self.clsHead = ClassHead_()
        self.bboxHead = BboxHead_()
        self.ldmHead = LandmarkHead_()

        # self.clsHead = self._make_class_head()
        # self.bboxHead = self._make_bbox_head()
        # self.ldmHead = self._make_landmark_head()

        self.anchors = Anchors()

        self.regressBoxes = RegressionTransform()

        self.losslayer = losses.LossLayer()

        self.freeze_bn()

        # initialize head
        # self.clsHead.apply(initialize_layer)
        # self.bboxHead.apply(initialize_layer)
        # self.ldmHead.apply(initialize_layer)

        # initialize context
        for layer in self.context:
            for m in layer.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.normal_(m.weight, std=0.01)
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
                if isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)
Example #20
import socket

from anchors import Anchors  # assumed local module; see "#in anchors.py" in Example #7
from bokeh.io import curdoc
from bokeh.plotting import figure

try:
    host = '192.168.1.6'
    port = 5000
    serverAddress = (host, port)
    print("Starting data socket server on %s:%s" % serverAddress)
    dataSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    dataSocket.bind(serverAddress)
    dataSocket.listen(
        1)  #listen to a maximum number of queued connections of 1
    print("Data socket server on %s:%s is listening now." % serverAddress)
    connection, clientAddress = dataSocket.accept()
    print("Connection established with client with IP: %s:%s" % clientAddress)
except:
    pass

commissioningFileName = "commissionning.dat"
anchors = Anchors(commissioningFileName)

p = figure(plot_width=800, plot_height=800)
r_anchors = p.scatter([x * 0.6096 for x in anchors.listOfX],
                      [y * 0.6096 for y in anchors.listOfY],
                      size=10,
                      color="black",
                      alpha=0.6)
p.xaxis.axis_label = "X(meter)"
p.yaxis.axis_label = "Y(meter)"
r = p.scatter([], [], size=6, color="firebrick", alpha=0.6)
ds = r.data_source

curdoc().add_root(p)
curdoc().title = "Visualization of positioning results"
# Add a periodic callback to be run every 500 milliseconds
Example #21
import os
import json
import cv2
import numpy as np
import torch
from torchvision import transforms
from dataloader import *
from PIL import Image

os.environ["CUDA_VISIBLE_DEVICES"] = "7"

dataset = CocoDataset('../dataset',
                      set_name='val2017',
                      transform=transforms.Compose([Normalizer(),
                                                    Resizer()]))

device = torch.device('cuda:0')

anchors = Anchors()
fpn = torch.load("fpn.pt")
net = torch.load("fsaf.pt")

fpn = fpn.to(device)
net = net.to(device)

fpn.eval()
net.eval()


def normalizer(image):
    mean = np.array([[[0.485, 0.456, 0.406]]])
    std = np.array([[[0.229, 0.224, 0.225]]])
    return (image - mean) / std
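
A usage sketch for the helper above (random data standing in for a real image; the statistics are the ImageNet mean/std already hard-coded in the function):

import numpy as np

image = np.random.rand(480, 640, 3)  # HxWx3 float image in [0, 1]
out = normalizer(image)
assert out.shape == image.shape      # per-channel (x - mean) / std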
Example #22
        j = np.where(fppi_tmp <= ref_i)[-1][-1]
        ref[i] = mr_tmp[j]

    # log(0) is undefined, so clamp with np.maximum(1e-10, ref); lamr is the
    # geometric mean of the miss rates sampled at the FPPI reference points
    lamr = math.exp(np.mean(np.log(np.maximum(1e-10, ref))))

    return lamr, mr, fppi


if __name__ == '__main__':
    # load net
    args = params()
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        device = torch.device('cuda')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')
        device = torch.device('cpu')
    net = RetinaMask(cfgs.CLSNUM, 'test').to(device)
    net.load_state_dict(torch.load(args.trained_model, map_location=device))
    net.eval()
    detector = RetinanetDetector()
    print('finish loading model')
    dataset = ReadDataset(args.voc_root, args.val_file)
    get_anchors = Anchors()
    priors = get_anchors(640, 640)
    test_net(net, detector, priors, dataset, use_cuda, 'test', args)
Example #23
class DataSets(Dataset):
    def __init__(self, cfg, anchor_cfg, num_epoch=1):
        super(DataSets, self).__init__()
        global logger
        logger = logging.getLogger('global')

        # anchors
        self.anchors = Anchors(anchor_cfg)

        # size
        self.template_size = 127
        self.search_size = 255
        self.size = 17
        self.base_size = 0
        self.crop_size = 0

        if 'template_size' in cfg:
            self.template_size = cfg['template_size']
        if 'search_size' in cfg:
            self.search_size = cfg['search_size']
        if 'base_size' in cfg:
            self.base_size = cfg['base_size']
        if 'size' in cfg:
            self.size = cfg['size']

        # if (self.search_size - self.template_size) / self.anchors.stride + 1 + self.base_size != self.size:
        #     raise Exception("size not match!")  # TODO: calculate size online

        if 'crop_size' in cfg:
            self.crop_size = cfg['crop_size']
        self.template_small = False
        if 'template_small' in cfg and cfg['template_small']:
            self.template_small = True

        self.anchors.generate_all_anchors(im_c=self.search_size//2, size=self.size)

        if 'anchor_target' not in cfg:
            cfg['anchor_target'] = {}
        self.anchor_target = AnchorTargetLayer(cfg['anchor_target'])

        # data sets
        if 'datasets' not in cfg:
            raise Exception('DataSets needs "{}"'.format('datasets'))

        self.all_data = []
        start = 0
        self.num = 0
        for name in cfg['datasets']:
            dataset = cfg['datasets'][name]
            dataset['mark'] = name
            dataset['start'] = start

            dataset = SubDataSet(dataset)
            dataset.log()
            self.all_data.append(dataset)

            start += dataset.num  # real video number
            self.num += dataset.num_use  # the number used for subset shuffle

        # data augmentation
        aug_cfg = cfg['augmentation']
        self.template_aug = Augmentation(aug_cfg['template'])
        self.search_aug = Augmentation(aug_cfg['search'])
        self.gray = aug_cfg['gray']
        self.neg = aug_cfg['neg']
        self.inner_neg = 0 if 'inner_neg' not in aug_cfg else aug_cfg['inner_neg']

        self.pick = None  # list to save id for each img
        if 'num' in cfg:  # number used in training for all dataset
            self.num = int(cfg['num'])
        self.num *= num_epoch
        self.shuffle()

        self.infos = {
                'template': self.template_size,
                'search': self.search_size,
                'template_small': self.template_small,
                'gray': self.gray,
                'neg': self.neg,
                'inner_neg': self.inner_neg,
                'crop_size': self.crop_size,
                'anchor_target': self.anchor_target.__dict__,
                'num': self.num // num_epoch
                }
        logger.info('dataset information: \n{}'.format(json.dumps(self.infos, indent=4)))

    def imread(self, path):
        img = cv2.imread(path)
        return img, 1.0

    def shuffle(self):
        pick = []
        m = 0
        while m < self.num:
            p = []
            for subset in self.all_data:
                sub_p = subset.shuffle()
                p += sub_p

            sample_random.shuffle(p)

            pick += p
            m = len(pick)
        self.pick = pick
        logger.info("shuffle done!")
        logger.info("dataset length {}".format(self.num))

    def __len__(self):
        return self.num

    def find_dataset(self, index):
        for dataset in self.all_data:
            if dataset.start + dataset.num > index:
                return dataset, index - dataset.start

    def __getitem__(self, index, debug=False):
        index = self.pick[index]
        dataset, index = self.find_dataset(index)

        gray = self.gray and self.gray > random.random()
        neg = self.neg and self.neg > random.random()

        if neg:
            template = dataset.get_random_target(index)
            if self.inner_neg and self.inner_neg > random.random():
                search = dataset.get_random_target()
            else:
                search = random.choice(self.all_data).get_random_target()
        else:
            template, search = dataset.get_positive_pair(index)

        def center_crop(img, size):
            shape = img.shape[1]
            if shape == size: return img
            c = shape // 2
            l = c - size // 2
            r = c + size // 2 + 1
            return img[l:r, l:r]

        template_image, scale_z = self.imread(template[0])

        if self.template_small:
            template_image = center_crop(template_image, self.template_size)

        search_image, scale_x = self.imread(search[0])

        if dataset.has_mask and not neg:
            # print(search[2])
            search_mask = (cv2.imread(search[2], 0) > 0).astype(np.float32)
        else:
            search_mask = np.zeros(search_image.shape[:2], dtype=np.float32)

        if self.crop_size > 0:
            search_image = center_crop(search_image, self.crop_size)
            search_mask = center_crop(search_mask, self.crop_size)

        def toBBox(image, shape):
            imh, imw = image.shape[:2]
            if len(shape) == 4:
                w, h = shape[2]-shape[0], shape[3]-shape[1]
            else:
                w, h = shape
            context_amount = 0.5
            exemplar_size = self.template_size  # 127
            wc_z = w + context_amount * (w+h)
            hc_z = h + context_amount * (w+h)
            s_z = np.sqrt(wc_z * hc_z)
            scale_z = exemplar_size / s_z
            w = w*scale_z
            h = h*scale_z
            cx, cy = imw//2, imh//2
            bbox = center2corner(Center(cx, cy, w, h))
            return bbox

        template_box = toBBox(template_image, template[1])
        search_box = toBBox(search_image, search[1])

        template, _, _ = self.template_aug(template_image, template_box, self.template_size, gray=gray)
        search, bbox, mask = self.search_aug(search_image, search_box, self.search_size, gray=gray, mask=search_mask)

        def draw(image, box, name):
            image = image.copy()
            x1, y1, x2, y2 = map(lambda x: int(round(x)), box)
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0))
            cv2.imwrite(name, image)

        if debug:
            draw(template_image, template_box, "debug/{:06d}_ot.jpg".format(index))
            draw(search_image, search_box, "debug/{:06d}_os.jpg".format(index))
            draw(template, _, "debug/{:06d}_t.jpg".format(index))
            draw(search, bbox, "debug/{:06d}_s.jpg".format(index))

        cls, delta, delta_weight = self.anchor_target(self.anchors, bbox, self.size, neg)
        if dataset.has_mask and not neg:
            mask_weight = cls.max(axis=0, keepdims=True)
        else:
            mask_weight = np.zeros([1, cls.shape[1], cls.shape[2]], dtype=np.float32)

        template, search = map(lambda x: np.transpose(x, (2, 0, 1)).astype(np.float32), [template, search])
        
        mask = (np.expand_dims(mask, axis=0) > 0.5) * 2 - 1  # 1*H*W

        return template, search, cls, delta, delta_weight, np.array(bbox, np.float32), \
               np.array(mask, np.float32), np.array(mask_weight, np.float32)
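
The final mask encoding maps the binary mask to +/-1 (background -1, foreground +1), the label form used by the logistic mask loss in these examples. A quick check of that encoding:

import numpy as np

m = np.array([[0., 1.], [1., 0.]], dtype=np.float32)
encoded = (np.expand_dims(m, axis=0) > 0.5) * 2 - 1  # 1*H*W
assert encoded.shape == (1, 2, 2)
assert set(np.unique(encoded)) == {-1, 1}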
Example #24
    def __init__(
        self,
        num_classes,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        prior=0.01,
        no_rpn=False,
        no_semantic=False,
        bypass_semantic=False,
        squeeze=True,
        decoder_dropout=None,
        decoder_activation=nn.ReLU(),
        encoder_activation=nn.ReLU(inplace=True),
        batch_norm=False,
        regr_feature_sizes=[256] * 3,
        class_feature_sizes=[256] * 3,
    ):
        super(RetinaNet, self).__init__()
        self.bypass_semantic = bypass_semantic
        self.squeeze = squeeze
        self.pyramid_levels = [3, 4, 5]
        self.no_rpn = no_rpn
        self.no_semantic = no_semantic
        self.encoder = ResNet(block=block,
                              layers=layers,
                              activation=encoder_activation)
        self.fpn_sizes = [
            self.get_out_channels(getattr(self.encoder, "layer%d" % idx))
            for idx in [2, 3, 4]
        ]
        #self.fpn_sizes.append([sz[-1]//2 for sz in self.fpn_sizes[-1]])
        print("fpn_sizes")
        print(*self.fpn_sizes, sep='\t')
        self.decoder = nn.Sequential(
            UNetDecode(256,
                       hid_channels=self.fpn_sizes,
                       dropout=decoder_dropout,
                       batch_norm=batch_norm,
                       activation=decoder_activation),
            UpsampleBlock(in_channels=256,
                          out_channels=1 + num_classes,
                          steps=3,
                          activation=decoder_activation,
                          batch_norm=batch_norm))

        self.enc_to_logits = nn.ModuleList(
            [EncToLogits(n, num_classes + 1) for n in self.fpn_sizes])
        self.fpn = PyramidFeatures(*([num_classes + 1] * 3))

        self.regressionModel = RegressionModel(
            num_classes + 1,
            batch_norm=batch_norm,
            activation=decoder_activation,
            feature_sizes=regr_feature_sizes)
        self.classificationModel = ClassificationModel(
            num_classes + 1,
            num_classes=num_classes,
            batch_norm=batch_norm,
            activation=decoder_activation,
            w_init=0.0,
            feature_sizes=class_feature_sizes)

        self.anchors = Anchors(pyramid_levels=self.pyramid_levels,
                               squeeze=squeeze)

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        self.classificationModel.final.weight.data.fill_(0)
        self.classificationModel.final.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.seq.convblock_final.conv.weight.data.fill_(0)
        self.regressionModel.seq.convblock_final.conv.bias.data.fill_(0)

        self.freeze_bn()
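
Both this example and the next one zero the weights of the final classification layer and set its bias to -log((1 - prior) / prior). This is the focal-loss initialization from the RetinaNet paper: with prior = 0.01, every anchor starts out predicting p(object) ≈ 0.01 after the sigmoid, so the overwhelming number of easy background anchors cannot dominate the loss during the first iterations. A quick check of the arithmetic:

import math

prior = 0.01
bias = -math.log((1.0 - prior) / prior)
# sigmoid(bias) recovers the prior probability
print(1.0 / (1.0 + math.exp(-bias)))  # -> 0.01
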
Example #25
0
    def __init__(self,
                 num_classes,
                 block,
                 layers,
                 max_boxes,
                 score_threshold,
                 seg_level,
                 alphabet,
                 train_htr,
                 htr_gt_box,
                 ner_branch=False,
                 binary_classifier=True):
        self.inplanes = 64
        self.pool_h = 2
        self.pool_w = 400
        self.forward_transcription = False
        self.max_boxes = max_boxes
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.downsampling_factors = [8, 16, 32, 64, 128]
        self.epochs_only_det = 1
        self.score_threshold = score_threshold
        self.alphabet = alphabet
        self.train_htr = train_htr
        self.binary_classifier = binary_classifier
        self.htr_gt_box = htr_gt_box
        self.num_classes = num_classes
        self.ner_branch = ner_branch

        if block == BasicBlock:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv2.out_channels,
                self.layer3[layers[2] - 1].conv2.out_channels,
                self.layer4[layers[3] - 1].conv2.out_channels
            ]
        elif block == Bottleneck:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv3.out_channels,
                self.layer3[layers[2] - 1].conv3.out_channels,
                self.layer4[layers[3] - 1].conv3.out_channels
            ]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.anchors = Anchors(seg_level=seg_level)
        self.regressionModel = RegressionModel(
            num_features_in=256, num_anchors=self.anchors.num_anchors)
        self.recognitionModel = RecognitionModel(feature_size=256,
                                                 pool_h=self.pool_h,
                                                 alphabet_len=len(alphabet))
        if ner_branch:
            self.nerModel = NERModel(feature_size=256,
                                     pool_h=self.pool_h,
                                     n_classes=num_classes,
                                     pool_w=self.pool_w)
        self.classificationModel = ClassificationModel(
            num_features_in=256,
            num_anchors=self.anchors.num_anchors,
            num_classes=num_classes)
        self.boxSampler = BoxSampler('train', self.score_threshold)
        self.sorter = RoISorter()
        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()
        if ner_branch:
            self.nerLoss = losses.NERLoss()
        self.transcriptionLoss = losses.TranscriptionLoss()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.recognitionModel.output.weight.data.fill_(0)
        self.recognitionModel.output.bias.data.fill_(-math.log((1.0 - prior) /
                                                               prior))
        if ner_branch:
            self.nerModel.output.weight.data.fill_(0)
            self.nerModel.output.bias.data.fill_(-math.log((1.0 - prior) /
                                                           prior))
        self.freeze_bn()
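
Each of these constructors ends with self.freeze_bn(). In RetinaNet-style implementations this conventionally puts all BatchNorm layers into eval mode so their running statistics stop updating, which matters because detection models are trained with batches too small for stable batch statistics. A sketch of the usual implementation (assumed here, since the method body is not shown in these examples):

def freeze_bn(self):
    '''Keep all BatchNorm layers in eval mode during training.'''
    for m in self.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()
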
Example #26
0
    def load_anchor(self):
        get_anchor = Anchors()
        img_batch = torch.ones((1, 3, self.img_size, self.img_size))
        self.anchors = get_anchor(img_batch)
        if torch.cuda.is_available():
            self.anchors = self.anchors.cuda()
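
The ones-tensor above only supplies a spatial size: the anchor grid depends on the input resolution alone, so it can be generated once at startup and cached, as this method does. A hedged usage sketch, assuming an Anchors module that takes an image batch as in this example:

import torch

get_anchor = Anchors()
# any tensor of the right shape works; only its height/width are read
anchors = get_anchor(torch.ones((1, 3, 512, 512)))  # (1, num_anchors, 4)
if torch.cuda.is_available():
    anchors = anchors.cuda()
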
Example #27
0
# encoding:utf-8
'''
Test evaluation
'''

import numpy as np
import xml.etree.ElementTree as ET
import os
import json
import cv2

testAnchor = False

if testAnchor:
    from anchors import Anchors
    anchorsC = Anchors()
    anchors = anchorsC.get_anchors(fmSizes=[(16, 16), (8, 8)],
                                   fmBased=True,
                                   imgSize=1)


def parsingR(fileName):
    tmpDict = {}
    tmpTime = []
    with open(fileName, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            line = line.replace('\n', '')
            line = line.replace('.jpg', '')
            items = line.split('\t')
Example #28
0
    def load_anchor(self):
        get_anchor = Anchors()
        # img_batch = torch.ones((1, 3, self.img_size, self.img_size))
        self.anchors = get_anchor(self.imgh, self.imgw)
        if self.use_gpu:
            self.anchors = self.anchors.cuda()
Example #29
0
        net.load_state_dict(torch.load(args.trained_model))
    else:
        state_dict = torch.load(args.trained_model, map_location='cpu')
        #state_dict_new = dict()
        #for key,value in list(state_dict.items()):
        #   state_dict_new[key[7:]] = value
        net.load_state_dict(state_dict)
    net.eval()
    print('Finished loading model!')
    # load data
    dataset = TestDataset()
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    get_anchor = Anchors()
    anchors = get_anchor([640, 640])
    print('anchor', anchors.data.size())
    # evaluation
    test_net(args.save_folder,
             net,
             Detector,
             args.cuda,
             dataset,
             args.top_k,
             anchors,
             thresh=args.confidence_threshold)
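
The commented-out lines near the top of this example hint at a common pitfall: a checkpoint saved from a model wrapped in nn.DataParallel prefixes every key with 'module.', which a bare model rejects. A minimal sketch of the conversion (reusing net and args from the example, and assuming such a checkpoint):

state_dict = torch.load(args.trained_model, map_location='cpu')
# strip the 'module.' prefix added by nn.DataParallel, if present
state_dict = {
    (k[7:] if k.startswith('module.') else k): v
    for k, v in state_dict.items()
}
net.load_state_dict(state_dict)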