Example #1
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    """
    初始化跟踪器,根据目标的信息构建state 字典
    :param im: 当前处理的图像
    :param target_pos: 目标的位置
    :param target_sz: 目标的尺寸
    :param model: 训练好的网络模型
    :param hp: 超参数
    :param device: 硬件信息
    :return: 跟踪器的state字典数据
    """

    # Initialize the state dict
    state = dict()
    # Record the image height and width
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    # Configure the tracker parameters
    p = TrackerConfig()
    # Update the parameters from the hyperparameters and model anchors
    p.update(hp, model.anchors)
    # Recompute derived parameters
    p.renew()
    # Keep a reference to the network model
    net = model
    # Update the tracker parameters from the network, mainly the anchors
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    # Generate the anchors
    p.anchor = generate_anchor(model.anchors, p.score_size)
    # Per-channel mean of the image (used to pad crops)
    avg_chans = np.mean(im, axis=(0, 1))
    # Width, height and side length of the exemplar z, padded by the context amount
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    # Initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)
    # Wrap in a Variable so PyTorch can backpropagate through it
    z = Variable(z_crop.unsqueeze(0))
    # Process the template branch
    net.template(z.to(device))
    # Select the penalty window
    if p.windowing == 'cosine':
        # Build a cosine window as the outer product of two Hanning windows
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    # Each anchor gets its own copy of the penalty window
    window = np.tile(window.flatten(), p.anchor_num)
    # Store everything in the state dict
    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
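For context, a minimal usage sketch of this initializer. The frame source, the ROI selection, and the follow-up siamese_track call are assumptions about the surrounding demo code, not part of the example above.

# A minimal sketch, assuming cv2, a loaded SiamMask `model`, and a
# companion `siamese_track` function (hypothetical here).
import cv2
import numpy as np

frame = cv2.imread('frame0.jpg')
x, y, w, h = cv2.selectROI('select target', frame, False, False)
target_pos = np.array([x + w / 2, y + h / 2])  # box center (cx, cy)
target_sz = np.array([w, h])                   # box size (w, h)
state = siamese_init(frame, target_pos, target_sz, model, hp=None, device='cpu')
# later frames would then be tracked with siamese_track(state, frame, ...)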
Example #2
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    # print("------siamese_init-------")
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    # print("im.shape[0] ", im.shape[0])
    p = TrackerConfig()
    p.update(hp, model.anchors)

    p.renew()

    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num

    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)
    # print("z size (patch) ", z_crop.size())
    z = Variable(z_crop.unsqueeze(0))
    # The network caches the resulting features (self.zf) from passing the patch z through the siamese backbone
    net.template(z.to(device))

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    # print("window = ", state['window'])
    return state
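As the comment above notes, net.template only caches the exemplar features for reuse on later frames. A conceptual sketch of that pattern, assuming the model's internals look roughly like SiamMask's; feature_extractor and rpn are stand-in names, not confirmed attributes.

import torch.nn as nn

class TemplateCachingSiamese(nn.Module):
    def template(self, z):
        # run the exemplar patch through the shared backbone once
        # and cache its features for all subsequent frames
        self.zf = self.feature_extractor(z)

    def track(self, x):
        # each new frame correlates its search features against the cached self.zf
        xf = self.feature_extractor(x)
        return self.rpn(self.zf, xf)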
Example #3
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    p = TrackerConfig()
    p.update(hp, model.anchors)

    p.renew()

    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))
    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z,
                                    avg_chans)

    z = Variable(z_crop.unsqueeze(0))
    net.template(z.to(device))

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size),
                          np.hanning(p.score_size))  # the outer product yields a matrix; the inner product yields a scalar
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
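A quick standalone check of the outer-vs-inner distinction noted in the comment above, using the same 25x25 score size and 5 anchors these examples assume:

import numpy as np

h = np.hanning(25)
window = np.outer(h, h)               # outer product: a (25, 25) matrix peaked at the center
assert window.shape == (25, 25)
assert np.ndim(np.inner(h, h)) == 0   # inner product: a single scalar
tiled = np.tile(window.flatten(), 5)  # one flattened copy per anchor
assert tiled.shape == (3125,)         # 5 * 25 * 25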
Example #4
File: test.py  Project: maazullah96/files
def siamese_init(im,
                 model,
                 hp=None,
                 device='cpu',
                 targets=None,
                 detector=None):
    custom_objects = detector.CustomObjects(car=True, person=True)
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    p = TrackerConfig()
    p.update(hp, model.anchors)

    p.renew()

    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    # s_z = [ round(np.sqrt(target["target_sz"][1] + 0.123 * sum(target["target_sz"])*target["target_sz"][0] + 0.123 * sum(target["target_sz"]) ))  for target in targets ]
    # s_z = np.array(s_z)
    # print(targe)
    # targets.append(targe)
    # print(targets)
    BLUE = [255, 255, 255]

    for i, target in enumerate(targets):
        wc_z = target["target_sz"][0] + p.context_amount * sum(
            target["target_sz"])
        hc_z = target["target_sz"][1] + p.context_amount * sum(
            target["target_sz"])
        target["s_z"] = round(np.sqrt(wc_z * hc_z))

    print("out")

    # initialize the exemplar
    targets = get_subwindow_tracking(
        im,
        p.exemplar_size,
        avg_chans,
        targets=targets,
    )

    # z_f = [ net.template(Variable(target["im_to_torch"].unsqueeze(0)).to(device))  for target in targets ]

    for i, target in enumerate(targets):
        # detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=target["im_patch"],input_type="array", output_image_path=os.path.join("image {} custom.jpg".format(i)),output_type="file", minimum_percentage_probability=30)
        # detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=target["img"],input_type="array", output_image_path=os.path.join(execution_path , "images.jpg"),output_type="file", minimum_percentage_probability=30)
        z = Variable(target["im_to_torch"].unsqueeze(0))
        target["zf"] = net.template(z.to(device))

        del target["im_to_torch"]
        # for eachObject in detections:
        #     print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] )
        #     target["detection"] = eachObject["box_points"]

        #     print("--------------------------------")

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state["targets"] = targets
    state["detector"] = detector
    # state["s_z"] = s_z
    # state["z_f"] = z_f
    return state
Example #5
class SingleTracker(object):
    def __init__(self, config_path, model_path):
        args = TrackArgs()
        args.config = config_path
        args.resume = model_path

        cfg = load_config(args)
        if args.arch == 'Custom':
            from custom import Custom
            self.model = Custom(anchors=cfg['anchors'])
        else:
            raise ValueError('invalid architecture: {}'.format(args.arch))

        if args.resume:
            assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
            self.model = load_pretrain(self.model, args.resume)
        self.model.eval()
        self.device = torch.device('cuda' if (torch.cuda.is_available() and not args.cpu) else 'cpu')
        self.model = self.model.to(self.device)

        ################# Dangerous
        self.p = TrackerConfig()
        self.p.update(cfg['hp'] if 'hp' in cfg.keys() else None, self.model.anchors)
        self.p.renew()

        self.p.scales = self.model.anchors['scales']
        self.p.ratios = self.model.anchors['ratios']
        self.p.anchor_num = self.model.anchor_num
        self.p.anchor = generate_anchor(self.model.anchors, self.p.score_size)

        if self.p.windowing == 'cosine':
            self.window = np.outer(np.hanning(self.p.score_size), np.hanning(self.p.score_size))
        elif self.p.windowing == 'uniform':
            self.window = np.ones((self.p.score_size, self.p.score_size))
        self.window = np.tile(self.window.flatten(), self.p.anchor_num)
        ################


    def get_examplar_feature(self, img, target_pos, target_sz):
        avg_chans = np.mean(img, axis=(0, 1))

        wc_z = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_z = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_z = round(np.sqrt(wc_z * hc_z))
        # initialize the exemplar
        examplar = get_subwindow_tracking(img, target_pos, self.p.exemplar_size, s_z, avg_chans)

        z = Variable(examplar.unsqueeze(0))
        return self.model.template(z.to(self.device))

    def siamese_track(self, img, target_pos, target_sz, examplar_feature, debug=False, mask_enable=True, refine_enable=True):
        avg_chans = np.mean(img, axis=(0, 1))
        im_h = img.shape[0]
        im_w = img.shape[1]

        wc_x = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_x = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_x = np.sqrt(wc_x * hc_x)
        '''
        scale_x = self.p.exemplar_size / s_x
        d_search = (self.p.instance_size - self.p.exemplar_size) / 2
        pad = d_search / scale_x
        s_x = s_x + 2 * pad
        crop_box = [target_pos[0] - round(s_x) / 2, target_pos[1] - round(s_x) / 2, round(s_x), round(s_x)]
        '''
        # The commented-out block above (the original author's code) simplifies to these three lines
        scale_x = self.p.exemplar_size / s_x
        s_x = self.p.instance_size / self.p.exemplar_size * s_x
        crop_box = [target_pos[0] - round(s_x) / 2, target_pos[1] - round(s_x) / 2, round(s_x), round(s_x)]


        # extract scaled crops for search region x at previous target position
        x_crop = Variable(get_subwindow_tracking(img, target_pos, self.p.instance_size, round(s_x), avg_chans).unsqueeze(0))

        if mask_enable:
            score, delta, mask = self.model.track_mask(examplar_feature, x_crop.to(self.device))
        else:
            score, delta = self.model.track(examplar_feature, x_crop.to(self.device))

        delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1).data.cpu().numpy()
        score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1).permute(1, 0),
                          dim=1).data[:, 1].cpu().numpy()

        delta[0, :] = delta[0, :] * self.p.anchor[:, 2] + self.p.anchor[:, 0]
        delta[1, :] = delta[1, :] * self.p.anchor[:, 3] + self.p.anchor[:, 1]
        delta[2, :] = np.exp(delta[2, :]) * self.p.anchor[:, 2]
        delta[3, :] = np.exp(delta[3, :]) * self.p.anchor[:, 3]

        def change(r):
            return np.maximum(r, 1. / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            sz2 = (w + pad) * (h + pad)
            return np.sqrt(sz2)

        def sz_wh(wh):
            pad = (wh[0] + wh[1]) * 0.5
            sz2 = (wh[0] + pad) * (wh[1] + pad)
            return np.sqrt(sz2)

        # size penalty
        target_sz_in_crop = target_sz*scale_x
        s_c = change(sz(delta[2, :], delta[3, :]) / (sz_wh(target_sz_in_crop)))  # scale penalty
        r_c = change((target_sz_in_crop[0] / target_sz_in_crop[1]) / (delta[2, :] / delta[3, :]))  # ratio penalty

        penalty = np.exp(-(r_c * s_c - 1) * self.p.penalty_k)
        pscore = penalty * score

        # cos window (motion model)
        pscore = pscore * (1 - self.p.window_influence) + self.window * self.p.window_influence
        best_pscore_id = np.argmax(pscore)

        pred_in_crop = delta[:, best_pscore_id] / scale_x
        lr = penalty[best_pscore_id] * score[best_pscore_id] * self.p.lr  # lr for OTB

        res_x = pred_in_crop[0] + target_pos[0]
        res_y = pred_in_crop[1] + target_pos[1]

        res_w = target_sz[0] * (1 - lr) + pred_in_crop[2] * lr
        res_h = target_sz[1] * (1 - lr) + pred_in_crop[3] * lr

        target_pos = np.array([res_x, res_y])
        target_sz = np.array([res_w, res_h])

        # for Mask Branch
        if mask_enable:
            best_pscore_id_mask = np.unravel_index(best_pscore_id, (5, self.p.score_size, self.p.score_size))
            delta_x, delta_y = best_pscore_id_mask[2], best_pscore_id_mask[1]

            if refine_enable:
                mask = self.model.track_refine((delta_y, delta_x)).to(self.device).sigmoid().squeeze().view(
                    self.p.out_size, self.p.out_size).cpu().data.numpy()
            else:
                mask = mask[0, :, delta_y, delta_x].sigmoid(). \
                    squeeze().view(self.p.out_size, self.p.out_size).cpu().data.numpy()

            def crop_back(image, bbox, out_sz, padding=-1):
                a = (out_sz[0] - 1) / bbox[2]
                b = (out_sz[1] - 1) / bbox[3]
                c = -a * bbox[0]
                d = -b * bbox[1]
                mapping = np.array([[a, 0, c],
                                    [0, b, d]]).astype(float)
                crop = cv2.warpAffine(image, mapping, (out_sz[0], out_sz[1]),
                                    flags=cv2.INTER_LINEAR,
                                    borderMode=cv2.BORDER_CONSTANT,
                                    borderValue=padding)
                return crop

            s = crop_box[2] / self.p.instance_size
            sub_box = [crop_box[0] + (delta_x - self.p.base_size / 2) * self.p.total_stride * s,
                    crop_box[1] + (delta_y - self.p.base_size / 2) * self.p.total_stride * s,
                    s * self.p.exemplar_size, s * self.p.exemplar_size]
            s = self.p.out_size / sub_box[2]
            back_box = [-sub_box[0] * s, -sub_box[1] * s, im_w * s, im_h * s]
            mask_in_img = crop_back(mask, back_box, (im_w, im_h))

            target_mask = (mask_in_img > self.p.seg_thr).astype(np.uint8)
            if cv2.__version__[-5] == '4':
                contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            else:
                _, contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            cnt_area = [cv2.contourArea(cnt) for cnt in contours]
            if len(contours) != 0 and np.max(cnt_area) > 100:
                contour = contours[np.argmax(cnt_area)]  # use max area polygon
                polygon = contour.reshape(-1, 2)
                # pbox = cv2.boundingRect(polygon)  # Min Max Rectangle
                prbox = cv2.boxPoints(cv2.minAreaRect(polygon))  # Rotated Rectangle

                # box_in_img = pbox
                rbox_in_img = prbox
            else:  # empty mask
                location = cxy_wh_2_rect(target_pos, target_sz)
                rbox_in_img = np.array([[location[0], location[1]],
                                        [location[0] + location[2], location[1]],
                                        [location[0] + location[2], location[1] + location[3]],
                                        [location[0], location[1] + location[3]]])

        target_pos[0] = max(0, min(im_w, target_pos[0]))
        target_pos[1] = max(0, min(im_h, target_pos[1]))
        target_sz[0] = max(10, min(im_w, target_sz[0]))
        target_sz[1] = max(10, min(im_h, target_sz[1]))

        score = score[best_pscore_id]
        mask = mask_in_img if mask_enable else []
        return target_pos, target_sz, score, mask
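The size/ratio penalty inside siamese_track is easier to follow with concrete numbers. A small standalone check reusing the same change and sz helpers; penalty_k = 0.04 is just an illustrative value.

import numpy as np

def change(r):
    return np.maximum(r, 1. / r)

def sz(w, h):
    pad = (w + h) * 0.5
    return np.sqrt((w + pad) * (h + pad))

prev_w, prev_h = 40.0, 20.0
# a proposal with identical size and ratio gets no penalty
s_c = change(sz(40.0, 20.0) / sz(prev_w, prev_h))  # 1.0
r_c = change((prev_w / prev_h) / (40.0 / 20.0))    # 1.0
print(np.exp(-(r_c * s_c - 1) * 0.04))             # 1.0

# a proposal twice the size is suppressed
s_c = change(sz(80.0, 40.0) / sz(prev_w, prev_h))  # 2.0
print(np.exp(-(r_c * s_c - 1) * 0.04))             # exp(-0.04) ~ 0.96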
Example #6
def siamese_init(im,
                 search_shape,
                 target_pos,
                 target_sz,
                 model,
                 hp=None,
                 device='cpu'):
    """
    generate anchors, inference the template image, set up window
    :param im: whole image
    :param target_pos: target position that are selected
    :param target_sz: target size that are selected
    :param model: SiamMask model
    :param hp: hyper parameters
    :param device:
    :return:
    """
    state = dict()
    state['im_h'] = search_shape[0]
    state['im_w'] = search_shape[1]
    p = TrackerConfig()
    p.update(hp, model.anchors)

    p.renew()

    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(
        model.anchors, p.score_size)  # anchor size: (25*25*5, 4) --> (3125, 4)
    avg_chans = np.mean(im, axis=(0, 1))

    # wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    # hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    # s_z = round(np.sqrt(wc_z * hc_z))  # crop size = sqrt((w+(w+h)/2)*(h+(w+h)/2))
    ## initialize the exemplar
    #im_patch = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans, out_mode="numpy")
    im_patch = im
    im_patch = cv2.resize(im_patch, (p.exemplar_size, p.exemplar_size))

    cv2.imshow('crop_template', im_patch)
    cv2.waitKey(0)
    z_crop = im_to_torch(im_patch)

    z = Variable(z_crop.unsqueeze(0))
    net.template(z.to(device))

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
Example #7
def siamese_init(im,
                 target_pos,
                 target_sz,
                 model,
                 hp=None,
                 device='cpu'):  # target_pos and target_sz come from the axis-aligned ground-truth box
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    p = TrackerConfig()  # configuration parameters
    p.update(hp, model.anchors)  # update p with hp and model.anchors, i.e. apply config_vot.json
    p.renew()
    #    p.score_size=25
    net = model
    p.scales = model.anchors['scales']  # attributes of Custom's parent class SiamMask
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num  # 5 on the VOT dataset

    p.anchor = generate_anchor(
        model.anchors, p.score_size
    )  # generate anchors; p.anchor.shape = (p.anchor_num * p.score_size * p.score_size, 4)
    avg_chans = np.mean(im, axis=(0, 1))  # per-channel mean of this single image: shape (3,), (B, G, R)

    # Image preprocessing: expand the target box proportionally to include some context. p.context_amount = 0.5
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)  # wc_z = w + p.context_amount * (w+h)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)  # hc_z = h + p.context_amount * (w+h)
    # Enlarge the box roughly 2x and, centered on the object, crop a square of side s_z
    s_z = round(np.sqrt(wc_z * hc_z))  # round to nearest integer; round(2.5) = 2, round(2.51) = 3
    # initialize the exemplar
    z_crop = get_subwindow_tracking(
        im, target_pos, p.exemplar_size, s_z,
        avg_chans)  # tensor<(3, 127, 127), float32, cpu>
    # p.exemplar_size is the input z size defined in TrackerConfig: 127
    # z_crop has shape (3, 127, 127)

    z = Variable(
        z_crop.unsqueeze(0)
    )  # add a batch dimension: tensor<(1, 3, 127, 127), float32, cpu>
    net.template(z.to(device))  # send z to the device and extract features, i.e. the ResNet-50 output

    if p.windowing == 'cosine':  # default
        window = np.outer(
            np.hanning(p.score_size), np.hanning(p.score_size)
        )  # outer product: ndarray (p.score_size, p.score_size), i.e. <(25, 25), float64>
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))

    window = np.tile(window.flatten(),
                     p.anchor_num)  # repeat window.flatten() p.anchor_num times along axis 0
    # ndarray <(3125,), float64>, with p.anchor_num = 5
    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos  # unchanged from the input
    state['target_sz'] = target_sz  # unchanged from the input
    return state
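Finally, the context-padded crop size that every variant above computes (context_amount = 0.5) works out as follows for a concrete 60x40 box:

import numpy as np

w, h = 60.0, 40.0
context_amount = 0.5
wc_z = w + context_amount * (w + h)  # 60 + 0.5 * 100 = 110
hc_z = h + context_amount * (w + h)  # 40 + 0.5 * 100 = 90
s_z = round(np.sqrt(wc_z * hc_z))    # sqrt(9900) ~ 99.5, rounds to 99 (side of the square crop)
print(s_z)
# this 99x99 region is then resampled to exemplar_size = 127 by get_subwindow_tracking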