예제 #1
0
    def forward(self, c_ref, p_ref, tf, tm, tx, gm,
                loss_weight):  # b,c,h,w // b,4 (y,x,h,w)
        """Predict the target mask inside a cropped ROI and compute its loss.

        Args:
            c_ref: reference features; detached before being used.
            p_ref: optional second reference. When it is not None it is
                detached and merged with ``c_ref`` through ``self.SEFA``;
                otherwise the detached ``c_ref`` is used alone.
            tf: target frame; dimensions 2 and 3 are read as height and width.
            tm: mask used to locate the ROI, or None. When None, a constant
                0.5 map with the size of ``gm`` is substituted.
            tx: additional map handed to ``self.masks2yxhw`` and to the
                encoder (its meaning is not visible in this block).
            gm: ground-truth mask; rounded to integer labels for the loss.
            loss_weight: indexable with entries 0..4. Entry 0 always weights
                the full-resolution loss; entries 1..4 weight the coarser
                decoder outputs and a falsy entry skips that level.

        Returns:
            Tuple ``(em, batch_CE, a_ref)``: channel 1 of the softmaxed
            full-resolution prediction warped back with the backward grid,
            the per-sample weighted cross-entropy, and the reference
            features that were fed to the decoder.
        """
        if tm is None:
            # First target frame: there is no mask yet, use a flat 0.5 map.
            tm = ToCudaVariable([0.5 * torch.ones(gm.size())],
                                requires_grad=False)[0]
        roi_box = self.masks2yxhw(tm, tx, scale=1.5)

        src_h, src_w = tf.size()[2], tf.size()[3]  # original size
        fw_grid, bw_grid, _theta = self.get_ROI_grid(roi_box,
                                                     src_size=(src_h, src_w),
                                                     dst_size=(256, 256),
                                                     scale=1.0)

        def crop(plane):
            # Add a channel axis, sample with the forward grid, drop the axis.
            expanded = torch.unsqueeze(plane, dim=1).float()
            return F.grid_sample(expanded, fw_grid)[:, 0]

        # Sample the target frame and its companion maps inside the ROI.
        tf_roi = F.grid_sample(tf, fw_grid)
        tm_roi = crop(tm)
        tx_roi = crop(tx)

        # Siamese encoder on the cropped target, then decode against the
        # (detached) reference.
        tr5, tr4, tr3, tr2 = self.Encoder(tf_roi, tm_roi, tx_roi)
        if p_ref is not None:
            a_ref = self.SEFA(c_ref.detach(), p_ref.detach())
        else:
            a_ref = c_ref.detach()
        em_roi = self.Decoder(a_ref, tr5, tr4, tr3, tr2)

        # Losses are computed within the ROI, so crop the ground truth too.
        gm_roi = crop(gm).detach()

        criterion = nn.CrossEntropyLoss(reduce=False)
        # batch sized loss container
        batch_CE = ToCudaVariable([torch.zeros(gm_roi.size()[0])])[0]
        sizes = [(256, 256), (64, 64), (32, 32), (16, 16), (8, 8)]
        for level, size in enumerate(sizes):
            if level == 0:
                # Full-resolution output is always part of the loss.
                target = torch.round(gm_roi).long()
            elif loss_weight[level]:
                # Coarser outputs: resize the ground truth to their size.
                resized = F.upsample(torch.unsqueeze(gm_roi, dim=1),
                                     size=size,
                                     mode='bilinear')[:, 0]
                target = torch.round(resized).long()
            else:
                continue
            # mean over h,w
            level_CE = criterion(em_roi[level], target).mean(-1).mean(-1)
            batch_CE += loss_weight[level] * level_CE

        # Final output via inverse warping of the foreground probability.
        em = F.grid_sample(F.softmax(em_roi[0], dim=1), bw_grid)[:, 1]
        return em, batch_CE, a_ref
예제 #2
0
파일: run.py 프로젝트: videoturingtest/RGMP
def Encode_MS(val_F1, val_P1, scales):
    """Encode a frame/mask pair once for every requested scale.

    For each entry of ``scales`` the pair is passed through ``downsample``
    (skipped when the scale equals 1.0), wrapped with ``ToCudaVariable`` in
    volatile mode, and fed to ``model.module.Encoder``.

    Returns:
        dict mapping each scale to element 0 of the encoder output.
    """
    ref = {}
    for sc in scales:
        frame, mask = val_F1, val_P1
        if sc != 1.0:
            frame, mask = downsample([frame, mask], sc)
        frame, mask = ToCudaVariable([frame, mask], volatile=True)
        ref[sc] = model.module.Encoder(frame, mask)[0]

    return ref
예제 #3
0
 def is_there_scribble(self, p, n):
     """Return per-sample weights flagging samples that contain any scribble.

     ``p`` and ``n`` are summed over axes 1 and 2; a sample whose combined
     sum is positive gets a 1, the others a 0. The flags are then divided by
     (their mean + 0.001) and returned through ``ToCudaVariable`` as a float
     tensor.
     """
     pos_count = np.sum(p.data.cpu().numpy(), axis=(1, 2))
     neg_count = np.sum(n.data.cpu().numpy(), axis=(1, 2))
     has_scribble = ((pos_count + neg_count) > 0).astype(np.float32)
     # The 0.001 keeps the division finite when no sample has a scribble.
     weights = has_scribble * (1 / (np.mean(has_scribble) + 0.001))
     return ToCudaVariable([torch.from_numpy(weights.copy()).float()])[0]
예제 #4
0
    def all2yxhw(self, mask, pos, neg, scale=1.0):
        """Compute one (y, x, h, w) box per sample covering mask, pos and neg.

        Pixels with a value >= 0.49 in any of the three maps are collected
        and their bounding box is taken; when there are none the whole map is
        used. Each side is widened to roughly 128 pixels if shorter, then the
        box is enlarged by ``scale`` and clamped to 5 pixels outside the map.

        Returns:
            The boxes as a float tensor with one row of
            (center y, center x, height, width) per sample, passed through
            ``ToCudaVariable``.
        """
        np_mask, np_pos, np_neg = (t.data.cpu().numpy()
                                   for t in (mask, pos, neg))

        def widen(lo, hi):
            # make sure minimum 128 original size
            span = hi - lo
            if span < 128:
                half = int((128. - span) / 2)
                return lo - half, hi + half
            return lo, hi

        num_samples = np_mask.shape[0]
        np_yxhw = np.zeros((num_samples, 4), dtype=np.float32)
        for b in range(num_samples):
            full_h, full_w = np_mask.shape[1], np_mask.shape[2]

            hits = [np.where(plane[b] >= 0.49)
                    for plane in (np_mask, np_pos, np_neg)]
            all_ys = np.concatenate([ys for ys, _ in hits])
            all_xs = np.concatenate([xs for _, xs in hits])

            if all_ys.size != 0 and all_xs.size != 0:
                ymin, ymax = np.min(all_ys), np.max(all_ys)
                xmin, xmax = np.min(all_xs), np.max(all_xs)
            else:
                # if no pixel, return whole
                ymin, ymax = 0, full_h
                xmin, xmax = 0, full_w

            ymin, ymax = widen(ymin, ymax)
            xmin, xmax = widen(xmin, xmax)

            # apply scale: grow each side by half of the extra extent
            margin_y = (scale - 1) / 2. * (ymax - ymin + 1)
            margin_x = (scale - 1) / 2. * (xmax - xmin + 1)

            ymin = np.maximum(-5, ymin - margin_y)
            ymax = np.minimum(full_h + 5, ymax + margin_y)
            xmin = np.maximum(-5, xmin - margin_x)
            xmax = np.minimum(full_w + 5, xmax + margin_x)

            # final yxhw
            np_yxhw[b] = np.array([(ymax + ymin) / 2.,
                                   (xmax + xmin) / 2.,
                                   ymax - ymin + 1,
                                   xmax - xmin + 1], dtype=np.float32)

        return ToCudaVariable([torch.from_numpy(np_yxhw.copy()).float()])[0]
예제 #5
0
 def init_variables(self, frames):
     """Allocate the per-video tensors and reset the reference state.

     ``frames`` is transposed with axes (3, 0, 1, 2), converted to float,
     divided by 255 and given a leading singleton dimension. Two zero
     buffers of size (1, num_frames, height, width) and a zero long mask of
     size (1, height, width) are created alongside it, and all four are
     passed through ``ToCudaVariable`` in volatile mode.
     """
     channel_first = np.transpose(frames, (3, 0, 1, 2))
     all_F = torch.unsqueeze(torch.from_numpy(channel_first).float() / 255.,
                             dim=0)  # 1,3,t,h,w
     per_frame_shape = (1, self.num_frames, self.height, self.width)
     all_E = torch.zeros(*per_frame_shape)  # 1,t,h,w
     prev_E = torch.zeros(*per_frame_shape)  # 1,t,h,w
     dummy_M = torch.zeros(1, self.height, self.width).long()

     # to cuda
     (self.all_F, self.all_E,
      self.prev_E, self.dummy_M) = ToCudaVariable(
          [all_F, all_E, prev_E, dummy_M], volatile=True)

     # Nothing has been referenced or propagated yet.
     self.ref = None
     self.a_ref = None
     self.next_a_ref = None
     self.prev_targets = []
예제 #6
0
파일: run.py 프로젝트: videoturingtest/RGMP
def Propagate_MS(ref, val_F2, val_P2, scales):
    """Propagate the mask at every scale and average the predictions.

    For each scale the frame/mask pair is (optionally) downsampled, encoded
    with ``model.module.Encoder`` and decoded against ``ref[sc]``. Channel 1
    of the softmax of the first decoder output is moved to the CPU and, for
    scales other than 1.0, passed through ``upsample`` with the size of
    ``val_F2``.

    Returns:
        Tensor of the size of ``val_P2`` holding the mean over ``scales`` of
        the per-scale predictions.
    """
    h, w = val_F2.size()[2], val_F2.size()[3]
    msv_E2 = {}
    for sc in scales:
        rescaled = sc != 1.0
        frame, mask = val_F2, val_P2
        if rescaled:
            frame, mask = downsample([frame, mask], sc)
        frame, mask = ToCudaVariable([frame, mask], volatile=True)

        r5, r4, r3, r2 = model.module.Encoder(frame, mask)
        e2 = model.module.Decoder(r5, ref[sc], r4, r3, r2)
        prob = F.softmax(e2[0], dim=1)[:, 1].data.cpu()
        # Only rescaled predictions need to be brought back to (h, w).
        msv_E2[sc] = upsample(prob, (h, w)) if rescaled else prob

    # Average over the listed scales (a repeated scale counts repeatedly).
    val_E2 = torch.zeros(val_P2.size())
    for sc in scales:
        val_E2 += msv_E2[sc]
    val_E2 /= len(scales)
    return val_E2
예제 #7
0
    def get_ROI_grid(self, roi, src_size, dst_size, scale=1.):
        """Build forward and backward affine sampling grids for a batch of ROIs.

        Args:
            roi: per-sample boxes indexed as columns (y, x, h, w), i.e.
                center coordinates followed by height and width.
            src_size: (height, width) of the source image.
            dst_size: (height, width) of the cropped ROI.
            scale: multiplier applied to the ROI height and width.

        Returns:
            Tuple ``(fw_grid, bw_grid, theta)``: the grid that samples the
            ROI out of the source, the grid built from the inverse transform
            that maps an ROI-sized result back onto the source, and the
            forward 2x3 affine matrices.

        NOTE(review): a ROI with zero height or width divides by zero below;
        this is not guarded, same as before.
        """
        # scale height and width
        ry, rx = roi[:, 0], roi[:, 1]
        rh, rw = scale * roi[:, 2], scale * roi[:, 3]

        # convert to min/max
        ymin = ry - rh / 2.
        ymax = ry + rh / 2.
        xmin = rx - rw / 2.
        xmax = rx + rw / 2.

        h, w = src_size[0], src_size[1]
        num_rois = roi.size()[0]

        # theta: axis-aligned scale + translation, normalised by (size - 1).
        # This presumably matches a grid convention in which -1/+1 are the
        # centers of the first/last pixel - confirm against the installed
        # PyTorch version.
        theta = ToCudaVariable([torch.zeros(num_rois, 2, 3)])[0]
        theta[:, 0, 0] = (xmax - xmin) / (w - 1)
        theta[:, 0, 2] = (xmin + xmax - (w - 1)) / (w - 1)
        theta[:, 1, 1] = (ymax - ymin) / (h - 1)
        theta[:, 1, 2] = (ymin + ymax - (h - 1)) / (h - 1)

        # Inverse of theta. For [[a, 0, tx], [0, b, ty]] the inverse is
        # [[1/a, 0, -tx/a], [0, 1/b, -ty/b]]. The scale terms previously used
        # w and h instead of (w - 1) and (h - 1), so they were not the
        # reciprocals of theta's scale terms and the backward grid did not
        # exactly undo the forward one.
        inv_theta = ToCudaVariable([torch.zeros(num_rois, 2, 3)])[0]
        inv_theta[:, 0, 0] = (w - 1) / (xmax - xmin)
        inv_theta[:, 1, 1] = (h - 1) / (ymax - ymin)
        inv_theta[:, 0, 2] = -theta[:, 0, 2] / theta[:, 0, 0]
        inv_theta[:, 1, 2] = -theta[:, 1, 2] / theta[:, 1, 1]

        # make affine grid
        fw_grid = F.affine_grid(
            theta, torch.Size((num_rois, 1, dst_size[0], dst_size[1])))
        bw_grid = F.affine_grid(
            inv_theta, torch.Size((num_rois, 1, src_size[0], src_size[1])))
        return fw_grid, bw_grid, theta