示例#1
0
    def get_pair(self, i, output=''):
        """Fetch pair ``i`` from the wrapped dataset and transform its second image.

        ``self.trf`` is applied to the second image together with an identity
        perspective tuple. The perspective it returns is then propagated to
        every piece of ground truth present in the metadata, so that the ground
        truth keeps mapping the first image onto the transformed second image.

        Args:
            i: index forwarded to ``self.dataset.get_pair``.
            output: forwarded unchanged to ``self.dataset.get_pair``.

        Returns:
            Tuple ``(img_a, transformed img_b, metadata)``. Arrays stored under
            ``'aflow'``, ``'flow'`` and ``'corres'`` are updated in place, while
            ``'homography'`` is replaced by a new float32 3x3 matrix.
        """
        img_a, img_b_, metadata = self.dataset.get_pair(i, output)

        # Start from the identity perspective; the transform returns the
        # perspective it actually applied under the same key.
        img_b = self.trf({'img': img_b_, 'persp': (1, 0, 0, 0, 1, 0, 0, 0)})
        trf = img_b['persp']

        # The original indexed both keys as soon as one of them was present,
        # which raised KeyError for metadata carrying only one of the two.
        aflow = metadata.get('aflow')
        flow = metadata.get('flow')
        if aflow is not None or flow is not None:
            W, H = img_a.size
            # (H, W, 2) grid holding the (x, y) coordinate of every pixel.
            mgrid = np.mgrid[0:H, 0:W][::-1].transpose(1, 2, 0).astype(
                np.float32)
            if aflow is None:
                # Only the relative flow is stored: rebuild the absolute one.
                aflow = flow + mgrid
            aflow[:] = persp_apply(trf, aflow.reshape(-1, 2)).reshape(
                aflow.shape)
            if flow is not None:
                flow[:] = aflow - mgrid

        if 'corres' in metadata:
            # Only the coordinates stored at index 1 are moved by the transform.
            corres = metadata['corres']
            corres[:, 1] = persp_apply(trf, corres[:, 1])

        if 'homography' in metadata:
            # p_b = homography * p_a, so the new transform is composed on the left.
            trf_ = np.float32(trf + (1, )).reshape(3, 3)
            metadata['homography'] = np.float32(trf_ @ metadata['homography'])

        return img_a, img_b['img'], metadata
示例#2
0
    def get_pair(self, i, output=''):
        """Return pair ``i`` of the wrapped dataset with its second image transformed.

        The second image goes through ``self.trf`` (seeded with an identity
        perspective). The perspective reported back by the transform is applied
        to the flow, correspondences and homography found in the metadata.

        Args:
            i: index forwarded to ``self.dataset.get_pair``.
            output: forwarded unchanged to ``self.dataset.get_pair``.

        Returns:
            Tuple ``(img_a, transformed img_b, metadata)``. ``'aflow'``,
            ``'flow'`` and ``'corres'`` arrays are modified in place;
            ``'homography'`` is rebound to a new float32 3x3 matrix.
        """
        img_a, img_b_, metadata = self.dataset.get_pair(i, output)
        img_b = self.trf({'img': img_b_, 'persp': (1, 0, 0, 0, 1, 0, 0, 0)})
        trf = img_b['persp']

        # Apply the same transformation to the flow fields. Each key is handled
        # only if present: the original read both as soon as either existed and
        # raised KeyError when the metadata held just one of them.
        aflow = metadata.get('aflow')
        flow = metadata.get('flow')
        if aflow is not None or flow is not None:
            W, H = img_a.size
            # (H, W, 2) grid of (x, y) pixel coordinates.
            mgrid = np.mgrid[0:H, 0:W][::-1].transpose(1, 2, 0).astype(
                np.float32)
            if aflow is None:
                # Absolute flow is the relative flow plus the pixel grid.
                aflow = flow + mgrid
            aflow[:] = persp_apply(trf, aflow.reshape(-1, 2)).reshape(
                aflow.shape)
            if flow is not None:
                flow[:] = aflow - mgrid

        if 'corres' in metadata:
            # TODO: the exact dimensions of `corres` are not established here;
            # only the coordinates at index 1 are transformed.
            corres = metadata['corres']
            corres[:, 1] = persp_apply(trf, corres[:, 1])

        if 'homography' in metadata:
            # p_b = homography * p_a
            trf_ = np.float32(trf + (1, )).reshape(3, 3)
            metadata['homography'] = np.float32(trf_ @ metadata['homography'])

        return img_a, img_b['img'], metadata
示例#3
0
    def get_pair(self, i, output=('aflow')):
        """Synthesize an image pair with exact ground truth from a single image.

        Image ``i`` is scaled, duplicated through ``self.make_pair`` and the
        second copy is distorted; the perspective reported by the distortion
        yields the ground truth.

        Args:
            i: index passed to ``self.dataset.get_image``.
            output: requested ground-truth names, either as a
                whitespace-separated string or as a container of names.

        Returns:
            Tuple ``(first image, distorted second image, meta)``. ``meta`` holds
            ``'aflow'`` and ``'flow'`` (both ``(H, W, 2)``) when either was
            requested, and ``'homography'`` (float32, 3x3) when requested.
        """
        requested = output.split() if isinstance(output, str) else output

        source = self.dataset.get_image(i)
        first, second = self.make_pair(self.scale(source))
        warped = self.distort({'img': second, 'persp': (1, 0, 0, 0, 1, 0, 0, 0)})
        width, height = first.size
        persp = warped['persp']

        meta = {}

        if 'aflow' in requested or 'flow' in requested:
            # One (x, y) row per pixel, enumerated row by row.
            grid = np.mgrid[0:height, 0:width][::-1]
            points = grid.reshape(2, height * width).T
            moved = persp_apply(persp, points).reshape(height, width, 2)
            absolute = np.float32(moved)
            meta['flow'] = absolute - points.reshape(height, width, 2)
            meta['aflow'] = absolute

        if 'homography' in requested:
            # Eight perspective coefficients plus a trailing 1 form the matrix.
            matrix = np.float32(persp + (1, ))
            meta['homography'] = matrix.reshape(3, 3)

        return first, warped['img'], meta
示例#4
0
    def get_pair(self, i, output='aflow'):
        """Build a synthetic image pair and its ground truth from image ``i``.

        The image is scaled, turned into two images by ``self.make_pair`` and
        the second one is distorted. ``self.distort`` reports the perspective it
        applied, from which the flow and homography are derived.

        Args:
            i: index passed to ``self.dataset.get_image``.
            output: requested ground-truth names; a string is split on
                whitespace.

        Returns:
            Tuple ``(first image, distorted second image, meta)`` where ``meta``
            may contain ``'aflow'`` / ``'flow'`` stored as ``(H, W, 2)`` and
            ``'homography'`` stored as a float32 3x3 matrix.
        """
        keys = output
        if isinstance(keys, str):
            keys = keys.split()

        # Build the pair: the untouched image and the one to be distorted.
        base = self.scale(self.dataset.get_image(i))
        img_first, img_second = self.make_pair(base)
        distorted = self.distort(
            dict(img=img_second, persp=(1, 0, 0, 0, 1, 0, 0, 0)))
        W, H = img_first.size
        # Perspective actually applied by the distortion.
        coeffs = distorted['persp']

        meta = dict()
        need_flow = 'aflow' in keys or 'flow' in keys
        if need_flow:
            # Pixel coordinates as (n, 2) rows. The array is reshaped to
            # (2, n) first and transposed afterwards; reshaping straight to
            # (n, 2) would scramble the (x, y) pairs.
            coords = np.mgrid[0:H, 0:W][::-1].reshape(2, H * W).T
            # Still (n, 2) after the transform: the point matching each pixel.
            target = np.float32(persp_apply(coeffs, coords).reshape(H, W, 2))
            # Both flows are stored as (H, W, 2).
            meta['flow'] = target - coords.reshape(H, W, 2)
            meta['aflow'] = target
        if 'homography' in keys:
            # The homography is stored as a 3x3 matrix, not as 8 coefficients.
            meta['homography'] = np.float32(coeffs + (1, )).reshape(3, 3)

        return img_first, distorted['img'], meta
示例#5
0
    def __getitem__(self, i):
        """Return training sample ``i``: two matching crops and their ground truth.

        Steps:
          1. fetch an image pair and its absolute flow from ``self.dataset``;
          2. apply ``self.scale`` / ``self.distort`` to the second image and
             propagate the resulting perspective to the flow, the
             correspondences and the homography;
          3. draw up to ``50 * self.n_samples`` random window candidates and keep
             the best scoring one among at most ``self.n_samples`` scored ones;
          4. crop both images to the chosen windows, resize them to the output
             size and adjust the ground truth accordingly.

        Args:
            i: sample index; also used as RNG seed when
                ``self.idx_as_rng_seed`` is set.

        Returns:
            dict with ``img1`` / ``img2`` (``self.norm`` applied to the crops)
            plus one entry for every name in ``self.what`` that resolves to a
            local variable of this method (for instance ``aflow``, ``mask``,
            ``corres``, ``homography``, and ``flow`` when requested). ``aflow``
            is laid out as ``(2, H, W)``. When no window is found the images are
            black and ``aflow`` / ``homography`` are filled with NaN.
        """
        # NOTE: local variable names are part of the output contract because
        # the result is assembled with eval() at the end. Do not rename them.

        # Seed both RNGs with the index so the sample is reproducible.
        if self.idx_as_rng_seed:
            import random
            random.seed(i)
            np.random.seed(i)

        img_a, img_b, metadata = self.dataset.get_pair(i, self.what)
        # aflow is laid out as [H, W, 2] (checked by the assert further down).
        aflow = np.float32(metadata['aflow'])
        # mask is [H, W]; it defaults to all ones when the dataset gives none.
        mask = metadata.get('mask', np.ones(aflow.shape[:2], np.uint8))

        # Apply the transformations to the second image, starting from the
        # identity perspective.
        img_b = {'img': img_b, 'persp': (1, 0, 0, 0, 1, 0, 0, 0)}
        if self.scale:
            img_b = self.scale(img_b)
        if self.distort:
            img_b = self.distort(img_b)

        # Apply the same transformation to the absolute flow.
        aflow[:] = persp_apply(img_b['persp'], aflow.reshape(-1, 2)).reshape(aflow.shape)

        # ... to the correspondences (index 1 holds second-image coordinates) ...
        corres = None
        if 'corres' in metadata:
            corres = np.float32(metadata['corres'])
            corres[:, 1] = persp_apply(img_b['persp'], corres[:, 1])

        # ... and to the homography.
        homography = None
        if 'homography' in metadata:
            homography = np.float32(metadata['homography'])
            # p_b = homography * p_a
            persp = np.float32(img_b['persp'] + (1,)).reshape(3, 3)
            homography = persp @ homography

        # Determine the crop size by querying the crop transform with a huge
        # image size. min() on the size tuples compares them lexicographically.
        img_b = img_b['img']
        crop_size = self.crop({'imsize': (10000, 10000)})['imsize']
        output_size_a = min(img_a.size, crop_size)
        output_size_b = min(img_b.size, crop_size)
        img_a = np.array(img_a)
        img_b = np.array(img_b)

        ah, aw, p1 = img_a.shape
        bh, bw, p2 = img_b.shape
        assert p1 == 3
        assert p2 == 3
        assert aflow.shape == (ah, aw, 2)
        assert mask.shape == (ah, aw)

        # Local scale of the flow, used to size the window around a point.
        # np.gradient returns the derivatives along axis 0 (rows) and axis 1
        # (columns); the scale is the square root of the absolute Jacobian
        # determinant, clipped to a finite positive range.
        dx = np.gradient(aflow[:, :, 0])
        dy = np.gradient(aflow[:, :, 1])
        scale = np.sqrt(np.clip(np.abs(dx[1] * dy[0] - dx[0] * dy[1]), 1e-16, 1e16))

        # 16x16 occupancy grid used to measure how well the second window is
        # covered; Q quantises a coordinate inside window slice `w` to a cell.
        accu2 = np.zeros((16, 16), bool)
        Q = lambda x, w: \
            np.int32(16 * (x - w.start) / (w.stop - w.start))

        def window1(x, size, w):
            """Return a slice of about `size` around `x`, kept inside [0, w]."""
            l = x - int(0.5 + size / 2)
            r = l + int(0.5 + size)
            if l < 0:
                l, r = (0, r - l)
            if r > w:
                l, r = (l + w - r, w)
            if l < 0:
                l, r = 0, w  # larger than width
            return slice(l, r)

        def window(cx, cy, win_size, scale, img_shape):
            """Return (row slice, column slice) of a window centred on (cx, cy)."""
            return (window1(cy, win_size[1] * scale, img_shape[0]),
                    window1(cx, win_size[0] * scale, img_shape[1]))

        n_valid_pixel = mask.sum()
        # The epsilon avoids a division by zero when the mask is empty.
        sample_w = mask / (1e-16 + n_valid_pixel)

        def sample_valid_pixel():
            """Draw one pixel (x, y) with probability proportional to the mask."""
            n = np.random.choice(sample_w.size, p=sample_w.ravel())
            y, x = np.unravel_index(n, sample_w.shape)
            return x, y

        # Find suitable left and right windows: take the best of a few trials.
        trials = 0
        best = -np.inf, None
        for _ in range(50 * self.n_samples):
            # Stop once enough candidates were scored, or when the first image
            # has no valid pixel at all.
            if trials >= self.n_samples or n_valid_pixel == 0:
                break
            # Position in the first image.
            c1x, c1y = sample_valid_pixel()
            # Matching position in the second image, rounded to a pixel.
            c2x, c2y = (aflow[c1y, c1x] + 0.5).astype(np.int32)
            # Discard centres that fall outside the second image.
            if not (0 <= c2x < bw and 0 <= c2y < bh):
                continue

            # Get the flow scale
            sigma = scale[c1y, c1x]

            # Determine sampling windows: enlarge the window on the side that
            # sees the content at the smaller scale.
            if 0.2 < sigma < 1:
                win1 = window(c1x, c1y, output_size_a, 1 / sigma, img_a.shape)
                win2 = window(c2x, c2y, output_size_b, 1, img_b.shape)
            elif 1 <= sigma < 5:
                win1 = window(c1x, c1y, output_size_a, 1, img_a.shape)
                win2 = window(c2x, c2y, output_size_b, sigma, img_b.shape)
            else:
                # bad scale
                continue

            # Second-image coordinates of every pixel of the first window.
            x2, y2 = aflow[win1].reshape(-1, 2).T.astype(np.int32)
            # Flow vectors that land inside the second window.
            valid = (win2[1].start <= x2) & (x2 < win2[1].stop) & (win2[0].start <= y2) & (y2 < win2[0].stop)

            # Proportion of first-window pixels that are both unmasked and
            # mapped inside the second window.
            score1 = (valid * mask[win1].ravel()).mean()

            # Coverage of the second window: fraction of the 16x16 cells hit by
            # at least one valid flow vector (the grid is reset every time).
            accu2[:] = False
            accu2[Q(y2[valid], win2[0]), Q(x2[valid], win2[1])] = True
            score2 = accu2.mean()

            # Check how many hits we got
            score = min(score1, score2)

            trials += 1
            if score > best[0]:
                best = score, win1, win2

        if None in best:
            # Could not find a good window: return black images and NaN labels.
            # NOTE(review): `mask` and `corres` keep their full-image values on
            # this path; confirm that consumers tolerate it.
            img_a = np.zeros(output_size_a[::-1] + (3,), dtype=np.uint8)
            img_b = np.zeros(output_size_b[::-1] + (3,), dtype=np.uint8)
            aflow = np.nan * np.ones((2,) + output_size_a[::-1], dtype=np.float32)
            homography = np.nan * np.ones((3, 3), dtype=np.float32)
        else:
            # Each crop becomes a new image, so all coordinates must be
            # re-expressed relative to the window origins. Example: with windows
            # img1[2:4, 2:4] and img2[3:5, 3:5] the correspondence
            # ((2, 2), (3, 3)) becomes ((0, 0), (0, 0)).
            win1, win2 = best[1:]
            img_a = img_a[win1]
            img_b = img_b[win2]
            mask = mask[win1]
            aflow = aflow[win1] - np.float32([[[win2[1].start, win2[0].start]]])
            # Pixels with a zero mask get a NaN flow.
            aflow[~mask.view(bool)] = np.nan
            # --> (2, H, W)
            aflow = aflow.transpose(2, 0, 1)

            if corres is not None:
                corres[:, 0] -= (win1[1].start, win1[0].start)
                corres[:, 1] -= (win2[1].start, win2[0].start)

            if homography is not None:
                # trans1 moves a crop-relative point of the first image back to
                # full-image coordinates (add the win1 origin); trans2 moves the
                # mapped point into the second crop (subtract the win2 origin).
                trans1 = np.eye(3, dtype=np.float32)
                trans1[:2, 2] = (win1[1].start, win1[0].start)
                trans2 = np.eye(3, dtype=np.float32)
                trans2[:2, 2] = (-win2[1].start, -win2[0].start)
                homography = trans2 @ homography @ trans1
                homography /= homography[2, 2]

            # Rescale the crops to the requested output size when they differ,
            # and update the ground truth to match.
            if img_a.shape[:2][::-1] != output_size_a:
                # Scale factors between the crop and the output size.
                sx, sy = (np.float32(output_size_a) - 1) / (np.float32(img_a.shape[:2][::-1]) - 1)
                # Image.LANCZOS is the same filter as the former Image.ANTIALIAS
                # alias, which Pillow 10 removed.
                img_a = np.asarray(Image.fromarray(img_a).resize(output_size_a, Image.LANCZOS))
                # Nearest neighbour keeps the mask binary.
                mask = np.asarray(Image.fromarray(mask).resize(output_size_a, Image.NEAREST))
                # Resizing the first image only resamples the flow field; its
                # values (second-image coordinates) stay untouched.
                afx = Image.fromarray(aflow[0]).resize(output_size_a, Image.NEAREST)
                afy = Image.fromarray(aflow[1]).resize(output_size_a, Image.NEAREST)
                aflow = np.stack((np.float32(afx), np.float32(afy)))

                # Index 0 of corres holds first-image coordinates.
                if corres is not None:
                    corres[:, 0] *= (sx, sy)
                # Points of the resized first image are scaled back before the
                # homography is applied, hence the inverse factors on the right.
                if homography is not None:
                    homography = homography @ np.diag(np.float32([1 / sx, 1 / sy, 1]))
                    homography /= homography[2, 2]

            if img_b.shape[:2][::-1] != output_size_b:
                sx, sy = (np.float32(output_size_b) - 1) / (np.float32(img_b.shape[:2][::-1]) - 1)
                img_b = np.asarray(Image.fromarray(img_b).resize(output_size_b, Image.LANCZOS))

                # Resizing the second image scales the coordinates it is
                # addressed with: flow values, corres[:, 1] and the left side of
                # the homography.
                aflow *= [[[sx]], [[sy]]]

                if corres is not None:
                    corres[:, 1] *= (sx, sy)

                if homography is not None:
                    homography = np.diag(np.float32([sx, sy, 1])) @ homography
                    homography /= homography[2, 2]

        # Plain messages replace the former pdb.set_trace() calls, which opened
        # a debugger whenever one of these checks failed.
        assert aflow.dtype == np.float32, \
            'aflow must be float32, got %s' % aflow.dtype
        assert homography is None or homography.dtype == np.float32, \
            'homography must be float32, got %s' % homography.dtype

        # The relative flow is derived from aflow only at the very end, which is
        # why it was not maintained above.
        if 'flow' in self.what:
            H, W = img_a.shape[:2]
            mgrid = np.mgrid[0:H, 0:W][::-1].astype(np.float32)
            flow = aflow - mgrid

        result = dict(img1=self.norm(img_a), img2=self.norm(img_b))
        # NOTE(review): eval() resolves each requested name against the locals
        # above and silently skips unknown names; `self.what` must therefore
        # come from trusted configuration only.
        for what in self.what:
            try:
                result[what] = eval(what)
            except NameError:
                pass
        return result
示例#6
0
    def __getitem__(self, i):
        """Return training sample ``i``: two matching crops and their ground truth.

        An image pair and its absolute flow are fetched from ``self.dataset``,
        the second image is scaled/distorted, a pair of matching windows is
        searched at random, and both images are cropped and resized while the
        ground truth (flow, correspondences, homography) is kept consistent.

        Args:
            i: sample index; also used as RNG seed when
                ``self.idx_as_rng_seed`` is set.

        Returns:
            dict with ``img1`` / ``img2`` (output of ``self.aug`` when
            ``self.DO_COLOR_AUG`` is set, of ``self.norm`` otherwise) plus one
            entry for every name in ``self.what`` that resolves to a local
            variable of this method (for instance ``aflow``, ``mask``,
            ``corres``, ``homography``, and ``flow`` when requested). ``aflow``
            is laid out as ``(2, H, W)``. When no window is found the images are
            black and ``aflow`` / ``homography`` are filled with NaN.

        Raises:
            SystemExit: if either output image is not square.
        """
        # NOTE: local variable names are part of the output contract because
        # the result is assembled with eval() at the end. Do not rename them.
        if self.idx_as_rng_seed:
            import random
            random.seed(i)
            np.random.seed(i)

        # Retrieve an image pair and their absolute flow
        img_a, img_b, metadata = self.dataset.get_pair(i, self.what)

        # aflow contains pixel coordinates indicating where each
        # pixel from the left image ended up in the right image
        # as (x,y) pairs, but its shape is (H,W,2)
        aflow = np.float32(metadata['aflow'])
        mask = metadata.get('mask', np.ones(aflow.shape[:2], np.uint8))

        # apply transformations to the second image
        img_b = {'img': img_b, 'persp': (1, 0, 0, 0, 1, 0, 0, 0)}
        if self.scale:
            img_b = self.scale(img_b)
        if self.distort:
            img_b = self.distort(img_b)

        # apply the same transformation to the flow
        aflow[:] = persp_apply(img_b['persp'],
                               aflow.reshape(-1, 2)).reshape(aflow.shape)
        corres = None
        if 'corres' in metadata:
            corres = np.float32(metadata['corres'])
            corres[:, 1] = persp_apply(img_b['persp'], corres[:, 1])

        # apply the same transformation to the homography
        homography = None
        if 'homography' in metadata:
            homography = np.float32(metadata['homography'])
            # p_b = homography * p_a
            persp = np.float32(img_b['persp'] + (1, )).reshape(3, 3)
            homography = persp @ homography

        # determine crop size by querying the crop transform with a huge image
        # size; min() on the size tuples compares them lexicographically
        img_b = img_b['img']
        crop_size = self.crop({'imsize': (10000, 10000)})['imsize']
        output_size_a = min(img_a.size, crop_size)
        output_size_b = min(img_b.size, crop_size)
        img_a = np.array(img_a)
        img_b = np.array(img_b)

        ah, aw, p1 = img_a.shape
        bh, bw, p2 = img_b.shape
        assert p1 == 3
        assert p2 == 3
        assert aflow.shape == (ah, aw, 2)
        assert mask.shape == (ah, aw)

        # Local scale of the optical flow: square root of the absolute
        # determinant of its Jacobian, clipped to a finite positive range.
        dx = np.gradient(aflow[:, :, 0])
        dy = np.gradient(aflow[:, :, 1])
        scale = np.sqrt(
            np.clip(np.abs(dx[1] * dy[0] - dx[0] * dy[1]), 1e-16, 1e16))

        # 16x16 occupancy grid measuring the coverage of the second window;
        # Q quantises a coordinate inside window slice `w` to a grid cell.
        accu2 = np.zeros((16, 16), bool)
        Q = lambda x, w: np.int32(16 * (x - w.start) / (w.stop - w.start))

        def window1(x, size, w):
            """Return a slice of about `size` around `x`, kept inside [0, w]."""
            l = x - int(0.5 + size / 2)
            r = l + int(0.5 + size)
            if l < 0:
                l, r = (0, r - l)
            if r > w:
                l, r = (l + w - r, w)
            if l < 0:
                l, r = 0, w  # larger than width
            return slice(l, r)

        def window(cx, cy, win_size, scale, img_shape):
            """Return (row slice, column slice) of a window centred on (cx, cy)."""
            return (window1(cy, win_size[1] * scale, img_shape[0]),
                    window1(cx, win_size[0] * scale, img_shape[1]))

        # Sampling distribution over the pixels, proportional to |mask| and
        # renormalised in float64 before being handed to np.random.choice.
        n_valid_pixel = (abs(mask).sum()).astype(np.float64)
        prob = abs(mask).ravel().astype(np.float64) / (1e-16 + n_valid_pixel)
        prob /= sum(prob).astype(np.float64)
        is_correct_prob = False if abs(sum(prob) - 1.) > np.sqrt(
            np.finfo(prob.dtype).eps) else True
        if not is_correct_prob:
            print("Numerical error (tolerance)!!!")
            print("mask shape: ", mask.shape, "n_valid_pixel: ", n_valid_pixel)

        def sample_valid_pixel():
            """Draw one pixel (x, y) according to `prob`."""
            n = np.random.choice(prob.size, p=prob)
            y, x = np.unravel_index(n, mask.shape)
            return x, y

        # Find suitable left and right windows
        trials = 0  # take the best out of few trials
        best = -np.inf, None
        for _ in range(50 * self.n_samples):
            if trials >= self.n_samples:
                break  # finished!

            # pick a random valid point from the first image
            if n_valid_pixel == 0 or not is_correct_prob:
                break
            c1x, c1y = sample_valid_pixel()

            # Find in which position the center of the left
            # window ended up being placed in the right image
            c2x, c2y = (aflow[c1y, c1x] + 0.5).astype(np.int32)
            if not (0 <= c2x < bw and 0 <= c2y < bh):
                continue

            # Get the flow scale
            sigma = scale[c1y, c1x]

            # Determine sampling windows
            if 0.2 < sigma < 1:
                win1 = window(c1x, c1y, output_size_a, 1 / sigma, img_a.shape)
                win2 = window(c2x, c2y, output_size_b, 1, img_b.shape)
            elif 1 <= sigma < 5:
                win1 = window(c1x, c1y, output_size_a, 1, img_a.shape)
                win2 = window(c2x, c2y, output_size_b, sigma, img_b.shape)
            else:
                continue  # bad scale

            # compute a score based on the flow
            x2, y2 = aflow[win1].reshape(-1, 2).T.astype(np.int32)
            # Check the proportion of valid flow vectors
            valid = (win2[1].start <= x2) & (x2 < win2[1].stop) \
                    & (win2[0].start <= y2) & (y2 < win2[0].stop)
            score1 = (valid * mask[win1].ravel()).mean()
            # check the coverage of the second window
            accu2[:] = False

            # min()/max() raise ValueError on an empty selection, so a window
            # without any valid flow vector is skipped instead of crashing.
            if not valid.any():
                continue

            # Quantised grid cells of the valid flow vectors (computed once).
            qy = Q(y2[valid], win2[0])
            qx = Q(x2[valid], win2[1])
            min_Q_y, max_Q_y = qy.min(), qy.max()
            min_Q_x, max_Q_x = qx.min(), qx.max()
            # NOTE(review): the test compares min + max against the grid size,
            # not max alone; kept as written, please confirm the intent.
            cond = bool(
                min_Q_y >= 0 and (min_Q_y + max_Q_y) < accu2.shape[0]
                and min_Q_x >= 0 and (min_Q_x + max_Q_x) < accu2.shape[1])

            if cond:
                accu2[qy, qx] = True
                score2 = accu2.mean()
                # Check how many hits we got
                score = min(score1, score2)

                trials += 1
                if score > best[0]:
                    best = score, win1, win2

        if None in best:  # couldn't find a good window
            # NOTE(review): `mask` and `corres` keep their full-image values on
            # this path; confirm that consumers tolerate it.
            img_a = np.zeros(output_size_a[::-1] + (3, ), dtype=np.uint8)
            img_b = np.zeros(output_size_b[::-1] + (3, ), dtype=np.uint8)
            aflow = np.nan * np.ones(
                (2, ) + output_size_a[::-1], dtype=np.float32)
            homography = np.nan * np.ones((3, 3), dtype=np.float32)

        else:
            # Crop both images and express all coordinates relative to the
            # window origins.
            win1, win2 = best[1:]
            img_a = img_a[win1]
            img_b = img_b[win2]
            aflow = aflow[win1] - np.float32(
                [[[win2[1].start, win2[0].start]]])
            mask = mask[win1]
            aflow[~mask.view(bool)] = np.nan  # mask bad pixels!
            aflow = aflow.transpose(2, 0, 1)  # --> (2,H,W)

            if corres is not None:
                corres[:, 0] -= (win1[1].start, win1[0].start)
                corres[:, 1] -= (win2[1].start, win2[0].start)

            if homography is not None:
                # trans1: first-crop coordinates -> full first image;
                # trans2: full second image -> second-crop coordinates.
                trans1 = np.eye(3, dtype=np.float32)
                trans1[:2, 2] = (win1[1].start, win1[0].start)
                trans2 = np.eye(3, dtype=np.float32)
                trans2[:2, 2] = (-win2[1].start, -win2[0].start)
                homography = trans2 @ homography @ trans1
                homography /= homography[2, 2]

            # rescale if necessary
            if img_a.shape[:2][::-1] != output_size_a:
                sx, sy = (np.float32(output_size_a) -
                          1) / (np.float32(img_a.shape[:2][::-1]) - 1)
                # Image.LANCZOS is the same filter as the former
                # Image.ANTIALIAS alias, which Pillow 10 removed.
                img_a = np.asarray(
                    Image.fromarray(img_a).resize(output_size_a,
                                                  Image.LANCZOS))
                mask = np.asarray(
                    Image.fromarray(mask).resize(output_size_a, Image.NEAREST))
                # Only the flow field is resampled; its values (second-image
                # coordinates) are unchanged by resizing the first image.
                afx = Image.fromarray(aflow[0]).resize(output_size_a,
                                                       Image.NEAREST)
                afy = Image.fromarray(aflow[1]).resize(output_size_a,
                                                       Image.NEAREST)
                aflow = np.stack((np.float32(afx), np.float32(afy)))

                if corres is not None:
                    corres[:, 0] *= (sx, sy)

                if homography is not None:
                    homography = homography @ np.diag(
                        np.float32([1 / sx, 1 / sy, 1]))
                    homography /= homography[2, 2]

            if img_b.shape[:2][::-1] != output_size_b:
                sx, sy = (np.float32(output_size_b) -
                          1) / (np.float32(img_b.shape[:2][::-1]) - 1)
                img_b = np.asarray(
                    Image.fromarray(img_b).resize(output_size_b,
                                                  Image.LANCZOS))
                # Resizing the second image scales every coordinate expressed
                # in it.
                aflow *= [[[sx]], [[sy]]]

                if corres is not None:
                    corres[:, 1] *= (sx, sy)

                if homography is not None:
                    homography = np.diag(np.float32([sx, sy, 1])) @ homography
                    homography /= homography[2, 2]

        # Plain messages replace the former pdb.set_trace() calls, which opened
        # a debugger whenever one of these checks failed.
        assert aflow.dtype == np.float32, \
            'aflow must be float32, got %s' % aflow.dtype
        assert homography is None or homography.dtype == np.float32, \
            'homography must be float32, got %s' % homography.dtype
        if 'flow' in self.what:
            H, W = img_a.shape[:2]
            mgrid = np.mgrid[0:H, 0:W][::-1].astype(np.float32)
            flow = aflow - mgrid

        # Both outputs are required to be square; otherwise the shape is
        # printed and SystemExit is raised.
        if img_a.shape[0] != img_a.shape[1]:
            print("A: img_a:", img_a.shape)
            sys.exit()
        if img_b.shape[0] != img_b.shape[1]:
            print("B: img_b:", img_b.shape)
            sys.exit()

        if self.DO_COLOR_AUG:
            img1, img2 = self.aug(image=img_a)["image"], self.aug(
                image=img_b)["image"]
        else:
            img1, img2 = self.norm(img_a), self.norm(img_b)

        result = dict(img1=img1, img2=img2)
        # NOTE(review): eval() resolves each requested name against the locals
        # above and silently skips unknown names; `self.what` must therefore
        # come from trusted configuration only.
        for what in self.what:
            try:
                result[what] = eval(what)
            except NameError:
                pass
        return result
示例#7
0
文件: dataloader.py 项目: zxp771/r2d2
    def __getitem__(self, i):
        """Return training sample ``i``: two matching crops and their ground truth.

        An image pair and its absolute flow are fetched from ``self.dataset``,
        the second image is scaled/distorted, the best of a few random window
        pairs is selected, and both images are cropped and resized while the
        ground truth (flow, correspondences, homography) is kept consistent.

        Args:
            i: sample index; also used as RNG seed when
                ``self.idx_as_rng_seed`` is set.

        Returns:
            dict with ``img1`` / ``img2`` (``self.norm`` applied to the crops)
            plus one entry for every name in ``self.what`` that resolves to a
            local variable of this method (for instance ``aflow``, ``mask``,
            ``corres``, ``homography``, and ``flow`` when requested). ``aflow``
            is laid out as ``(2, H, W)``. When no window is found the images are
            black and ``aflow`` / ``homography`` are filled with NaN.
        """
        # NOTE: local variable names are part of the output contract because
        # the result is assembled with eval() at the end. Do not rename them.
        if self.idx_as_rng_seed:
            import random
            random.seed(i)
            np.random.seed(i)

        # Retrieve an image pair and their absolute flow
        img_a, img_b, metadata = self.dataset.get_pair(i, self.what)

        # aflow contains pixel coordinates indicating where each
        # pixel from the left image ended up in the right image
        # as (x,y) pairs, but its shape is (H,W,2)
        aflow = np.float32(metadata['aflow'])
        mask = metadata.get('mask', np.ones(aflow.shape[:2], np.uint8))

        # apply transformations to the second image
        img_b = {'img': img_b, 'persp': (1, 0, 0, 0, 1, 0, 0, 0)}
        if self.scale:
            img_b = self.scale(img_b)
        if self.distort:
            img_b = self.distort(img_b)

        # apply the same transformation to the flow
        aflow[:] = persp_apply(img_b['persp'],
                               aflow.reshape(-1, 2)).reshape(aflow.shape)
        corres = None
        if 'corres' in metadata:
            corres = np.float32(metadata['corres'])
            corres[:, 1] = persp_apply(img_b['persp'], corres[:, 1])

        # apply the same transformation to the homography
        homography = None
        if 'homography' in metadata:
            homography = np.float32(metadata['homography'])
            # p_b = homography * p_a
            persp = np.float32(img_b['persp'] + (1, )).reshape(3, 3)
            homography = persp @ homography

        # determine crop size by querying the crop transform with a huge image
        # size; min() on the size tuples compares them lexicographically
        img_b = img_b['img']
        crop_size = self.crop({'imsize': (10000, 10000)})['imsize']
        output_size_a = min(img_a.size, crop_size)
        output_size_b = min(img_b.size, crop_size)
        img_a = np.array(img_a)
        img_b = np.array(img_b)

        ah, aw, p1 = img_a.shape
        bh, bw, p2 = img_b.shape
        assert p1 == 3
        assert p2 == 3
        assert aflow.shape == (ah, aw, 2)
        assert mask.shape == (ah, aw)

        # Local scale of the optical flow: square root of the absolute
        # determinant of its Jacobian, clipped to a finite positive range.
        dx = np.gradient(aflow[:, :, 0])
        dy = np.gradient(aflow[:, :, 1])
        scale = np.sqrt(
            np.clip(np.abs(dx[1] * dy[0] - dx[0] * dy[1]), 1e-16, 1e16))

        # 16x16 occupancy grid measuring the coverage of the second window;
        # Q quantises a coordinate inside window slice `w` to a grid cell.
        accu2 = np.zeros((16, 16), bool)
        Q = lambda x, w: np.int32(16 * (x - w.start) / (w.stop - w.start))

        def window1(x, size, w):
            """Return a slice of about `size` around `x`, kept inside [0, w]."""
            l = x - int(0.5 + size / 2)
            r = l + int(0.5 + size)
            if l < 0:
                l, r = (0, r - l)
            if r > w:
                l, r = (l + w - r, w)
            if l < 0:
                l, r = 0, w  # larger than width
            return slice(l, r)

        def window(cx, cy, win_size, scale, img_shape):
            """Return (row slice, column slice) of a window centred on (cx, cy)."""
            return (window1(cy, win_size[1] * scale, img_shape[0]),
                    window1(cx, win_size[0] * scale, img_shape[1]))

        n_valid_pixel = mask.sum()
        # The epsilon avoids a division by zero when the mask is empty.
        sample_w = mask / (1e-16 + n_valid_pixel)

        def sample_valid_pixel():
            """Draw one pixel (x, y) with probability proportional to the mask."""
            n = np.random.choice(sample_w.size, p=sample_w.ravel())
            y, x = np.unravel_index(n, sample_w.shape)
            return x, y

        # Find suitable left and right windows
        trials = 0  # take the best out of few trials
        best = -np.inf, None
        for _ in range(50 * self.n_samples):
            if trials >= self.n_samples:
                break  # finished!

            # pick a random valid point from the first image
            if n_valid_pixel == 0:
                break
            c1x, c1y = sample_valid_pixel()

            # Find in which position the center of the left
            # window ended up being placed in the right image
            c2x, c2y = (aflow[c1y, c1x] + 0.5).astype(np.int32)
            if not (0 <= c2x < bw and 0 <= c2y < bh):
                continue

            # Get the flow scale
            sigma = scale[c1y, c1x]

            # Determine sampling windows
            if 0.2 < sigma < 1:
                win1 = window(c1x, c1y, output_size_a, 1 / sigma, img_a.shape)
                win2 = window(c2x, c2y, output_size_b, 1, img_b.shape)
            elif 1 <= sigma < 5:
                win1 = window(c1x, c1y, output_size_a, 1, img_a.shape)
                win2 = window(c2x, c2y, output_size_b, sigma, img_b.shape)
            else:
                continue  # bad scale

            # compute a score based on the flow
            x2, y2 = aflow[win1].reshape(-1, 2).T.astype(np.int32)
            # Check the proportion of valid flow vectors
            valid = (win2[1].start <= x2) & (x2 < win2[1].stop) \
                  & (win2[0].start <= y2) & (y2 < win2[0].stop)
            score1 = (valid * mask[win1].ravel()).mean()
            # check the coverage of the second window: fraction of grid cells
            # hit by at least one valid flow vector
            accu2[:] = False
            accu2[Q(y2[valid], win2[0]), Q(x2[valid], win2[1])] = True
            score2 = accu2.mean()
            # Check how many hits we got
            score = min(score1, score2)

            trials += 1
            if score > best[0]:
                best = score, win1, win2

        if None in best:  # couldn't find a good window
            # NOTE(review): `mask` and `corres` keep their full-image values on
            # this path; confirm that consumers tolerate it.
            img_a = np.zeros(output_size_a[::-1] + (3, ), dtype=np.uint8)
            img_b = np.zeros(output_size_b[::-1] + (3, ), dtype=np.uint8)
            aflow = np.nan * np.ones(
                (2, ) + output_size_a[::-1], dtype=np.float32)
            homography = np.nan * np.ones((3, 3), dtype=np.float32)

        else:
            # Crop both images and express all coordinates relative to the
            # window origins.
            win1, win2 = best[1:]
            img_a = img_a[win1]
            img_b = img_b[win2]
            aflow = aflow[win1] - np.float32(
                [[[win2[1].start, win2[0].start]]])
            mask = mask[win1]
            aflow[~mask.view(bool)] = np.nan  # mask bad pixels!
            aflow = aflow.transpose(2, 0, 1)  # --> (2,H,W)

            if corres is not None:
                corres[:, 0] -= (win1[1].start, win1[0].start)
                corres[:, 1] -= (win2[1].start, win2[0].start)

            if homography is not None:
                # trans1: first-crop coordinates -> full first image;
                # trans2: full second image -> second-crop coordinates.
                trans1 = np.eye(3, dtype=np.float32)
                trans1[:2, 2] = (win1[1].start, win1[0].start)
                trans2 = np.eye(3, dtype=np.float32)
                trans2[:2, 2] = (-win2[1].start, -win2[0].start)
                homography = trans2 @ homography @ trans1
                homography /= homography[2, 2]

            # rescale if necessary
            if img_a.shape[:2][::-1] != output_size_a:
                sx, sy = (np.float32(output_size_a) -
                          1) / (np.float32(img_a.shape[:2][::-1]) - 1)
                # Image.LANCZOS is the same filter as the former
                # Image.ANTIALIAS alias, which Pillow 10 removed.
                img_a = np.asarray(
                    Image.fromarray(img_a).resize(output_size_a,
                                                  Image.LANCZOS))
                mask = np.asarray(
                    Image.fromarray(mask).resize(output_size_a, Image.NEAREST))
                # Only the flow field is resampled; its values (second-image
                # coordinates) are unchanged by resizing the first image.
                afx = Image.fromarray(aflow[0]).resize(output_size_a,
                                                       Image.NEAREST)
                afy = Image.fromarray(aflow[1]).resize(output_size_a,
                                                       Image.NEAREST)
                aflow = np.stack((np.float32(afx), np.float32(afy)))

                if corres is not None:
                    corres[:, 0] *= (sx, sy)

                if homography is not None:
                    homography = homography @ np.diag(
                        np.float32([1 / sx, 1 / sy, 1]))
                    homography /= homography[2, 2]

            if img_b.shape[:2][::-1] != output_size_b:
                sx, sy = (np.float32(output_size_b) -
                          1) / (np.float32(img_b.shape[:2][::-1]) - 1)
                img_b = np.asarray(
                    Image.fromarray(img_b).resize(output_size_b,
                                                  Image.LANCZOS))
                # Resizing the second image scales every coordinate expressed
                # in it.
                aflow *= [[[sx]], [[sy]]]

                if corres is not None:
                    corres[:, 1] *= (sx, sy)

                if homography is not None:
                    homography = np.diag(np.float32([sx, sy, 1])) @ homography
                    homography /= homography[2, 2]

        # Plain messages replace the former pdb.set_trace() calls, which opened
        # a debugger whenever one of these checks failed.
        assert aflow.dtype == np.float32, \
            'aflow must be float32, got %s' % aflow.dtype
        assert homography is None or homography.dtype == np.float32, \
            'homography must be float32, got %s' % homography.dtype
        if 'flow' in self.what:
            H, W = img_a.shape[:2]
            mgrid = np.mgrid[0:H, 0:W][::-1].astype(np.float32)
            flow = aflow - mgrid

        result = dict(img1=self.norm(img_a), img2=self.norm(img_b))
        # NOTE(review): eval() resolves each requested name against the locals
        # above and silently skips unknown names; `self.what` must therefore
        # come from trusted configuration only.
        for what in self.what:
            try:
                result[what] = eval(what)
            except NameError:
                pass
        return result