示例#1
0
 def expand(example):
     """Promote every value in `example` to a minimum rank of 1.

     Each value is passed through `expand_to_rank` with `target_rank=1` and
     `prepend=True`. The mapping is updated in place and also returned.
     """
     for name in example:
         value = example[name]
         example[name] = expand_to_rank(value, target_rank=1, prepend=True)
     return example
示例#2
0
        def find_peaks(example):
            """Find global peaks in the example's confidence maps.

            Reads the tensor stored under `self.confmaps_key`, locates peaks with
            either integral regression or a grid-aligned global argmax (chosen by
            `self.integral`), scales the peak coordinates by `self.confmaps_stride`
            and stores the results under `self.peaks_key` / `self.peak_vals_key`.
            The confidence maps are dropped from the example unless
            `self.keep_confmaps` is set.
            """
            with tf.device(device_name):
                cms = example[self.confmaps_key]
                stride = tf.cast(self.confmaps_stride, tf.float32)

                if not self.integral:
                    # Standard grid-aligned global argmax on a rank-4 view of the
                    # confidence maps.
                    batched_cms = expand_to_rank(cms, target_rank=4, prepend=True)
                    peaks, peak_vals = find_global_peaks(
                        batched_cms, threshold=self.peak_threshold
                    )
                    peaks = peaks * stride

                    # Undo the axis added above when the stored maps were rank 3.
                    if tf.rank(cms) == 3:
                        peaks = tf.squeeze(peaks, axis=0)
                        peak_vals = tf.squeeze(peak_vals, axis=0)

                else:
                    # Integral regression on the confidence maps as stored.
                    peaks, peak_vals = find_global_peaks_integral(
                        cms,
                        threshold=self.peak_threshold,
                        crop_size=self.integral_patch_size,
                    )
                    peaks = peaks * stride

                example[self.peaks_key] = peaks
                example[self.peak_vals_key] = peak_vals

                if not self.keep_confmaps:
                    example.pop(self.confmaps_key)

                return example
示例#3
0
文件: edge_maps.py 项目: rlinus/sleap
def distance_to_edge(points: tf.Tensor, edge_source: tf.Tensor,
                     edge_destination: tf.Tensor) -> tf.Tensor:
    """Compute pairwise distance between points and undirected edges.

    Each point is projected onto the line through an edge, the projection is
    clamped to the segment between the edge's endpoints, and the squared
    Euclidean distance between the point and that clamped projection is returned
    (no square root is applied).

    Args:
        points: Tensor of dtype tf.float32 of shape (d_0, ..., d_n, 2) where the last
            axis corresponds to x- and y-coordinates. Distances will be broadcast across
            all point dimensions.
        edge_source: Tensor of dtype tf.float32 of shape (n_edges, 2) where the last
            axis corresponds to x- and y-coordinates of the source points of each edge.
        edge_destination: Tensor of dtype tf.float32 of shape (n_edges, 2) where the
            last axis corresponds to x- and y-coordinates of the destination points of
            each edge.

    Returns:
        A tensor of dtype tf.float32 of shape (d_0, ..., d_n, n_edges) where the first
        axes correspond to the initial dimensions of `points`, and the last indicates
        the squared distance of each point to each edge.
    """
    # Ensure all points are at least rank 2.
    points = expand_to_rank(points, 2)
    edge_source = expand_to_rank(edge_source, 2)
    edge_destination = expand_to_rank(edge_destination, 2)

    # Compute number of point dimensions.
    n_pt_dims = tf.rank(points) - 1

    # Direction vector.
    direction_vector = edge_destination - edge_source  # (n_edges, 2)

    # Squared edge length, floored at 1 so the division below never hits zero.
    edge_length = tf.maximum(tf.reduce_sum(tf.square(direction_vector),
                                           axis=1), 1)  # (n_edges,)

    # Adjust query points relative to edge source point.
    source_relative_points = tf.expand_dims(points, axis=-2) - expand_to_rank(
        edge_source, n_pt_dims + 2)  # (..., n_edges, 2)

    # Project points to edge line. The coordinate axis is always the last one, so
    # reduce over axis=-1 rather than a fixed index; a hard-coded axis is only
    # correct for one particular rank of `points`.
    line_projections = tf.reduce_sum(
        source_relative_points *
        expand_to_rank(direction_vector, n_pt_dims + 2),
        axis=-1) / expand_to_rank(edge_length, n_pt_dims + 1)  # (..., n_edges)

    # Crop to line segment.
    line_projections = tf.clip_by_value(line_projections, 0,
                                        1)  # (..., n_edges)

    # Compute (squared) distance from each point to its clamped projection.
    distances = tf.reduce_sum(
        tf.square((tf.expand_dims(line_projections, -1) *
                   expand_to_rank(direction_vector, n_pt_dims + 2)) -
                  source_relative_points),
        axis=-1,
    )  # (..., n_edges)

    return distances
示例#4
0
def test_expand_to_rank():
    """Exercise `utils.expand_to_rank` for prepending, appending and no-op cases."""
    vector = tf.range(3)
    row = tf.reshape(tf.range(3), [1, 3])
    cube = tf.reshape(tf.range(2 * 3 * 4), [2, 3, 4])

    # Singleton axes are added in front when prepending.
    expanded = utils.expand_to_rank(vector, target_rank=2, prepend=True)
    np.testing.assert_array_equal(expanded, [[0, 1, 2]])

    expanded = utils.expand_to_rank(vector, target_rank=3, prepend=True)
    np.testing.assert_array_equal(expanded, [[[0, 1, 2]]])

    # Singleton axes are added at the end when not prepending.
    expanded = utils.expand_to_rank(vector, target_rank=2, prepend=False)
    np.testing.assert_array_equal(expanded, [[0], [1], [2]])

    # Input already at the target rank comes back with the same values.
    expanded = utils.expand_to_rank(row, target_rank=2, prepend=True)
    np.testing.assert_array_equal(expanded, [[0, 1, 2]])

    # Input above the target rank comes back with the same values.
    expanded = utils.expand_to_rank(cube, target_rank=2)
    np.testing.assert_array_equal(
        expanded, tf.reshape(tf.range(2 * 3 * 4), [2, 3, 4])
    )
示例#5
0
def scale_to_imagenet_torch_mode(image: tf.Tensor) -> tf.Tensor:
    """Scale images according to the "torch" preprocessing mode.

    This mirrors the "torch" preprocessing used by `tf.keras.applications` for
    models pretrained on ImageNet.

    Args:
        image: Any image tensor of rank >= 2. If rank >=3, the last axis is assumed to
            be of size 3 corresponding to RGB-ordered channels.

    Returns:
        The preprocessed image of dtype tf.float32 and shape (..., height, width, 3)
        with RGB channel ordering.

        Assuming `ensure_float` yields values in [0.0, 1.0], the constants used here
        put the output in the approximate range of [-2.12, 2.64].

    Notes:
        The preprocessing steps applied are:
            1. If needed, expand to rank-3 by adding singleton dimensions to the end.
               This assumes rank-2 images are grayscale of shape (height, width) and
               will be expanded to (height, width, 1).
            2. Convert to RGB if not already in 3 channel format.
            3. Convert to tf.float32 in the range [0.0, 1.0].
            4. Subtract the ImageNet mean values (0.485, 0.456, 0.406) for channels in
               RGB format.
            5. Divide by the ImageNet standard deviation values (0.229, 0.224, 0.225)
               for channels in RGB format.

        This preprocessing mode is required when using pretrained DenseNet models.
    """
    # Per-channel ImageNet statistics in [R, G, B] order.
    rgb_mean = tf.convert_to_tensor([0.485, 0.456, 0.406], tf.float32)
    rgb_std = tf.convert_to_tensor([0.229, 0.224, 0.225], tf.float32)

    # At least [height, width, 1] -> 3 channels -> float32 in range [0., 1.].
    image = ensure_float(ensure_rgb(ensure_min_image_rank(image)))

    # Expand the statistics to the image rank so they apply along the channels.
    centered = image - expand_to_rank(rgb_mean, tf.rank(image))
    return centered / expand_to_rank(rgb_std, tf.rank(centered))
示例#6
0
        def rescale_points(example):
            """Local processing function for dataset mapping.

            Multiplies the points stored under `self.points_key` by the value under
            `self.scale_key`, or divides by it when `self.invert` is set, and writes
            the result back under `self.points_key`.
            """
            pts = example[self.points_key]

            # Bring the scale up to the rank of the points before combining them.
            factor = expand_to_rank(example[self.scale_key], tf.rank(pts))

            if self.invert:
                pts = pts / factor
            else:
                pts = pts * factor

            example[self.points_key] = pts
            return example
示例#7
0
def make_pafs(
    xv: tf.Tensor,
    yv: tf.Tensor,
    edge_source: tf.Tensor,
    edge_destination: tf.Tensor,
    sigma: float,
) -> tf.Tensor:
    """Generate part affinity fields for a set of directed edges.

    Args:
        xv: Sampling grid vector for x-coordinates of shape (grid_width,) and dtype
            tf.float32. This can be generated by
            `sleap.nn.data.utils.make_grid_vectors`.
        yv: Sampling grid vector for y-coordinates of shape (grid_height,) and dtype
            tf.float32. This can be generated by
            `sleap.nn.data.utils.make_grid_vectors`.
        edge_source: Tensor of dtype tf.float32 of shape (n_edges, 2) where the last
            axis corresponds to x- and y-coordinates of the source points of each edge.
        edge_destination: Tensor of dtype tf.float32 of shape (n_edges, 2) where the
            last axis corresponds to x- and y-coordinates of the destination points of
            each edge.
        sigma: Standard deviation of the 2D Gaussian distribution sampled to generate
            the edge maps for masking the PAFs.

    Returns:
        The edge maps from `make_edge_maps`, each multiplied by the unit vector
        pointing from the edge's source to its destination. These will be in a tensor
        of shape (grid_height, grid_width, n_edges, 2) of dtype tf.float32. The last
        axis corresponds to the x- and y-coordinates of the unit vectors.
    """
    # Normalize each source -> destination vector to unit length.
    # NOTE(review): an edge whose endpoints coincide divides 0 by 0 here.
    edge_vectors = edge_destination - edge_source
    edge_norms = tf.linalg.norm(edge_vectors, axis=-1, keepdims=True)
    unit_vectors = edge_vectors / edge_norms

    edge_confidence_map = make_edge_maps(
        xv=xv,
        yv=yv,
        edge_source=edge_source,
        edge_destination=edge_destination,
        sigma=sigma,
    )

    # Add a trailing axis to the maps so they multiply against the (x, y) components.
    weights = tf.expand_dims(edge_confidence_map, axis=-1)
    return weights * expand_to_rank(unit_vectors, 4)
示例#8
0
def scale_to_imagenet_caffe_mode(image: tf.Tensor) -> tf.Tensor:
    """Scale images according to the "caffe" preprocessing mode.

    This mirrors the "caffe" preprocessing used by `tf.keras.applications` for
    models pretrained on ImageNet.

    Args:
        image: Any image tensor of rank >= 2. If rank >=3, the last axis is assumed to
            be of size 3 corresponding to RGB-ordered channels.

    Returns:
        The preprocessed image of dtype tf.float32 and shape (..., height, width, 3)
        with BGR channel ordering.

        Assuming the image is scaled to [0.0, 255.0] before the mean is removed, the
        constants used here put the output in the approximate range of
        [-123.7, 151.1].

    Notes:
        The preprocessing steps applied are:
            1. If needed, expand to rank-3 by adding singleton dimensions to the end.
               This assumes rank-2 images are grayscale of shape (height, width) and
               will be expanded to (height, width, 1).
            2. Convert to RGB if not already in 3 channel format.
            3. Reverse the channel ordering to convert RGB to BGR format.
            4. Convert to tf.float32 in the range [0.0, 1.0].
            5. Scale the values to the range [0.0, 255.0].
            6. Subtract the ImageNet mean values (103.939, 116.779, 123.68) for channels
               in BGR format.

        This preprocessing mode is required when using pretrained ResNetV1 models.
    """
    # Per-channel ImageNet means in [B, G, R] order.
    bgr_mean = tf.convert_to_tensor([103.939, 116.779, 123.68], tf.float32)

    # At least [height, width, 1] -> 3 channels -> reversed channel order.
    bgr = convert_rgb_to_bgr(ensure_rgb(ensure_min_image_rank(image)))

    # float32 in range [0., 1.] -> float32 in range [0, 255].
    scaled = scale_image_range(ensure_float(bgr), min_val=0.0, max_val=255.0)

    # Expand the means to the image rank so they subtract along the channels.
    return scaled - expand_to_rank(bgr_mean, tf.rank(scaled))
示例#9
0
        def norm_instance(example):
            """Local processing function for dataset mapping.

            Divides the centroids by the example's scale, offsets the peaks by columns
            1 and 0 of the bounding boxes before dividing them by the same scale, and
            stores the results (plus copies of the confidence values) under the
            `new_*` keys.
            """
            scale = example["scale"]
            scaled_centroids = example[self.centroid_key] / scale

            # Take columns 1 and 0 of each bbox row, in that order.
            # NOTE(review): presumably the (x1, y1) corner of a (y1, x1, ...) box —
            # confirm against the bbox producer.
            boxes = expand_to_rank(example["bbox"], 2)
            offsets = tf.gather(boxes, [1, 0], axis=1)

            shifted_peaks = (example[self.peaks_key] + offsets) / scale

            example[self.new_centroid_key] = scaled_centroids
            example[self.new_centroid_confidence_key] = example[
                self.centroid_confidence_key
            ]
            example[self.new_peaks_key] = shifted_peaks
            example[self.new_peak_confidences_key] = example[self.peak_confidences_key]
            return example
示例#10
0
def ensure_min_image_rank(image: tf.Tensor) -> tf.Tensor:
    """Expand the image to a minimum rank of 3 by adding single dimensions.

    Args:
        image: Tensor of any rank and dtype.

    Returns:
        The image with a rank of at least 3.

        Images of rank >= 3 are returned without changes.

        Lower-rank images get singleton axes appended at the end, so a rank-2 input
        assumed to be of shape (height, width) becomes (height, width, 1).

    See also: sleap.nn.data.utils.expand_to_rank
    """
    # Nothing to do when the image already has enough axes.
    if tf.rank(image) >= 3:
        return image

    return expand_to_rank(image, 3, prepend=False)
示例#11
0
        def predict(example):
            """Run `self.keras_model` on the example and store its outputs.

            Every tensor named in `self.model_input_keys` is expanded to rank 4 and
            fed to the model. Each output is stored under the matching entry of
            `self.model_output_keys`, with its leading axis squeezed when the input
            rank was lower than the output rank.
            """
            with tf.device(device_name):
                model_inputs = []
                for key in self.model_input_keys:
                    tensor = example[key]
                    # Overwritten each pass, so this ends up as the last input's rank.
                    input_rank = tf.rank(tensor)
                    model_inputs.append(
                        expand_to_rank(tensor, target_rank=4, prepend=True)
                    )

                outputs = self.keras_model(model_inputs)
                # Wrap a single output so it can be zipped with the output keys.
                if not isinstance(outputs, list):
                    outputs = [outputs]

                for output_key, y in zip(self.model_output_keys, outputs):
                    # For a nested list of outputs only the first entry is kept.
                    if isinstance(y, list):
                        y = y[0]
                    if input_rank < tf.rank(y):
                        y = tf.squeeze(y, axis=0)
                    example[output_key] = y

                return example
示例#12
0
        def find_peaks(example):
            """Find local peaks in the example's confidence maps.

            Reads the tensor stored under `self.confmaps_key`, locates local peaks
            with either the integral-refinement finder or the grid-aligned finder
            (chosen by `self.integral`), scales the peak coordinates by
            `self.confmaps_stride` and stores peaks, values, sample indices and
            channel indices under their respective keys. The confidence maps are
            dropped from the example unless `self.keep_confmaps` is set.
            """
            with tf.device(device_name):
                cms = example[self.confmaps_key]

                if self.integral:
                    # Find local peaks with local NMS + integral refinement.
                    found = find_local_peaks_integral(
                        cms, threshold=self.peak_threshold
                    )
                else:
                    # Find local peaks with grid-aligned NMS on a rank-4 view.
                    batched_cms = expand_to_rank(cms, target_rank=4, prepend=True)
                    found = find_local_peaks(
                        batched_cms, threshold=self.peak_threshold
                    )

                peaks, peak_vals, peak_sample_inds, peak_channel_inds = found

                # Adjust for confidence map stride.
                peaks = peaks * tf.cast(self.confmaps_stride, tf.float32)

                example[self.peaks_key] = peaks
                example[self.peak_vals_key] = peak_vals
                example[self.peak_sample_inds_key] = peak_sample_inds
                example[self.peak_channel_inds_key] = peak_channel_inds

                if not self.keep_confmaps:
                    example.pop(self.confmaps_key)

                return example