Exemplo n.º 1
0
    def __collect_info(self):
        """
        Collects the embedding-related data from the parent layer of the coords tensor.

        The parent layer of `self._coords` must be a SkeletonEmbeddingLayer. Its embedding is stored
        as an ndarray, the bounding box of the embedding is built with `make_box`, and the bounding box
        configuration (half of the box width, half of the box height) as well as the grid size
        (the coords shape without its first and last dimensions) are saved on the instance.

        A warning is printed (no error is raised) if the largest absolute value of the embedding
        exceeds 1.4, since the embedding is expected to be normalized.
        """
        parent_layer = self._coords.get_parent_layer()
        self._embedding_layer = parent_layer
        assert isinstance(parent_layer, SkeletonEmbeddingLayer), d_msg(
            self._context,
            "coords tensor's parent layer must be of type SkeletonEmbeddingLayer, "
            f"received parent layer type={type(parent_layer)}, "
            f"parent layer name={parent_layer.get_name()}"
        )

        # `get_embedding` gives back a list, so it is wrapped into an ndarray.
        source_embedding = np.array(parent_layer.get_embedding())
        self._source_embedding = source_embedding

        # Sanity check of the magnitude: a normalized embedding lies approximately within [-1, 1].
        max_abs_value = np.max(np.abs(source_embedding))
        if max_abs_value > 1.4:
            warning = d_msg(
                self._context,
                "It seems the embedding's values are not normalized. This is not an error, but the values should "
                "be centered and lie approximately within the [-1, 1] interval. Received embedding with "
                f"maximum absolute value of {max_abs_value}"
            )
            print(warning)

        box = make_box(source_embedding)
        self._embedding_bounding_box = box
        # The configuration expresses the box size relative to the default box, whose
        # top left point is [-1, -1] and bottom right point is [1, 1] (i.e. width = height = 2).
        box_width = box[2] - box[0]
        box_height = box[3] - box[1]
        self._bbox_configuration = [box_width / 2.0, box_height / 2.0]

        # Drop the first and the last dimensions of the coords shape.
        self._grid_size = self._coords.get_shape()[1:-1]
Exemplo n.º 2
0
    def forward(self, x, computation_mode=MakiLayer.INFERENCE_MODE):
        """
        Corrects the grid of stacked embeddings with the offsets contained in `x`.

        The grid is multiplied by [w, h], the offsets are added to it and the sum is divided by [w, h]
        again, which is the same as adding the offsets divided by [w, h] to the grid.

        Parameters
        ----------
        x : tensor
            Offsets. Its static shape is unpacked as [_, h, w, c], where `c` must be equal
            to `2 * self._embedding_dim`.
        computation_mode : str
            Not used by this method.

        Returns
        -------
        tensor
            The corrected grid reshaped to [-1, h, w, 2 * self._embedding_dim].
        """
        # A layer-wide name_scope is intentionally omitted: it won't be used in the future anyway.
        _, h, w, c = x.get_shape().as_list()
        n_channels = self._embedding_dim * 2
        assert c == n_channels, d_msg(
            self.get_name(),
            'The depth of the input tensor must twice as large as the embedding dimensionality. '
            f'Received input tensor channels={c}, embedding dimensionality*2={n_channels}'
        )

        grid = SkeletonEmbeddingLayer.generate_grid_stacked((w, h), self._embedding)

        def merge_points(t):
            # [..., embedding_dim, 2] -> [..., embedding_dim * 2]
            return tf.reshape(t, shape=[-1, h, w, n_channels])

        def split_points(t):
            # [..., embedding_dim * 2] -> [..., embedding_dim, 2]
            return tf.reshape(t, shape=[-1, h, w, self._embedding_dim, 2])

        with tf.name_scope('GridCorrection'):
            # Working in the upscaled grid is what brings the offsets
            # approximately into the range [-1, 1] after the final division.
            scale = np.array([w, h], dtype='float32')

            upscaled_grid = merge_points(split_points(grid) * scale)
            corrected_grid = upscaled_grid + x
            downscaled_grid = merge_points(split_points(corrected_grid) / scale)

        return downscaled_grid
Exemplo n.º 3
0
    def __init__(self, coords: tf.Tensor, point_indicators: tf.Tensor,
                 human_indicators: tf.Tensor, configuration):
        """
        An entity that encapsulates all the tensors necessary to make predictions on a particular grid.
        It makes sure the shapes are synchronized and also collects necessary info for the trainer.

        Parameters
        ----------
        coords : tf.Tensor
            Tensor of the regressed coordinates of the skeleton points. Must lie approximately within
            the [-1, 1] interval.
        point_indicators : tf.Tensor
            Tensor of binary indicators of whether a particular point of the skeleton is visible.
        human_indicators : tf.Tensor
            Tensor of binary indicators of whether a human is present in a particular location
            of the grid.
        configuration : sequence of length 4
            Unpacked as (h, w, w_scale, h_scale). `[h, w]` is stored as the grid size and
            `[w_scale, h_scale]` is stored as the bounding box configuration.

        Raises
        ------
        AssertionError
            If `configuration` does not contain exactly 4 values, or if the dimensionality check
            of the tensors fails.
        """
        self._context = f'SSP HeadLabel({coords.name}, {point_indicators.name}, {human_indicators.name}, {configuration})'

        self._coords = coords
        self._point_indicators = point_indicators
        self._human_indicators = human_indicators
        # Report the actual length in the message (the configuration itself is already
        # a part of the context string).
        assert len(configuration) == 4, d_msg(
            self._context,
            f'Configuration must have length=4, received length={len(configuration)}'
        )
        h, w, w_scale, h_scale = configuration
        self._grid_size = [h, w]
        self._bbox_config = [w_scale, h_scale]
        self.__check_dimensionality()
Exemplo n.º 4
0
    def __check_dimensionality(self):
        """
        Verifies that the shapes of coords, point indicators and human indicators agree.

        The following is asserted, in this order:
        1. All three tensors are 4-dimensional ([b, h, w, c]).
        2. The spatial parts of the shapes (everything except the first and the last dimensions)
           are equal to each other.
        3. coords has an even number of channels.
        4. The number of points (half of the coords channels) equals the number of channels
           of the point indicators.
        5. human indicators has exactly one channel.

        Raises
        ------
        AssertionError
            If any of the checks listed above fails.
        """
        coords_shape = self._coords.get_shape()
        point_indicators_shape = self._point_indicators.get_shape()
        human_indicators_shape = self._human_indicators.get_shape()

        # Only 4-dimensional (convolutional) outputs are supported.
        coords_rank = len(coords_shape)
        point_rank = len(point_indicators_shape)
        human_rank = len(human_indicators_shape)
        assert coords_rank == 4 and point_rank == 4 and human_rank == 4, d_msg(
            self._context,
            'Dimensionality of all tensors must be 4, received '
            f'dim(coords)={coords_rank}, '
            f'dim(point_indicators)={point_rank}, '
            f'dim(human_indicators)={human_rank}'
        )

        # Spatial parts (h, w) of the shapes. Every pair is compared explicitly.
        coords_hw = coords_shape[1:-1]
        point_hw = point_indicators_shape[1:-1]
        human_hw = human_indicators_shape[1:-1]
        assert coords_hw == point_hw and \
            coords_hw == human_hw and \
            point_hw == human_hw, d_msg(
            self._context,
            'Spatial shapes are not aligned. Received '
            f'coords_shape={coords_shape}, '
            f'point_indicators_shape={point_indicators_shape}, '
            f'human_indicators_shape={human_indicators_shape}'
        )

        # Each point takes two channels of coords, hence the channel count must be even.
        coords_channels = coords_shape[-1]
        assert coords_channels % 2 == 0, d_msg(
            self._context,
            f'coords must have an even number of channel, received {coords_channels}.'
        )

        # One indicator channel is expected per point.
        point_channels = point_indicators_shape[-1]
        points_in_coords = coords_channels // 2
        assert points_in_coords == point_channels, d_msg(
            self._context,
            f'Number of points in coords and point_indicators must be the same, '
            f'received {points_in_coords} and {point_channels}.'
        )

        # A single channel is expected for the human indicators.
        human_channels = human_indicators_shape[-1]
        assert human_channels == 1, d_msg(
            self._context,
            f'human_indicators tensor must have 1 channel, received {human_channels}.'
        )
0
    def _setup_inference(self):
        """
        Builds the inference tensors out of the tensors provided by the heads.

        For every head in `self._heads` the point indicators, human indicators and coords data tensors
        are taken, reshaped from [b, h, w, c] to [b, h * w, c] and concatenated along axis 1.
        The regressed points are additionally reshaped to [b, n, c // 2, 2] and mapped from
        the [-1, 1] interval to image coordinates using the size given by `self.get_image_size()`.

        The following attributes are set: `_point_indicators_logits`, `_human_indicators_logits`,
        `_regressed_points`, `_point_indicators` and `_human_indicators` (the last two are the sigmoids
        of the corresponding logits).

        Raises
        ------
        AssertionError
            If no tensors were collected (most likely the list of the heads is empty).
        """
        # Gather the raw tensors head by head.
        point_logits = []
        human_logits = []
        coords = []
        for head in self._heads:
            point_logits.append(head.get_point_indicators().get_data_tensor())
            human_logits.append(head.get_human_indicators().get_data_tensor())
            coords.append(head.get_coords().get_data_tensor())

        def merge_spatial(tensor):
            # [b, h, w, c] -> [b, h * w, c], based on the static shape.
            batch, height, width, channels = tensor.get_shape().as_list()
            return tf.reshape(tensor, shape=[batch, height * width, channels])

        point_logits = [merge_spatial(t) for t in point_logits]
        human_logits = [merge_spatial(t) for t in human_logits]
        coords = [merge_spatial(t) for t in coords]

        # An empty list is hard to diagnose from the tf error messages, so it is checked explicitly.
        assert point_logits and human_logits and coords, d_msg(
            self._name,
            'Length of the logits or regressed points is zero. '
            f'len(point_indicators_logits)={len(point_logits)}, '
            f'len(human_indicators_logits)={len(human_logits)}, '
            f'len(regressed_points)={len(coords)}. '
            f'This is probably because the list of the heads is empty.'
        )

        # Join the predictions of all the heads.
        self._point_indicators_logits = tf.concat(point_logits, axis=1)
        self._human_indicators_logits = tf.concat(human_logits, axis=1)
        joined_coords = tf.concat(coords, axis=1)

        b, n, c = joined_coords.get_shape().as_list()
        w, h = self.get_image_size()
        half_size = np.array([w / 2, h / 2], dtype='float32')

        # Every point takes two consecutive channels.
        points = tf.reshape(joined_coords, shape=[b, n, c // 2, 2])
        # Scale: [-1, 1] -> [-w/2, w/2], then shift: [-w/2, w/2] -> [0, w].
        points = points * half_size
        points = points + half_size
        self._regressed_points = points

        # Probabilities used in predict.
        self._point_indicators = tf.nn.sigmoid(self._point_indicators_logits)
        self._human_indicators = tf.nn.sigmoid(self._human_indicators_logits)
Exemplo n.º 6
0
    def __init__(self, embedding_dim: int, name: str, custom_embedding: list = None):
        """
        Creates a grid of default skeletons. These skeletons are then trained using
        gradient descent.
        The values of the grid lie within the [-1, 1] interval.

        Parameters
        ----------
        embedding_dim : int
            How many points are in the skeleton. Must be at least 2. It is replaced with
            the length of the `custom_embedding` if the latter is provided.
        name : str
            Name of the layer.
        custom_embedding : list of shape [n_points, 2]
            List containing custom skeleton embedding. It must be noted, that the embedding's values must be centered
            and normalized within [-1, 1] interval (or approximately so, you can use larger ones for the purpose
            of more dense coverage of the grid), because it will be put into a grid with values within [-1, 1] interval.
            If it is not a list, it is converted to one. If it is None, a random embedding is created,
            the first two points of which are set to [-1, -1] and [1, 1].
        """
        if isinstance(embedding_dim, int):
            assert embedding_dim >= 2, d_msg(
                name, f'embedding_dim must be at least 2. Received embedding_dim={embedding_dim}'
            )
        else:
            assert custom_embedding is not None, d_msg(
                name, 'embedding_dim is not of the type int. In this case the custom_embedding is expected to be '
                      'provided, but the custom_embedding=None.'
            )

        if custom_embedding is not None:
            # The custom embedding determines the number of points.
            n_points = len(custom_embedding)
            embedding_dim = n_points
            assert n_points >= 2, d_msg(
                name, f'Length of the custom_embedding must be at least 2. Received custom_embedding with '
                f'len={n_points}'
            )
            # Only the first point is inspected here.
            point_dim = len(custom_embedding[0])
            assert point_dim == 2, d_msg(
                name, f"custom_embedding's points are not 2-dimensional. "
                f"Received custom_embedding with {point_dim}-dimensional points."
            )

            if not isinstance(custom_embedding, list):
                not_a_list_note = d_msg(
                    name, f'custom_embedding is not a list. Received custom_embedding of '
                    f'type={type(custom_embedding)}.'
                )
                print(not_a_list_note)
                conversion_note = d_msg(
                    name, 'Iterating over the custom_embedding to convert it to a list.'
                )
                print(conversion_note)
                custom_embedding = self.__embed2list(custom_embedding)

        self._embedding_dim = embedding_dim
        self._custom_embedding = custom_embedding
        if custom_embedding is None:
            print(d_msg(name, 'No custom embedding is provided. Creating a random one.'))
            random_embedding = np.random.uniform(low=-1.0, high=1.0, size=[embedding_dim, 2]).tolist()
            # The border points are inserted on purpose: the configuration of the default boxes
            # depends heavily on the outcome of the randomization, so the resulting bounding box
            # configuration is pinned to the default one - (1, 1) - to keep the behaviour
            # consistent between different runs.
            random_embedding[0] = [-1, -1]
            random_embedding[1] = [1, 1]
            self._custom_embedding = random_embedding

        initial_value = np.array(self._custom_embedding)
        with tf.name_scope(name):
            self._embedding = tf.Variable(initial_value, dtype='float32', name='SkeletonEmbedding')

        # The embedding is the only parameter of the layer, and it is not regularized.
        named_params = {self._embedding.name: self._embedding}
        super().__init__(
            name=name,
            params=[self._embedding],
            regularize_params=[],
            named_params_dict=named_params
        )