def test_to_tensor():
    """Exercise ``transforms.ToTensor`` on valid, invalid and boundary inputs.

    Covered cases:
      * 3D uint8 input: last axis moved to the front, values divided by 255.
      * 4D uint8 input: same, with the leading axis left in place.
      * 5D input: the transform is expected to raise ``MXNetError``.
      * Exact bounds: all-0 input maps to 0.0 and all-255 input maps to 1.0.
    """
    # --- 3D input -----------------------------------------------------------
    image = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    converted = transforms.ToTensor()(np.array(image, dtype='uint8'))
    expected = np.transpose(image.astype(dtype=np.float32) / 255.0, (2, 0, 1))
    assert_almost_equal(converted.asnumpy(), expected)

    # --- 4D input -----------------------------------------------------------
    batch = np.random.uniform(
        0, 255, (5, 300, 300, 3)).astype(dtype=np.uint8)
    converted = transforms.ToTensor()(np.array(batch, dtype='uint8'))
    expected = np.transpose(
        batch.astype(dtype=np.float32) / 255.0, (0, 3, 1, 2))
    assert_almost_equal(converted.asnumpy(), expected)

    # --- Invalid (5D) input -------------------------------------------------
    too_many_dims = np.random.uniform(
        0, 255, (5, 5, 300, 300, 3)).astype(dtype=np.uint8)
    to_tensor = transforms.ToTensor()
    assertRaises(MXNetError, to_tensor, too_many_dims)

    # --- Bounds: 0 -> 0.0 ---------------------------------------------------
    all_min = np.zeros((10, 20, 3)).astype(dtype=np.uint8)
    converted = transforms.ToTensor()(np.array(all_min, dtype='uint8'))
    expected = np.transpose(
        np.zeros(all_min.shape, dtype=np.float32), (2, 0, 1))
    assert same(converted.asnumpy(), expected)

    # --- Bounds: 255 -> 1.0 -------------------------------------------------
    all_max = np.full((10, 20, 3), 255).astype(dtype=np.uint8)
    converted = transforms.ToTensor()(np.array(all_max, dtype='uint8'))
    expected = np.transpose(
        np.ones(all_max.shape, dtype=np.float32), (2, 0, 1))
    assert same(converted.asnumpy(), expected)
Пример #2
0
    def init_state_from_encoder(
            self,
            encoder_outputs: np.ndarray,
            encoder_valid_length: Optional[np.ndarray] = None,
            target_embed: Optional[np.ndarray] = None) -> List[np.ndarray]:
        """
        Builds the initial decoder state list from the encoder output.

        When ``self.inference_only`` is set, the returned list is:
        steps, encoder valid length,
        one transposed ``ff_kv`` projection of the encoder outputs per layer,
        followed by zero-filled autoregressive state placeholders.
        Otherwise the returned list is:
        steps, transposed encoder outputs, encoder valid length,
        followed by zero-filled autoregressive state placeholders.

        :param encoder_outputs: Encoder outputs. Shape: (batch, source_length, encoder_dim).
        :param encoder_valid_length: Valid lengths of encoder outputs. Shape: (batch,).
        :param target_embed: Target-side embedding layer output. Shape: (batch, target_length, target_embedding_dim).
                             When None, the step state is initialised to zeros.
        :return: Initial states.
        """
        if target_embed is not None:
            # Target embeddings given: one step index per target position. Shape: (1, target_length)
            positions = npx.arange_like(target_embed, axis=1)
            steps = np.expand_dims(positions, axis=0)
        else:
            # No target embeddings: initial step = 0. Shape: (batch_size, 1)
            steps = np.expand_dims(np.zeros_like(encoder_valid_length), axis=1)

        if self.inference_only:
            # The per-layer key/value projections are cached in the states,
            # so the raw encoder outputs are not passed along.
            projected_kv = [
                np.transpose(layer.enc_attention.ff_kv(encoder_outputs),
                             axes=(1, 0, 2))
                for layer in self.layers
            ]
            states = [steps, encoder_valid_length] + projected_kv
        else:
            # No projection caching: carry the (axis 0/1 swapped) encoder outputs.
            swapped_outputs = np.transpose(encoder_outputs, axes=(1, 0, 2))
            states = [steps, swapped_outputs, encoder_valid_length]

        batch_size = encoder_outputs.shape[0]
        device = encoder_outputs.ctx
        dtype = encoder_outputs.dtype
        # One zero tensor per autoregressive state tensor of every layer.
        for layer in self.layers:
            for _ in range(layer.num_state_tensors):
                states.append(
                    np.zeros(layer.get_states_shape(batch_size),
                             ctx=device,
                             dtype=dtype))
        return states
Пример #3
0
def test_np_transpose():
    """Compare ``np.transpose`` against NumPy's ``transpose`` for 0-6 dims.

    For every dtype and rank the default axes (``None``), the identity
    permutation and one random permutation are checked (rank 0 uses ``None``
    and ``()``); results must match exactly in both dtype and values.
    """
    # TODO(junwu): Add more test cases
    sym_input = mx.sym.var('a').as_np_ndarray()
    sym_output = sym_input.transpose()
    assert type(sym_output) == mx.sym.np._Symbol

    for dtype in ('float32', 'int32'):
        for ndim in range(7):
            shape = rand_shape_nd(ndim, dim=5, allow_zero_size=True)
            reference = _np.random.uniform(low=-100, high=100,
                                           size=shape).astype(dtype)
            candidate = np.array(reference, dtype=dtype)

            if ndim == 0:
                axes_cases = [None, ()]
            else:
                order = list(range(ndim))
                identity = tuple(order)
                random.shuffle(order)
                axes_cases = [None, identity, tuple(order)]

            for axes in axes_cases:
                expected = _np.transpose(reference, axes=axes)
                actual = np.transpose(candidate, axes=axes)
                assert expected.dtype == actual.dtype
                assert same(actual.asnumpy(), expected)
Пример #4
0
def test_np_transpose():
    """Check forward and backward of ``transpose`` through a ``HybridBlock``.

    Runs with and without hybridization, for int32/float32 and ranks 0-6,
    comparing the forward output and the input gradient against NumPy, and
    finally checks the imperative ``np.transpose`` call as well.
    """
    def np_transpose_grad(out_shape, dtype, axes=None):
        # Expected input gradient for an all-ones output gradient: transpose
        # the ones back with the inverse permutation (argsort of the axes).
        head_grad = _np.ones(out_shape, dtype=dtype)
        if axes is not None and axes != ():
            inverse = _np.argsort(_np.array(axes))
            return _np.transpose(head_grad, tuple(inverse))
        return _np.transpose(head_grad, axes)

    class TestTranspose(HybridBlock):
        """Minimal block that transposes its input with fixed axes."""

        def __init__(self, axes=None):
            super().__init__()
            self.axes = axes

        def hybrid_forward(self, F, a):
            return F.np.transpose(a, self.axes)

    # Shared comparison settings for every assert_almost_equal call below.
    tolerances = dict(rtol=1e-3, atol=1e-5, use_broadcast=False)

    for hybridize in (True, False):
        for dtype in (_np.int32, _np.float32):
            for ndim in range(7):
                shape = rand_shape_nd(ndim, dim=5, allow_zero_size=True)
                if ndim == 0:
                    axes_cases = [None, ()]
                else:
                    order = list(range(ndim))
                    identity = tuple(order)
                    random.shuffle(order)
                    axes_cases = [None, identity, tuple(order)]

                for axes in axes_cases:
                    block = TestTranspose(axes)
                    if hybridize:
                        block.hybridize()

                    x = rand_ndarray(shape).as_np_ndarray().astype(dtype)
                    x.attach_grad()
                    expected = _np.transpose(x.asnumpy(), axes)

                    # Forward through the block under autograd recording.
                    with mx.autograd.record():
                        actual = block(x)
                    assert actual.shape == expected.shape
                    assert_almost_equal(actual.asnumpy(), expected,
                                        **tolerances)

                    # Backward: compare the input gradient with the reference.
                    actual.backward()
                    expected_grad = np_transpose_grad(expected.shape, dtype,
                                                      axes)
                    assert_almost_equal(x.grad.asnumpy(), expected_grad,
                                        **tolerances)

                    # Imperative call outside of any block.
                    actual = np.transpose(x, axes)
                    expected = _np.transpose(x.asnumpy(), axes)
                    assert_almost_equal(actual.asnumpy(), expected,
                                        **tolerances)
Пример #5
0
    def forward(self, data, valid_length):
        """Run the input through positional embedding and all layers.

        The input is positionally embedded, optionally passed through dropout
        (when ``config.dropout_prepost`` is positive), has its first two axes
        swapped, is fed through every block in ``self.layers`` and through
        ``self.final_process``, and is finally swapped back.

        :param data: Input to the positional embedding.
        :param valid_length: Valid lengths, passed to
                             ``layers.prepare_source_valid_lengths`` and
                             returned unchanged.
        :return: Tuple of (processed data, valid_length).
        """
        embedded = self.pos_embedding(data, None)

        if self.config.dropout_prepost > 0.0:
            embedded = npx.dropout(data=embedded,
                                   p=self.config.dropout_prepost)

        # Per original note: (batch_size * heads, seq_len)
        att_valid_length = layers.prepare_source_valid_lengths(
            valid_length, embedded, num_heads=self.config.attention_heads)

        # Swap axes 0 and 1 for the layer stack.
        hidden = np.transpose(embedded, axes=(1, 0, 2))
        for block in self.layers:
            hidden = block(hidden, att_valid_length)

        processed = self.final_process(hidden, None)
        # Swap axes 0 and 1 back before returning.
        return np.transpose(processed, axes=(1, 0, 2)), valid_length
Пример #6
0
    def forward(self, rel_positions, query=None):
        """Compute the relative positional attention scores.

        Parameters
        ----------
        rel_positions
            The relative shifts. Shape (query_length, mem_length).
            Each element represents the shift between the :math:`i-th` element of query and
            the :math:`j-th` element of memory.
        query
            The query for computing the relative scores. The shape depends on the layout.
            It is required for the 'transformer_xl' and 'shaw' methods and is not used
            for the 't5' method.

        Returns
        -------
        rel_scores
            The relative attention scores
            Can have shape (batch_size, num_heads, query_length, mem_length)
            or (num_heads, query_length, mem_length)
        """
        method = self._method

        if method == 't5':
            # shape is (K, L_q, L_m)
            return self._rel_pos_embed(rel_positions).transpose((2, 0, 1))
        if method not in ('transformer_xl', 'shaw'):
            raise NotImplementedError

        assert query is not None, 'Must specify query if method={}'.format(self._method)

        # Clip the shifts only when a maximum distance is configured; the lower
        # bound is symmetric for bidirectional attention and 0 otherwise.
        max_distance = self._max_distance
        if max_distance is not None:
            lower_bound = -max_distance if self._bidirectional else 0
            rel_positions = np.clip(rel_positions,
                                    a_min=lower_bound, a_max=max_distance)

        # uniq_rel.shape = (#uniq,), rev_index.shape = (L_q, L_m)
        uniq_rel, rev_index = np.unique(rel_positions, return_inverse=True)

        # Embed only the distinct shifts; they are scattered back further down.
        uniq_embed = self._rel_pos_embed(uniq_rel)
        if method == 'transformer_xl':
            uniq_embed = self._rel_proj(self._dropout_layer(uniq_embed))
        # Shape (#uniq, K, C_q)
        uniq_embed = npx.reshape(uniq_embed,
                                 (-2, self._num_heads, self._head_query_units))

        # Dot-product between the query and the relative positional embeddings.
        # After the calculation, rel_score.shape = (L_q, #uniq, N, K)
        layout = self._layout
        if layout == 'NKT':
            # query: (N, K, L_q, C_q)
            einsum_spec = 'bnid,jnd->ijbn'
        elif layout == 'NTK':
            # query: (N, L_q, K, C_q)
            einsum_spec = 'bind,jnd->ijbn'
        elif layout == 'TNK':
            # query: (L_q, N, K, C_q)
            einsum_spec = 'ibnd,jnd->ijbn'
        else:
            raise NotImplementedError

        if self._use_einsum:
            rel_score = np.einsum(einsum_spec, query, uniq_embed)
        else:
            # Bring the query into the 'NKT' arrangement, then use a batched matmul.
            if layout == 'NKT':
                query_nkt = query
            elif layout == 'NTK':
                query_nkt = np.swapaxes(query, 1, 2)
            else:
                query_nkt = np.transpose(query, (1, 2, 0, 3))
            embed_t = np.transpose(uniq_embed, (1, 2, 0))
            rel_score = np.transpose(np.matmul(query_nkt, embed_t),
                                     (2, 3, 0, 1))

        # We use gather_nd to select the elements
        # TODO(sxjscience) Use advanced indexing once available
        rev_index = npx.reshape_like(rev_index, rel_positions).astype(np.int32)
        row_ids = npx.arange_like(rel_positions, axis=0).astype(np.int32)
        # Adding zeros shaped like rev_index expands the row ids to its shape.
        query_idx = np.expand_dims(row_ids, axis=-1) + np.zeros_like(rev_index)
        gathered = npx.gather_nd(rel_score, np.stack([query_idx, rev_index]))
        return np.transpose(gathered, (2, 3, 0, 1))
Пример #7
0
    def forward(
            self, step_input: np.ndarray,
            states: List[np.ndarray]) -> Tuple[np.ndarray, List[np.ndarray]]:
        """
        Runs the decoder layers on ``step_input`` and returns the output with updated states.

        The layout of ``states`` depends on ``self.inference_only``:
        if set, it is steps, source valid length, ``num_layers`` cached encoder
        key/value projections, then the autoregressive states; otherwise it is
        steps, encoder outputs, source valid length, then the autoregressive states.

        :param step_input: Input passed to the positional embedding.
        :param states: Flat list of state tensors in the layout described above.
        :return: Tuple of (decoder output, new states in the same layout as ``states``).
        """
        num_layers = self.config.num_layers
        mask = None

        if self.inference_only:
            steps, source_valid_length, *remainder = states
            # No raw encoder output: layers use the pre-computed key/value
            # projections carried in the states instead.
            source_encoded = None
            enc_att_kv = remainder[:num_layers]
            autoregr_states = remainder[num_layers:]
        else:
            if any(layer.needs_mask for layer in self.layers):
                # mask: (1, length, length)
                mask = self.autoregressive_bias(step_input)
            steps, source_encoded, source_valid_length, *autoregr_states = states
            enc_att_kv = [None] * num_layers

        if any(layer.num_state_tensors > 1 for layer in self.layers):
            # Regroup the flat autoregressive states into one list per layer.
            flat_states = iter(autoregr_states)
            autoregr_states = [
                list(islice(flat_states, layer.num_state_tensors))
                for layer in self.layers
            ]

        # (batch_size * heads, query_length)
        source_valid_length = layers.prepare_source_valid_lengths(
            source_valid_length,
            step_input,
            num_heads=self.config.attention_heads)

        # target: (batch_size, length, model_size)
        target = self.pos_embedding(step_input, steps)
        # (length, batch_size, model_size)
        target = np.transpose(target, axes=(1, 0, 2))

        if self.config.dropout_prepost > 0.0:
            target = npx.dropout(data=target, p=self.config.dropout_prepost)

        new_autoregr_states = []
        per_layer = zip(self.layers, autoregr_states, enc_att_kv)
        for layer, layer_state, layer_kv in per_layer:
            target, updated_layer_state = layer(target, mask, source_encoded,
                                                source_valid_length,
                                                layer_state, layer_kv)
            new_autoregr_states.extend(updated_layer_state)

        target = self.final_process(target, None)
        target = np.transpose(target, axes=(1, 0, 2))

        # Inference: increment steps by 1 (discarded in training)
        steps = steps + 1

        # The pass-through entries are read from the incoming ``states`` list
        # because ``source_valid_length`` was re-assigned above.
        if self.inference_only:
            cached_kv = states[2:2 + num_layers]
            new_states = [steps, states[1]] + cached_kv + new_autoregr_states
        else:
            new_states = [steps, states[1], states[2]] + new_autoregr_states

        return target, new_states