Example No. 1
    def _forward(self, x, feat=None):
        # sinusoidal positional encoding of the input coordinates
        embedding = ivy_vision.sinusoid_positional_encoding(x, self._embedding_length)
        x = ivy.relu(self._fc_layers[0](embedding))
        for i in range(1, self._num_layers - 1):
            x = ivy.relu(self._fc_layers[i](x))
            # skip connection: re-inject the embedding every fourth layer
            if i % 4 == 0 and i > 0:
                x = ivy.concatenate([x, embedding], -1)
        x = self._fc_layers[-1](x)
        # sigmoid bounds the colours to (0, 1); relu keeps the density non-negative
        rgb = ivy.sigmoid(x[..., 0:3])
        sigma_a = ivy.relu(x[..., -1])
        return rgb, sigma_a
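
This looks like a NeRF-style head: the final fully connected layer emits four channels, sigmoid bounds the RGB colour to (0, 1), and relu keeps the volume density non-negative. A minimal NumPy sketch of that output split (same layout as above; the helper name is hypothetical):

import numpy as np

def split_nerf_outputs(raw):
    # raw: [..., 4] activations from the final layer, as in _forward above
    rgb = 1.0 / (1.0 + np.exp(-raw[..., 0:3]))   # sigmoid -> colours in (0, 1)
    sigma_a = np.maximum(raw[..., -1], 0.0)      # relu -> non-negative density
    return rgb, sigma_a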
Example No. 2
def test_sigmoid(x, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    x = tensor_fn(x, dtype_str, dev_str)
    ret = ivy.sigmoid(x)
    # type test
    assert ivy.is_array(ret)
    # cardinality test
    assert ret.shape == x.shape
    # value test
    assert np.allclose(call(ivy.sigmoid, x),
                       ivy.numpy.sigmoid(ivy.to_numpy(x)))
    # compilation test
    helpers.assert_compilable(ivy.sigmoid)
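
The value test compares ivy.sigmoid against the NumPy backend; the function under test is the logistic sigmoid, sigmoid(x) = 1 / (1 + exp(-x)). A minimal stand-alone reference check (the helper name and sample values are illustrative only):

import numpy as np

def ref_sigmoid(x):
    # logistic sigmoid: 1 / (1 + exp(-x))
    return 1.0 / (1.0 + np.exp(-np.asarray(x, dtype=np.float64)))

assert np.allclose(ref_sigmoid([-1.0, 0.0, 1.0]),
                   [0.26894142, 0.5, 0.73105858])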
Example No. 3
def lstm_update(x, init_h, init_c, kernel, recurrent_kernel, bias=None, recurrent_bias=None):
    """
    Perform a long short-term memory (LSTM) update by unrolling the time dimension of the input array.

    :param x: input tensor of LSTM layer *[batch_shape, t, in]*.
    :type x: array
    :param init_h: initial state tensor for the cell output *[batch_shape, out]*.
    :type init_h: array
    :param init_c: initial state tensor for the cell hidden state *[batch_shape, out]*.
    :type init_c: array
    :param kernel: weights for cell kernel *[in, 4 x out]*.
    :type kernel: array
    :param recurrent_kernel: weights for cell recurrent kernel *[out, 4 x out]*.
    :type recurrent_kernel: array
    :param bias: bias for cell kernel *[4 x out]*.
    :type bias: array
    :param recurrent_bias: bias for cell recurrent kernel *[4 x out]*.
    :type recurrent_bias: array
    :return: hidden state for all timesteps *[batch_shape, t, out]* and cell state for the last timestep *[batch_shape, out]*
    """

    # get shapes
    x_shape = list(x.shape)
    batch_shape = x_shape[:-2]
    timesteps = x_shape[-2]
    input_channels = x_shape[-1]
    x_flat = ivy.reshape(x, (-1, input_channels))

    # input kernel
    Wi = kernel
    Wi_x = ivy.reshape(ivy.matmul(x_flat, Wi) + (bias if bias is not None else 0),
                        batch_shape + [timesteps, -1])
    Wii_x, Wif_x, Wig_x, Wio_x = ivy.split(Wi_x, 4, -1)

    # recurrent kernel
    Wh = recurrent_kernel

    # lstm states
    ht = init_h
    ct = init_c

    # lstm outputs
    ot = x
    hts_list = list()

    # unrolled time dimension with lstm steps
    for Wii_xt, Wif_xt, Wig_xt, Wio_xt in zip(ivy.unstack(Wii_x, axis=-2), ivy.unstack(Wif_x, axis=-2),
                                              ivy.unstack(Wig_x, axis=-2), ivy.unstack(Wio_x, axis=-2)):
        htm1 = ht
        ctm1 = ct

        Wh_htm1 = ivy.matmul(htm1, Wh) + (recurrent_bias if recurrent_bias is not None else 0)
        Whi_htm1, Whf_htm1, Whg_htm1, Who_htm1 = ivy.split(Wh_htm1, num_sections=4, axis=-1)

        it = ivy.sigmoid(Wii_xt + Whi_htm1)
        ft = ivy.sigmoid(Wif_xt + Whf_htm1)
        gt = ivy.tanh(Wig_xt + Whg_htm1)
        ot = ivy.sigmoid(Wio_xt + Who_htm1)
        ct = ft * ctm1 + it * gt
        ht = ot * ivy.tanh(ct)

        hts_list.append(ivy.expand_dims(ht, -2))

    return ivy.concatenate(hts_list, -2), ct
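
A minimal usage sketch for lstm_update, assuming an Ivy backend has already been selected; the sizes are placeholders and the shapes follow the docstring above:

import numpy as np
import ivy

batch, t, in_ch, out_ch = 2, 5, 8, 16  # hypothetical sizes

x = ivy.array(np.random.uniform(size=(batch, t, in_ch)).astype('float32'))
init_h = ivy.array(np.zeros((batch, out_ch), dtype='float32'))
init_c = ivy.array(np.zeros((batch, out_ch), dtype='float32'))
kernel = ivy.array(np.random.uniform(size=(in_ch, 4 * out_ch)).astype('float32'))
recurrent_kernel = ivy.array(np.random.uniform(size=(out_ch, 4 * out_ch)).astype('float32'))

hts, ct = lstm_update(x, init_h, init_c, kernel, recurrent_kernel)
# hts: [batch, t, out] hidden states for all timesteps, ct: [batch, out] final cell state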
Example No. 4
    def _forward(self, x, prev_state):
        prev_read_vector_list = prev_state[1]

        controller_input = ivy.concatenate([x] + prev_read_vector_list, axis=1)
        controller_output, controller_state = self._controller(ivy.expand_dims(controller_input, -2),
                                                               initial_state=prev_state[0])
        controller_output = controller_output[..., -1, :]

        parameters = self._controller_proj(controller_output)
        parameters = ivy.clip(parameters, -self._clip_value, self._clip_value)
        head_parameter_list = \
            ivy.split(parameters[:, :self._num_parameters_per_head * self._num_heads], self._num_heads,
                      axis=1)
        erase_add_list = ivy.split(parameters[:, self._num_parameters_per_head * self._num_heads:],
                                   2 * self._write_head_num, axis=1)

        prev_w_list = prev_state[2]
        prev_M = prev_state[4]
        w_list = []
        for i, head_parameter in enumerate(head_parameter_list):
            # addressing parameters per head: key k, key strength beta, interpolation gate g,
            # shift weighting s and sharpening factor gamma (Sec 3.3)
            k = ivy.tanh(head_parameter[:, 0:self._memory_vector_dim])
            beta = ivy.softplus(head_parameter[:, self._memory_vector_dim])
            g = ivy.sigmoid(head_parameter[:, self._memory_vector_dim + 1])
            s = ivy.softmax(
                head_parameter[:, self._memory_vector_dim + 2:self._memory_vector_dim +
                                                              2 + (self._shift_range * 2 + 1)])
            gamma = ivy.softplus(head_parameter[:, -1]) + 1
            w = self._addressing(k, beta, g, s, gamma, prev_M, prev_w_list[i])
            w_list.append(w)

        # Reading (Sec 3.1)

        read_w_list = w_list[:self._read_head_num]
        if self._step == 0:
            usage_indicator = ivy.zeros_like(w_list[0])
        else:
            usage_indicator = prev_state[3] + ivy.reduce_sum(ivy.concatenate(read_w_list, 0))
        read_vector_list = []
        for i in range(self._read_head_num):
            read_vector = ivy.reduce_sum(ivy.expand_dims(read_w_list[i], axis=2) * prev_M, axis=1)
            read_vector_list.append(read_vector)

        # Writing (Sec 3.2)

        prev_write_w_list = prev_w_list[self._read_head_num:]
        w_wr_size = math.ceil(self._memory_size / 2) if self._retroactive_updates else self._memory_size
        if self._sequential_writing:
            batch_size = ivy.shape(x)[0]
            if self._step < w_wr_size:
                w_wr_list = [ivy.tile(ivy.cast(ivy.one_hot(
                    ivy.array([self._step]), w_wr_size), 'float32'),
                    (batch_size, 1))] * self._write_head_num
            else:
                batch_idxs = ivy.expand_dims(ivy.arange(batch_size, 0), -1)
                mem_idxs = ivy.expand_dims(ivy.argmax(usage_indicator[..., :w_wr_size], -1), -1)
                total_idxs = ivy.concatenate((batch_idxs, mem_idxs), -1)
                w_wr_list = [ivy.scatter_nd(total_idxs, ivy.ones((batch_size,)),
                                                (batch_size, w_wr_size))] * self._write_head_num
        else:
            w_wr_list = w_list[self._read_head_num:]
        if self._retroactive_updates:
            w_ret_list = [self._retroactive_discount * prev_write_w[..., w_wr_size:] +
                          (1 - self._retroactive_discount) * prev_write_w[..., :w_wr_size]
                          for prev_write_w in prev_write_w_list]
            w_write_list = [ivy.concatenate((w_wr, w_ret), -1) for w_wr, w_ret in zip(w_wr_list, w_ret_list)]
        else:
            w_write_list = w_wr_list
        M = prev_M
        for i in range(self._write_head_num):
            w = ivy.expand_dims(w_write_list[i], axis=2)
            if self._with_erase:
                # NTM erase step: M <- M * (1 - w e^T)
                erase_vector = ivy.expand_dims(ivy.sigmoid(erase_add_list[i * 2]), axis=1)
                M = M * (ivy.ones(ivy.shape(M)) - ivy.matmul(w, erase_vector))
            # NTM add step: M <- M + w a^T
            add_vector = ivy.expand_dims(ivy.tanh(erase_add_list[i * 2 + 1]), axis=1)
            M = M + ivy.matmul(w, add_vector)

        NTM_output = self._output_proj(ivy.concatenate([controller_output] + read_vector_list, axis=1))
        NTM_output = ivy.clip(NTM_output, -self._clip_value, self._clip_value)

        self._step += 1
        return NTM_output, NTMControllerState(
            controller_state=controller_state, read_vector_list=read_vector_list, w_list=w_list,
            usage_indicator=usage_indicator, M=M)
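
For reference, the reading step above is a weighted sum over memory rows. A plain NumPy sketch of that operation (the helper name is hypothetical; shapes follow the code above):

import numpy as np

def read_memory(w, M):
    # w: [batch, memory_size] read weights, M: [batch, memory_size, memory_vector_dim]
    # mirrors the reduce_sum in the reading step: returns [batch, memory_vector_dim]
    return np.sum(np.expand_dims(w, axis=2) * M, axis=1)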
Example No. 5
def loss_fn(ntm, v, total_seq, target_seq, seq_len):
    output_sequence = ntm(total_seq, v=v)
    # keep only the outputs after the first seq_len + 1 timesteps as prediction logits
    pred_logits = output_sequence[:, seq_len + 1:, :]
    # sigmoid maps the logits to per-element probabilities
    pred_vals = ivy.sigmoid(pred_logits)
    # summed binary cross-entropy, normalised by the leading (batch) dimension
    return ivy.reduce_sum(ivy.binary_cross_entropy(
        pred_vals, target_seq))[0] / pred_vals.shape[0], pred_vals
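
Here ivy.sigmoid turns the raw logits into per-element probabilities before the cross-entropy. As a sanity check, the elementwise quantity being summed can be written in plain NumPy (the helper name and the clipping constant are illustrative assumptions, not part of Ivy):

import numpy as np

def ref_binary_cross_entropy(pred_vals, target, eps=1e-7):
    # elementwise -(t * log(p) + (1 - t) * log(1 - p)) on probabilities p in (0, 1)
    p = np.clip(pred_vals, eps, 1 - eps)
    return -(target * np.log(p) + (1 - target) * np.log(1 - p))

# illustrative values: sigmoid of two logits against binary targets
probs = 1.0 / (1.0 + np.exp(-np.array([[2.0, -1.0]])))
targets = np.array([[1.0, 0.0]])
loss = ref_binary_cross_entropy(probs, targets).sum() / probs.shape[0]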