Exemplo n.º 1
0
    def take_one_step(self, nn_input_bf, hid_out=None):
        """Advance the LSTM by one time step instead of unrolling a sequence.

        Parameters
        ----------
        nn_input_bf
            Input for this step. Its first axis is used as the batch size
            (presumably laid out as (batch, features) -- confirm with callers).
        hid_out
            Hidden state to use as h_{t-1}. When None, the state kept on the
            layer (``self.hid_prev``) is used; it is created from
            ``self.hid_init`` the first time it is needed.

        Returns
        -------
        object
            The new hidden state. It is also stored in ``self.hid_prev``, and
            the new cell state is stored in ``self.cell_prev``.
        """
        num_batch = nn_input_bf.shape[0]

        def slice_w(x, n):
            # Columns belonging to the n'th gate of a stacked pre-activation.
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Single recurrent computation step.
        # input_n is the input for the current step.
        def step(input_n, cell_previous, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):

            input_n = cgt.broadcast("+", cgt.dot(input_n, W_in_stacked), b_stacked, "xx,1x")

            # Calculate gates pre-activations and slice
            gates = input_n + cgt.dot(hid_previous, W_hid_stacked)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            # Apply nonlinearities
            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)
            outgate = self.nonlinearity_outgate(outgate)

            # Compute new cell value
            cell = forgetgate*cell_previous + ingate*cell_input
            # Compute new hidden unit activation
            hid = outgate*self.nonlinearity(cell)
            return [cell, hid]

        # Lazily create the initial cell state, one row per batch element.
        if self.cell_prev is None:
            self.cell_prev = cgt.dot(cgt.ones((num_batch, 1)), self.cell_init)

        # Fall back to the stored hidden state when the caller supplies none.
        if hid_out is None:
            if self.hid_prev is None:
                self.hid_prev = cgt.dot(cgt.ones((num_batch, 1)), self.hid_init)
            hid_out = self.hid_prev

        # step() takes the cell state BEFORE the hidden state; passing them
        # the other way round silently swaps the two (they have equal shapes).
        cell, hid = step(nn_input_bf, self.cell_prev, hid_out,
                         self.W_hid_stacked, self.W_in_stacked, self.b_stacked)
        self.cell_prev = cell
        self.hid_prev = hid

        return self.hid_prev
Exemplo n.º 2
0
Arquivo: api.py Projeto: Quantza/cgt
def to_one_hot(y, nb_class, dtype=None):
    """
    Return a matrix where each row corresponds to the one hot
    encoding of each element in y.

    Parameters
    ----------
    y
        A vector of integer value between 0 and nb_class - 1.
    nb_class : int
        The number of classes in y.
    dtype : data-type
        The dtype of the returned matrix; forwarded to ``cgt.zeros``.

    Returns
    -------
    object
        A matrix of shape (y.shape[0], nb_class), where each row ``i`` is
        the one hot encoding of the corresponding ``y[i]`` value.
    """
    num_rows = y.shape[0]

    one_hot = cgt.zeros((num_rows, nb_class), dtype)
    increments = cgt.ones((num_rows,))

    row_idx = cgt.arange(num_rows)
    # Use a 64-bit index: an 8-bit cast ('i1') wraps labels above 127, which
    # would set the wrong column whenever nb_class exceeds 128.
    col_idx = cgt.cast(y, 'i8')

    # Add 1 at (i, y[i]) for every row i of the zero matrix.
    return cgt.inc_subtensor(one_hot, [row_idx, col_idx], increments)
Exemplo n.º 3
0
Arquivo: api.py Projeto: xyuan/cgt
def to_one_hot(y, nb_class, dtype=None):
    """
    Return a matrix where each row corresponds to the one hot
    encoding of each element in y.

    Parameters
    ----------
    y
        A vector of integer value between 0 and nb_class - 1.
    nb_class : int
        The number of classes in y.
    dtype : data-type
        The dtype of the returned matrix; forwarded to ``cgt.zeros``.

    Returns
    -------
    object
        A matrix of shape (y.shape[0], nb_class), where each row ``i`` is
        the one hot encoding of the corresponding ``y[i]`` value.
    """
    num_rows = y.shape[0]

    one_hot = cgt.zeros((num_rows, nb_class), dtype)
    increments = cgt.ones((num_rows, ))

    row_idx = cgt.arange(num_rows)
    # Use a 64-bit index: an 8-bit cast ('i1') wraps labels above 127, which
    # would set the wrong column whenever nb_class exceeds 128.
    col_idx = cgt.cast(y, 'i8')

    # Add 1 at (i, y[i]) for every row i of the zero matrix.
    return cgt.inc_subtensor(one_hot, [row_idx, col_idx], increments)
Exemplo n.º 4
0
    def take_one_step(self, input_bf, hid_out=None):
        """Advance the recurrent layer by a single time step.

        Parameters
        ----------
        input_bf
            Input for this step. Its first axis is used as the batch size
            (presumably laid out as (batch, features) -- confirm with callers).
        hid_out
            Hidden state to use as the previous output. When None, the state
            kept on the layer (``self.prev_out``) is used; it is created from
            ``self.hid_init`` the first time it is needed.

        Returns
        -------
        object
            The new hidden state, which is also stored in ``self.prev_out``.
        """
        num_batch = input_bf.shape[0]

        def step(input_bh, hid_previous_bh):
            hid_pre_bh = self.hid_to_hid(hid_previous_bh)
            hid_pre_bh += self.in_to_hid(input_bh)
            return self.activation(hid_pre_bh)

        if hid_out is None:
            # Only build the initial state once; afterwards continue from the
            # output of the previous call rather than resetting to hid_init.
            if self.prev_out is None:
                self.prev_out = cgt.dot(cgt.ones((num_batch, 1)), self.hid_init)
            hid_out = self.prev_out

        self.prev_out = step(input_bf, hid_out)
        return self.prev_out
Exemplo n.º 5
0
    def __call__(self, x):
        """Apply the recurrent layer to a whole sequence.

        ``x`` is dimshuffled with the pattern [1, 0, 2] before unrolling and
        the result is dimshuffled back the same way, so the output has the
        same axis order as the input (named batch/time/feature in the
        original code). When ``self.backwards`` is set, the output is flipped
        along axis 1 after unrolling.
        """
        # Swap the first two axes so the unrolling helper iterates over time.
        input_tbf = cgt.dimshuffle(x, [1, 0, 2])
        num_batch = input_tbf.shape[1]

        def step(input_bh, hid_previous_bh):
            # Recurrent contribution first, then add the input contribution.
            pre_activation = self.hid_to_hid(hid_previous_bh)
            pre_activation += self.in_to_hid(input_bh)
            return self.activation(pre_activation)

        # One copy of the initial hidden state per batch row.
        initial_hidden = cgt.dot(cgt.ones((num_batch, 1)), self.hid_init)

        unrolled_tbf = unroll_recurrence(
            step_function=step,
            input_to_unroll_tbf=input_tbf,
            hid_init=[initial_hidden],
            go_backwards=self.backwards,
            n_steps=self.timesteps)

        # Restore the original axis order.
        result_btf = cgt.dimshuffle(unrolled_tbf, [1, 0, 2])
        return cgt.flip(result_btf, [1]) if self.backwards else result_btf
Exemplo n.º 6
0
    def take_one_step(self, nn_input_bf, hid_out):
        """Advance the GRU by a single time step.

        Parameters
        ----------
        nn_input_bf
            Input for this step. Its first axis is taken as the batch size
            (presumably laid out as (batch, features) -- confirm with callers).
        hid_out
            Hidden state to use as h_{t-1}. When None, the state kept on the
            layer (``self.hid_out``) is used; it is created from
            ``self.hid_init`` the first time it is needed.

        Returns
        -------
        object
            The new hidden state, which is also stored in ``self.hid_out``.
        """
        # NOTE(review): the original author flagged this method as probably
        # buggy and in need of a rewrite. Unlike the sequence version of this
        # layer, the step below also applies ``self.nonlinearity_hid`` to the
        # new hidden state -- confirm this difference is intended.

        self.num_batches = cgt.infer_shape(nn_input_bf)[0]

        # Stack input weight matrices into a (num_inputs, 3*num_units)
        # matrix, which speeds up computation
        W_in_stacked = cgt.concatenate(
            [self.W_in_to_resetgate, self.W_in_to_updategate,
             self.W_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        W_hid_stacked = cgt.concatenate(
            [self.W_hid_to_resetgate, self.W_hid_to_updategate,
             self.W_hid_to_hidden_update], axis=1)

        # Stack gate biases along axis 1 as well
        b_stacked = cgt.concatenate(
            [self.b_resetgate, self.b_updategate,
             self.b_hidden_update], axis=1)

        # Slicing function that extracts the columns belonging to the n'th
        # GRU gate from a stacked pre-activation.
        def slice_w(x, n):
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Single recurrent computation step.
        # input_n is the input for the current step.
        def step(input_n, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, and W_{hc} h_{t - 1}
            hid_input = cgt.dot(hid_previous, W_hid_stacked)

            # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
            input_n = cgt.broadcast("+", input_n.dot(W_in_stacked), b_stacked, "xx,1x")

            # Reset and update gates
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            resetgate = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)

            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            hidden_update_in = slice_w(input_n, 2)
            hidden_update_hid = slice_w(hid_input, 2)
            hidden_update = hidden_update_in + resetgate*hidden_update_hid

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate)*hid_previous + updategate*hidden_update
            return self.nonlinearity_hid(hid)  # adding this non-linearity seems to help stability.

        # Fall back to the stored hidden state when the caller supplies none.
        if hid_out is None:
            if self.hid_out is None:
                self.hid_out = cgt.dot(cgt.ones((self.num_batches, 1)), self.hid_init)
            hid_out = self.hid_out

        hid_out = step(nn_input_bf, hid_out, W_hid_stacked, W_in_stacked, b_stacked)

        # No flip for ``self.backwards`` here: a single step yields a 2-D
        # result with no time axis, so flipping axis 1 would reverse the
        # hidden units and corrupt the state fed into the next step.
        self.hid_out = hid_out

        return hid_out
Exemplo n.º 7
0
    def __call__(self, input_btf):
        """Apply the GRU to a whole sequence and return its hidden states.

        The input is dimshuffled with the pattern [1, 0, 2] before unrolling
        and the result is dimshuffled back the same way, so the output keeps
        the input's axis order (named batch/time/feature in the original
        code). When ``self.backwards`` is set, the output is flipped along
        axis 1. As a side effect, ``self.num_batches`` is set from the
        inferred input shape.
        """
        # Time-major layout: (n_time_steps, n_batch, n_features)
        input_tbf = cgt.dimshuffle(input_btf, [1, 0, 2])
        self.num_batches = cgt.infer_shape(input_tbf)[1]

        # Stacking the per-gate parameters along axis 1 lets each step use a
        # single matrix product per source (input / hidden) for all gates.
        input_weights = cgt.concatenate(
            [self.W_in_to_resetgate,
             self.W_in_to_updategate,
             self.W_in_to_hidden_update],
            axis=1)
        hidden_weights = cgt.concatenate(
            [self.W_hid_to_resetgate,
             self.W_hid_to_updategate,
             self.W_hid_to_hidden_update],
            axis=1)
        biases = cgt.concatenate(
            [self.b_resetgate,
             self.b_updategate,
             self.b_hidden_update],
            axis=1)

        def slice_w(x, n):
            # Columns of the stacked pre-activation that belong to gate n.
            start = n*self.num_units
            stop = (n+1)*self.num_units
            return x[:, start:stop]

        def step(input_n, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):
            # Hidden contribution to all three gates:
            # W_{hr} h_{t-1}, W_{hu} h_{t-1}, W_{hc} h_{t-1}
            from_hidden = cgt.dot(hid_previous, W_hid_stacked)

            # Input contribution to all three gates, biases included:
            # W_{xr} x_t + b_r, W_{xu} x_t + b_u, W_{xc} x_t + b_c
            from_input = cgt.broadcast(
                "+", input_n.dot(W_in_stacked), b_stacked, "xx,1x")

            # Gate pre-activations, then their nonlinearities.
            reset_pre = slice_w(from_hidden, 0) + slice_w(from_input, 0)
            update_pre = slice_w(from_hidden, 1) + slice_w(from_input, 1)
            reset = self.nonlinearity_resetgate(reset_pre)
            update = self.nonlinearity_updategate(update_pre)

            # Candidate: W_{xc} x_t + r_t * (W_{hc} h_{t-1})
            candidate = slice_w(from_input, 2) + reset*slice_w(from_hidden, 2)

            # New state: (1 - u_t) h_{t-1} + u_t c_t
            return (1 - update)*hid_previous + update*candidate

        # One copy of the initial hidden state per batch row.
        initial_hidden = cgt.dot(cgt.ones((self.num_batches, 1)), self.hid_init)

        # The step function receives the stacked hidden weights, input
        # weights and biases, in that order, as non-sequence arguments.
        unrolled = unroll_lstm(
            fn=step,
            sequences=[input_tbf],
            outputs_info=[initial_hidden],
            go_backwards=self.backwards,
            non_sequences=[hidden_weights, input_weights, biases],
            n_steps=self.timesteps)
        hid_out = unrolled[0]

        # Back to (n_batch, n_time_steps, n_features)
        hid_out = cgt.dimshuffle(hid_out, [1, 0, 2])

        # A backwards pass yields the steps reversed; undo that along axis 1.
        return cgt.flip(hid_out, [1]) if self.backwards else hid_out
Exemplo n.º 8
0
    def __call__(self, nn_input_btf):
        """Apply the LSTM to a whole sequence and return its hidden states.

        The input is dimshuffled with the pattern [1, 0, 2] before unrolling
        and the hidden-state output is dimshuffled back the same way, so the
        result keeps the input's axis order (named batch/time/feature in the
        original code). When ``self.backwards`` is set, the output is flipped
        along axis 1. The cell states produced by the unrolling are discarded.
        """
        # The unrolling helper iterates over the first axis, so go time-major:
        # (n_time_steps, n_batch, n_features)
        nn_input_tbf = cgt.dimshuffle(nn_input_btf, [1, 0, 2])
        num_batch = nn_input_tbf.shape[1]

        def slice_w(x, n):
            # Columns of the stacked pre-activation that belong to gate n.
            start = n*self.num_units
            stop = (n+1)*self.num_units
            return x[:, start:stop]

        def step(input_n, cell_previous, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):
            # Input contribution (with biases) plus hidden contribution gives
            # the stacked pre-activations of all four gates.
            from_input = cgt.broadcast(
                "+", cgt.dot(input_n, W_in_stacked), b_stacked, "xx,1x")
            pre_activations = from_input + cgt.dot(hid_previous, W_hid_stacked)

            # Slice each gate out of the stack and apply its nonlinearity.
            in_gate = self.nonlinearity_ingate(slice_w(pre_activations, 0))
            forget_gate = self.nonlinearity_forgetgate(slice_w(pre_activations, 1))
            candidate = self.nonlinearity_cell(slice_w(pre_activations, 2))
            out_gate = self.nonlinearity_outgate(slice_w(pre_activations, 3))

            # New cell value, then the new hidden activation derived from it.
            new_cell = forget_gate*cell_previous + in_gate*candidate
            new_hid = out_gate*self.nonlinearity(new_cell)
            return [new_cell, new_hid]

        # One copy of each initial state per batch row.
        batch_ones = cgt.ones((num_batch, 1))
        initial_cell = cgt.dot(batch_ones, self.cell_init)
        initial_hidden = cgt.dot(batch_ones, self.hid_init)

        # The step function receives the stacked hidden weights, input
        # weights and biases, in that order, as non-sequence arguments.
        cell_out, hid_out = unroll_lstm(
            fn=step,
            sequences=nn_input_tbf,
            outputs_info=[initial_cell, initial_hidden],
            go_backwards=self.backwards,
            non_sequences=[self.W_hid_stacked, self.W_in_stacked, self.b_stacked],
            n_steps=self.timesteps)

        # Back to (n_batch, n_time_steps, n_features)
        hid_out = cgt.dimshuffle(hid_out, [1, 0, 2])

        # A backwards pass yields the steps reversed; undo that along axis 1.
        return cgt.flip(hid_out, [1]) if self.backwards else hid_out
Exemplo n.º 9
0
def ones(shape):
    """Return the result of ``cgt.ones`` for the given ``shape``."""
    filled = cgt.ones(shape)
    return filled
Exemplo n.º 10
0
def ones(shape):
    """Return the result of ``cgt.ones`` for the given ``shape``."""
    filled = cgt.ones(shape)
    return filled