Пример #1
    def get_context(self, prev_state_bf):
        """Blend the stored features into a single context using attention weights.

        The previous state is passed through ``self.states_mlp_bf``, added
        (with broadcasting) to ``self.features_post_mlp_btf``, squashed with
        tanh, and reduced against ``self.mixing_vec_w`` to one scalar energy
        per position along axis 1. A softmax over axis 1 turns the energies
        into weights, and the features are summed over axis 1 under those
        weights.

        The ``_bf`` / ``_btf`` suffixes suggest (batch, feature) and
        (batch, time, feature) layouts -- TODO confirm against the caller.

        :param prev_state_bf: previous decoder state fed to ``self.states_mlp_bf``.
        :return: the weighted sum of the features over axis 1.
        """
        # Project the state and insert a singleton middle axis so it can be
        # broadcast against the 3-D feature tensor.
        projected_b1f = cgt.dimshuffle(self.states_mlp_bf(prev_state_bf), [0, 'x', 1])
        features_btf = self.features_post_mlp_btf

        # tau = tanh(features + projected state), one addition for all positions.
        tau_btf = cgt.tanh(
            cgt.broadcast('+', features_btf, projected_b1f, 'xxx,x1x'))

        # w^T * tau: pointwise product with the mixing vector, then sum over
        # the last axis.
        energies_bt = cgt.sum(
            cgt.broadcast('*', self.mixing_vec_w, tau_btf, '11x,xxx'),
            axis=2)

        # Normalise the energies over axis 1 and append a trailing singleton
        # axis so every feature shares the same weight.
        weights_bt1 = cgt.dimshuffle(nn.softmax(energies_bt, axis=1), [0, 1, 'x'])

        # Weight the features and integrate out axis 1.
        return cgt.sum(
            cgt.broadcast('*', weights_bt1, features_btf, 'xx1,xxx'),
            axis=1)
Пример #2
    def get_context_backup(self, prev_state_bf):
        """Loop-based attention context over the first three time steps.

        For each time step the inner product between the sigmoid-squashed
        state projection and that step's features is computed; the products
        are stacked, passed through ``softmax``, and used to form a weighted
        sum of the per-step features.

        NOTE(review): the number of time steps is hard-coded to 3 in both
        loops; confirm this matches ``self.features_post_mlp_btf``.

        :param prev_state_bf: previous decoder state fed to ``self.states_mlp_bf``.
        :return: the softmax-weighted sum of the per-step feature slices.
        """
        state_step_bf = cgt.sigmoid(self.states_mlp_bf(prev_state_bf))

        product_list = []
        for time_step in range(0, 3):
            inner_product = cgt.sum(state_step_bf*self.features_post_mlp_btf[:, time_step, :], axis=1)
            # The energies must be collected, otherwise the stack below is
            # built from an empty list.
            product_list.append(inner_product)
        st = cgt.stack(product_list)
        # Stacking puts the time-step index first; swap the two axes.
        st = cgt.dimshuffle(st, [1, 0])
        softmax_weights = softmax(st)

        # Accumulate every weighted time step. The local is not called `sum`
        # so the builtin stays usable.
        blended = None
        for time_step in range(0, 3):
            softmax_t_step = cgt.dimshuffle(softmax_weights[:, time_step], [0, 'x'])
            weighted = cgt.broadcast('*', softmax_t_step, self.features_post_mlp_btf[:, time_step, :], 'x1,xx')
            blended = weighted if blended is None else blended + weighted

        return blended
Пример #3
    def make_prediction(self, max_label_length, ground_labels_basis_btc):
        """Unroll the decoder for ``max_label_length`` steps and return argmax labels.

        The context and the decoder state both start as freshly created
        parameters drawn from ``IIDGaussian(0.1)``. At every step the slice
        ``ground_labels_basis_btc[:, step, :]`` is fed in as the previous
        output, the state and context are updated, and the argmax over
        axis 1 of the character distribution is recorded.

        NOTE(review): the original author questioned whether iterating up to
        ``max_label_length`` is right ("Is this right?"); left unchanged.

        :param max_label_length: number of decoding steps to unroll.
        :param ground_labels_basis_btc: tensor sliced along axis 1 to provide
            the previous output for each step.
        :return: the per-step argmax results stacked and transposed so the
            step index is the second axis.
        """
        # Creation order (context first, then state) is kept so any random
        # draws happen in the same sequence.
        context = parameter(
            init_array(IIDGaussian(0.1), (self.batch_size, self.feature_size)),
            name=None)
        state = parameter(
            init_array(IIDGaussian(0.1), (self.batch_size, self.decoder_size)),
            name=None)

        predicted_steps = []
        for step in range(max_label_length):
            previous_output = ground_labels_basis_btc[:, step, :]
            state = self.get_decoder_state(context, previous_output, state)
            context = self.get_context(state)
            distribution = self.get_character_distribution(state, context)
            predicted_steps.append(cgt.argmax(distribution, axis=1))

        # Stacking puts the step index first; move it to the second axis.
        return cgt.dimshuffle(cgt.stack(predicted_steps), [1, 0])
Пример #4
def pyramidLayer(nn_input, temporal_resolution_decrease=2):
    """
    Batch by time by features. Decreases temporal resolution and increases feature dimension by a resolution decrease factor.

    Consecutive groups of ``temporal_resolution_decrease`` slices along
    axis 1 are concatenated along their feature axis, so the output has
    ``t_steps / temporal_resolution_decrease`` steps.

    :param nn_input: 3-D input whose axis 1 is grouped.
    :param temporal_resolution_decrease: number of consecutive steps merged
        into one output step.
    :return: the merged groups, stacked and reordered with ``[1, 0, 2]``.
    :raises ValueError: if the size of axis 1 is not a multiple of
        ``temporal_resolution_decrease``.
    """
    t_steps = cgt.infer_shape(nn_input)[1]
    if t_steps % temporal_resolution_decrease != 0:
        raise ValueError('number of timesteps is not divisable by resolution decrease!')
    out_list = []
    for iter_step in range(0, t_steps, temporal_resolution_decrease):
        # Gather the consecutive slices that collapse into one output step.
        window = [nn_input[:, iter_step + offset, :]
                  for offset in range(temporal_resolution_decrease)]
        out_list.append(cgt.concatenate(window, axis=1))
    # Stacking puts the new step index first; swap it with the leading axis.
    return cgt.dimshuffle(cgt.stack(out_list), [1, 0, 2])
Пример #5
    def __call__(self, x):
        """Apply the recurrent layer to ``x`` and return the hidden outputs.

        NOTE(review): the ``unroll_recurrence(`` call below has no arguments
        and no closing parenthesis in this copy of the file, so the method
        does not parse as written. The missing arguments must be restored
        from the original source; ``step`` and ``hid_init_bh`` are presumably
        among them -- TODO confirm.
        """
        input_btf = x
        # Swap the first two axes (the names suggest batch-major -> time-major).
        input_tbf = cgt.dimshuffle(input_btf, [1, 0, 2])
        seq_len, num_batch = input_tbf.shape[0], input_tbf.shape[1]

        def step(input_bh, hid_previous_bh):
            # One recurrence step: activation(hid_to_hid(h_prev) + in_to_hid(x)).
            hid_pre_bh = self.hid_to_hid(hid_previous_bh)
            hid_pre_bh += self.in_to_hid(input_bh)
            return self.activation(hid_pre_bh)

        # Multiply a (num_batch, 1) column of ones by self.hid_init to repeat
        # the initial state per batch row (assumes hid_init is a single row
        # -- TODO confirm).
        hid_init_bh = cgt.dot(cgt.ones((num_batch, 1)), self.hid_init)

        # NOTE(review): truncated call -- arguments and closing parenthesis
        # are missing here.
        hid_out_tbf = unroll_recurrence(

        # Swap the first two axes back.
        hid_out_btf = cgt.dimshuffle(hid_out_tbf, [1, 0, 2])
        if self.backwards:
            # Reverse along axis 1 when the layer runs backwards.
            hid_out_btf = cgt.flip(hid_out_btf, [1])
        return hid_out_btf
Пример #6
def temporalDenseLayer(nn_input, num_units, activation=rectify, w_init=XavierNormal(), bias_init=Constant(0)):
    """
    Batch by time by features.

    Applies one shared ``Affine`` layer followed by ``activation`` to every
    slice along axis 1 of ``nn_input`` and stacks the results.

    :param nn_input: input tensor; inputs with more than three dimensions
        are reshaped first.
    :param num_units: output size handed to ``Affine``.
    :param activation: callable applied to each affine output.
    :param w_init: weight initialiser forwarded to ``Affine``.
    :param bias_init: bias initialiser forwarded to ``Affine``.
    :return: the per-step outputs, stacked and reordered with ``[1, 0, 2]``.
    """
    if len(nn_input.shape) > 3:
        # NOTE(review): the last entry is a shape *slice* nested inside the
        # list rather than a single flattened size -- confirm this is what
        # ``reshape`` expects.
        nn_input = nn_input.reshape([nn_input.shape[0], nn_input.shape[1], nn_input.shape[2:]])
    dims = cgt.infer_shape(nn_input)
    temporal_dims = dims[1]
    feature_dims = dims[2]
    # A single Affine instance is reused so every step shares its weights.
    affine_underbelly = Affine(feature_dims, num_units, weight_init=w_init, bias_init=bias_init)
    out_list = []
    for iter_step in range(0, temporal_dims):
        input_slice = nn_input[:, iter_step, :]
        # Without collecting the per-step outputs, the stack below would be
        # built from an empty list.
        out_list.append(activation(affine_underbelly(input_slice)))
    # Stacking puts the step index first; swap it with the leading axis.
    return cgt.dimshuffle(cgt.stack(out_list), [1, 0, 2])
Пример #7
    def __init__(self, input, n_in, n_out, W=None, b=None,
                 activation=cgt.tanh, prefix=""):
        """Build a fully connected layer computing ``activation(input . W + b)``.

        :param input: symbolic input multiplied by ``W``.
        :param n_in: number of input units (rows of ``W``).
        :param n_out: number of output units (columns of ``W``, length of ``b``).
        :param W: optional existing weight variable; when ``None`` a new one
            is sampled uniformly in ``+/- sqrt(6 / (n_in + n_out))`` and
            scaled by 4 for a sigmoid activation.
        :param b: optional existing bias variable; when ``None`` a zero
            vector of length ``n_out`` is created.
        :param activation: callable applied to the linear output, or ``None``
            to keep the layer linear.
        :param prefix: string prepended to the names of the created
            ``_W`` / ``_b`` shared variables.
        """
        self.n_in = n_in
        self.n_out = n_out

        if W is None:
            # XXX replace with nn init
            # NOTE(review): the sampler call was missing from this copy of
            # the file; the low/high/size keywords identify a uniform draw.
            # ``np.random.uniform`` is used here -- if the file owns a seeded
            # RNG object, swap it in.
            bound = np.sqrt(6. / (n_in + n_out))
            W_values = np.asarray(
                np.random.uniform(
                    low=-bound,
                    high=bound,
                    size=(n_in, n_out)
                ),
                dtype=cgt.floatX
            )
            if activation == cgt.sigmoid:
                W_values *= 4

            W = cgt.shared(W_values, name=prefix+"_W")

        if b is None:
            b_values = np.zeros((n_out,), dtype=cgt.floatX)
            b = cgt.shared(b_values, name=prefix+"_b")

        self.W = W
        self.b = b

        # XXX broadcast api may change
        # The bias gets a leading singleton axis so it broadcasts over rows.
        lin_output = cgt.broadcast("+", cgt.dot(input, self.W),
                cgt.dimshuffle(self.b, ["x", 0]), "xx,1x")
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )
        # parameters of the model
        self.params = [self.W, self.b]
Пример #8
    def __call__(self, input_btf):
        """Apply the gated recurrent (GRU-style) layer to ``input_btf``.

        The three input-to-gate weight matrices, the three hidden-to-gate
        matrices and the three gate biases are concatenated along axis 1, a
        per-step function is defined, and the recurrence is handed to
        ``unroll_lstm``. The result is reordered with ``[1, 0, 2]`` and
        flipped along axis 1 when ``self.backwards`` is set.

        Side effect: stores ``self.num_batches``.

        NOTE(review): the ``unroll_lstm(`` call below has no arguments and no
        closing parenthesis in this copy of the file, so the method does not
        parse as written. ``sequences``, ``step_fun``, ``hid_init`` and
        ``non_seqs`` are assigned but never used in the visible code and are
        presumably the missing arguments -- TODO confirm against the
        original source.
        """

        # (n_time_steps, n_batch, n_features)
        input_tbf = cgt.dimshuffle(input_btf, [1, 0, 2])
        self.num_batches = cgt.infer_shape(input_tbf)[1]

        # Stack input weight matrices into a (num_inputs, 3*num_units)
        # matrix, which speeds up computation
        W_in_stacked = cgt.concatenate(
            [self.W_in_to_resetgate, self.W_in_to_updategate,
             self.W_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        W_hid_stacked = cgt.concatenate(
            [self.W_hid_to_resetgate, self.W_hid_to_updategate,
             self.W_hid_to_hidden_update], axis=1)

        # Stack gate biases along axis 1. The axis=1 concatenation and the
        # "xx,1x" broadcast pattern used in step() suggest each bias is
        # stored as a single row rather than a flat vector -- TODO confirm.
        b_stacked = cgt.concatenate(
            [self.b_resetgate, self.b_updategate,
             self.b_hidden_update], axis=1)

        # After the stacked projection each row holds the three gate
        # pre-activations side by side (3*num_units columns).
        # We define a slicing function that extract the input to each GRU gate
        def slice_w(x, n):
            # Columns [n*num_units, (n+1)*num_units) belong to gate n.
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Create single recurrent computation step function
        # input__n is the n'th vector of the input
        def step(input_n, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, and W_{hc} h_{t - 1}
            hid_input = cgt.dot(hid_previous, W_hid_stacked)

            # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
            input_n = cgt.broadcast("+", input_n.dot(W_in_stacked), b_stacked, "xx,1x")

            # Reset and update gates
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            resetgate = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)

            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            # NOTE(review): no nonlinearity is applied to hidden_update in the
            # visible code before it is mixed into the new state -- confirm
            # whether that is intended.
            hidden_update_in = slice_w(input_n, 2)
            hidden_update_hid = slice_w(hid_input, 2)
            hidden_update = hidden_update_in + resetgate*hidden_update_hid

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate)*hid_previous + updategate*hidden_update
            return hid

        sequences = [input_tbf]
        step_fun = step
        # Multiply a (num_batches, 1) column of ones by self.hid_init to
        # repeat the initial state per batch row (assumes hid_init is a
        # single row -- TODO confirm).
        hid_init = cgt.dot(cgt.ones((self.num_batches, 1)), self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_seqs = [W_hid_stacked]
        # The input projection is computed inside step(), so the input
        # weights and biases are passed along as well.
        non_seqs += [W_in_stacked, b_stacked]
        # NOTE(review): the original comments here referred to theano.scan
        # and a self.precompute_input switch; neither appears in the visible
        # code.

        # NOTE(review): truncated call -- arguments and closing parenthesis
        # are missing here.
        hid_out = unroll_lstm(

        # dimshuffle back to (n_batch, n_time_steps, n_features))
        hid_out = cgt.dimshuffle(hid_out, [1, 0, 2])

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = cgt.flip(hid_out, [1])

        return hid_out
Пример #9
    def __call__(self, nn_input_btf):
        """Apply the LSTM-style layer to ``nn_input_btf`` and return the hidden outputs.

        A per-step function computes the four gate pre-activations from the
        stacked weights, applies the gate nonlinearities, and updates the
        cell and hidden values. The recurrence is handed to ``unroll_lstm``;
        the hidden output is reordered with ``[1, 0, 2]`` and flipped along
        axis 1 when ``self.backwards`` is set.

        NOTE(review): the ``unroll_lstm(`` call below is cut off after its
        first keyword argument and has no closing parenthesis in this copy
        of the file, so the method does not parse as written. ``sequences``,
        ``step_fun`` and ``non_seqs`` are assigned but never used in the
        visible code and are presumably the missing arguments -- TODO
        confirm against the original source.
        """

        # Because scan iterates over the first dimension we dimshuffle to
        # (n_time_steps, n_batch, n_features)
        nn_input_tbf = cgt.dimshuffle(nn_input_btf, [1, 0, 2])
        seq_len, num_batch = nn_input_tbf.shape[0], nn_input_tbf.shape[1]

        def slice_w(x, n):
            # Columns [n*num_units, (n+1)*num_units) belong to gate n.
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def step(input_n, cell_previous, hid_previous, W_hid_stacked, W_in_stacked, b_stacked):

            # Project the input and add the bias, broadcast over rows.
            input_n = cgt.broadcast("+", cgt.dot(input_n, W_in_stacked), b_stacked, "xx,1x")

            # Calculate gates pre-activations and slice
            gates = input_n + cgt.dot(hid_previous, W_hid_stacked)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            # Apply nonlinearities
            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)
            outgate = self.nonlinearity_outgate(outgate)

            # Compute new cell value
            cell = forgetgate*cell_previous + ingate*cell_input
            # Compute new hidden unit activation
            hid = outgate*self.nonlinearity(cell)
            return [cell, hid]

        sequences = nn_input_tbf
        step_fun = step

        # Multiply a (num_batch, 1) column of ones by the stored initial
        # values to repeat them per batch row (assumes cell_init / hid_init
        # are single rows -- TODO confirm).
        ones = cgt.ones((num_batch, 1))
        cell_init = cgt.dot(ones, self.cell_init)
        hid_init = cgt.dot(ones, self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_seqs = [self.W_hid_stacked]
        non_seqs += [self.W_in_stacked, self.b_stacked]
        # NOTE(review): truncated call -- only outputs_info survives; the
        # remaining arguments and the closing parenthesis are missing.
        cell_out, hid_out = unroll_lstm(
            outputs_info=[cell_init, hid_init],

        # dimshuffle back to (n_batch, n_time_steps, n_features))
        hid_out = cgt.dimshuffle(hid_out, [1, 0, 2])

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = cgt.flip(hid_out, [1])

        return hid_out
Пример #10
def dimshuffle(x, *pattern):
    """Call ``cgt.dimshuffle`` with the pattern given either way.

    The pattern may be passed as separate positional arguments
    (``dimshuffle(x, 1, 0)``) or as one list/tuple
    (``dimshuffle(x, [1, 0])``). With no pattern arguments an empty pattern
    is forwarded instead of failing on ``pattern[0]``.

    :param x: tensor handed to ``cgt.dimshuffle``.
    :param pattern: axis pattern, unpacked or wrapped in one list/tuple.
    :return: whatever ``cgt.dimshuffle`` returns for the pattern as a list.
    """
    # Check for emptiness first so a call without a pattern does not raise
    # IndexError before reaching cgt.
    if pattern and isinstance(pattern[0], (list, tuple)):
        pattern = pattern[0]
    return cgt.dimshuffle(x, list(pattern))
