예제 #1
0
    def __init__(self, input, n_in, n_out, prefix='hidden', W=None, b=None,
            activation=T.tanh):
        """Build a dense layer computing ``activation(dot(input, W) + b)``.

        Parameters
        ----------
        input : symbolic tensor
            Layer input; it is multiplied with ``W`` through ``T.dot``.
        n_in : int
            Number of rows of ``W``.
        n_out : int
            Number of columns of ``W`` and length of ``b``.
        prefix : str
            Prefix handed to ``_p`` to build the parameter names.
        W : shared variable, optional
            Existing weights to reuse; randomly initialised when ``None``.
        b : shared variable, optional
            Existing bias to reuse; initialised to zeros when ``None``.
        activation : callable or None
            Applied to the pre-activation; ``None`` leaves the layer linear.
        """
        self.input = input

        if W is None:
            # Uniform initialisation in +/- sqrt(6 / (n_in + n_out)).
            bound = numpy.sqrt(6. / (n_in + n_out))
            W_values = numpy.asarray(
                np.random.uniform(low=-bound, high=bound, size=(n_in, n_out)),
                dtype=theano.config.floatX)
            # Sigmoid layers start from a four times wider range.
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4
            W = theano.shared(value=W_values, name=_p(prefix, 'W'), borrow=True)

        if b is None:
            b = theano.shared(
                value=numpy.zeros((n_out,), dtype=theano.config.floatX),
                name=_p(prefix, 'b'),
                borrow=True)

        self.W, self.b = W, b

        linear = T.dot(input, self.W) + self.b
        self.pre_activation = linear

        if activation is None:
            result = linear
        elif activation == T.nnet.softmax:
            # Collapse every leading axis into one, apply softmax on the
            # resulting matrix, then restore the original shape.
            full_shape = linear.shape
            as_matrix = linear.reshape((T.prod(full_shape[:-1]), full_shape[-1]))
            result = T.nnet.softmax(as_matrix).reshape(full_shape)
        else:
            result = activation(linear)
        self.output = result

        self.params = {_p(prefix, 'W'): W, _p(prefix, 'b'): b}
예제 #2
0
    def __init__(self,
                 input,
                 n_in,
                 n_out,
                 prefix='hidden',
                 W=None,
                 b=None,
                 activation=T.tanh):
        """Create a fully connected layer: ``activation(dot(input, W) + b)``.

        ``W`` (``n_in`` x ``n_out``) and ``b`` (``n_out``) are created as
        shared variables named via ``_p(prefix, ...)`` unless the caller
        supplies them.  ``activation=None`` yields the raw pre-activation.
        The result is exposed as ``self.output`` and the parameters as the
        ``self.params`` dictionary.
        """
        self.input = input

        if W is None:
            # Symmetric uniform range derived from the layer's fan-in/fan-out.
            limit = numpy.sqrt(6. / (n_in + n_out))
            sampled = np.random.uniform(low=-limit,
                                        high=limit,
                                        size=(n_in, n_out))
            W_values = numpy.asarray(sampled, dtype=theano.config.floatX)
            if activation == theano.tensor.nnet.sigmoid:
                # Wider initial weights for sigmoid units.
                W_values *= 4
            W = theano.shared(value=W_values,
                              name=_p(prefix, 'W'),
                              borrow=True)

        if b is None:
            zero_bias = numpy.zeros((n_out, ), dtype=theano.config.floatX)
            b = theano.shared(value=zero_bias,
                              name=_p(prefix, 'b'),
                              borrow=True)

        self.W = W
        self.b = b
        self.pre_activation = T.dot(input, self.W) + self.b

        if activation is None:
            self.output = self.pre_activation
        elif activation == T.nnet.softmax:
            # Softmax is applied on a matrix: merge all leading axes, apply
            # it, then reshape back.
            dims = self.pre_activation.shape
            rows = T.prod(dims[:-1])
            flattened = self.pre_activation.reshape((rows, dims[-1]))
            self.output = T.nnet.softmax(flattened).reshape(dims)
        else:
            self.output = activation(self.pre_activation)

        self.params = {_p(prefix, 'W'): W, _p(prefix, 'b'): b}
예제 #3
0
    def __init__(self, x, x_mask, n_samples_const, n_features, n_hidden_units,
                 prefix='lstm', forget=True):
        """Build a masked LSTM layer unrolled over time with ``theano.scan``.

        Parameters
        ----------
        x : symbolic tensor
            Input; it is reshaped to ``(n_samples, n_features, n_steps)`` and
            then dimshuffled to ``(n_steps, n_samples, n_features)``.
        x_mask : symbolic matrix
            Mask of shape ``(n_samples, n_steps)``.  Where an entry is 0 the
            previous output and cell state are carried over unchanged.
        n_samples_const : int
            Number of rows of the zero initial output and cell state.
            NOTE(review): presumably must match the runtime batch size --
            confirm against callers.
        n_features : int
            Size of the feature axis of ``x``.
        n_hidden_units : int
            Number of hidden units; also stored as ``self.n_out``.
        prefix : str
            Prefix handed to ``_p`` to build the parameter names.
        forget : bool
            When true a separate forget gate is used (four stacked gate
            blocks); otherwise the forget gate is ``1 - input_gate`` and only
            three blocks are allocated.
        """
        # Unpack the symbolic mask shape and move the time axis to the front
        # so that scan iterates over steps.
        n_samples, n_steps = x_mask.shape
        self.x = x.reshape((n_samples, n_features, n_steps)).dimshuffle(2, 0, 1)
        self.x_mask = x_mask.T

        # init the parameters
        self.params = dict()
        # input gate, output gate and candidate, plus an optional forget gate
        num_activations = 3 + forget
        W = self.stacked_ortho_wts(n_features, n_hidden_units, num_activations)
        self.params[_p(prefix, 'W')] = shared(W)
        U = self.stacked_ortho_wts(n_hidden_units, n_hidden_units, num_activations)
        self.params[_p(prefix, 'U')] = shared(U)
        # The bias must be floatX like the initial states below: a float64
        # bias would upcast the step outputs under floatX=float32 and scan
        # would reject the dtype mismatch with outputs_info.
        b = np.zeros(num_activations * n_hidden_units,
                     dtype=theano.config.floatX)
        self.params[_p(prefix, 'b')] = shared(b)

        out0 = shared(np.zeros((n_samples_const, n_hidden_units)).astype(theano.config.floatX))
        cell0 = shared(np.zeros((n_samples_const, n_hidden_units)).astype(theano.config.floatX))

        def _step(m_, x_, out_tmp, cell_tmp):
            """Advance one step: mask row, projected input, previous out/cell."""
            preact = T.dot(out_tmp, self.params[_p(prefix, 'U')]) + x_

            # Column layout of preact: [input | output | forget (optional) | candidate]
            inn_gate = T.nnet.sigmoid(preact[:, :n_hidden_units])
            out_gate = T.nnet.sigmoid(preact[:, n_hidden_units:2*n_hidden_units])
            if forget:
                fgt_gate = T.nnet.sigmoid(
                    preact[:, 2*n_hidden_units:3*n_hidden_units])
            else:
                # Coupled gates: whatever is not written is kept.
                fgt_gate = 1 - inn_gate

            # pre activation, tanh
            cell_val = T.tanh(preact[:, -n_hidden_units:])

            cell_val = fgt_gate * cell_tmp + inn_gate * cell_val
            # Masked-out samples keep their previous cell state.
            cell_val = m_[:, None] * cell_val + (1. - m_)[:, None] * cell_tmp

            # after activation, linear
            out = out_gate * cell_val
            # Masked-out samples keep their previous output.
            out = m_[:, None] * out + (1. - m_)[:, None] * out_tmp

            return out, cell_val

        self.param_w = [self.params[_p(prefix, 'W')]]
        self.param_b = [self.params[_p(prefix, 'b')]]
        # Project the whole input sequence once, outside the recurrence.
        self.x_prime = T.dot(self.x, self.params[_p(prefix, 'W')]) + self.params[_p(prefix, 'b')][None, None, :]

        # The updates returned by scan are not used here.
        rval, _updates = theano.scan(_step,
                                     sequences=[self.x_mask, self.x_prime],
                                     outputs_info=[out0, cell0],
                                     name=_p(prefix, '_layers'))
        # First scan output: the hidden outputs collected over all steps.
        self.output = rval[0]
        self.n_out = n_hidden_units
예제 #4
0
        def _step(m_, x_, out_tmp, cell_tmp):
            """Compute one masked LSTM step.

            ``m_`` is the per-sample mask, ``x_`` the already projected
            input, ``out_tmp``/``cell_tmp`` the previous output and cell
            state.  Returns the new ``(output, cell)`` pair.
            """
            n = n_hidden_units
            gates = T.dot(out_tmp, self.params[_p(prefix, 'U')]) + x_

            # Column layout: [input | output | forget (optional) | candidate]
            input_gate = T.nnet.sigmoid(gates[:, :n])
            output_gate = T.nnet.sigmoid(gates[:, n:2*n])
            if forget:
                forget_gate = T.nnet.sigmoid(gates[:, 2*n:3*n])
            else:
                forget_gate = 1 - input_gate

            # Candidate values come from the last block, squashed with tanh.
            candidate = T.tanh(gates[:, -n:])

            new_cell = forget_gate * cell_tmp + input_gate * candidate
            # Where the mask is 0 the previous cell state is kept.
            new_cell = m_[:, None] * new_cell + (1. - m_)[:, None] * cell_tmp

            # The output is the gated cell value, with no extra non-linearity.
            new_out = output_gate * new_cell
            # Where the mask is 0 the previous output is kept.
            new_out = m_[:, None] * new_out + (1. - m_)[:, None] * out_tmp

            return new_out, new_cell
예제 #5
0
        def _step(m_, x_, out_tmp, cell_tmp):
            """Single recurrence step of the masked LSTM.

            Takes the mask ``m_``, the projected input ``x_`` and the
            previous output/cell state, and returns the updated
            ``(output, cell)`` pair.
            """
            sigmoid = T.nnet.sigmoid
            width = n_hidden_units

            preact = T.dot(out_tmp, self.params[_p(prefix, 'U')]) + x_

            # Gate blocks sit side by side along the second axis.
            write_gate = sigmoid(preact[:, :width])
            read_gate = sigmoid(preact[:, width:2 * width])
            if forget:
                keep_gate = sigmoid(preact[:, 2 * width:3 * width])
            else:
                # No dedicated forget block: couple it to the input gate.
                keep_gate = 1 - write_gate

            # pre activation, tanh
            proposal = T.tanh(preact[:, -width:])

            updated_cell = keep_gate * cell_tmp + write_gate * proposal
            # Blend with the previous cell so masked samples stay unchanged.
            updated_cell = (m_[:, None] * updated_cell +
                            (1. - m_)[:, None] * cell_tmp)

            # after activation, linear
            updated_out = read_gate * updated_cell
            # Blend with the previous output so masked samples stay unchanged.
            updated_out = (m_[:, None] * updated_out +
                           (1. - m_)[:, None] * out_tmp)

            return updated_out, updated_cell
예제 #6
0
    def __init__(self,
                 x,
                 x_mask,
                 n_samples_const,
                 n_features,
                 n_hidden_units,
                 prefix='lstm',
                 forget=True):
        """Construct a masked LSTM layer whose recurrence runs in ``theano.scan``.

        Parameters
        ----------
        x : symbolic tensor
            Input; reshaped to ``(n_samples, n_features, n_steps)`` and then
            dimshuffled so the step axis comes first.
        x_mask : symbolic matrix
            ``(n_samples, n_steps)`` mask; a 0 entry makes that sample keep
            its previous output and cell state at that step.
        n_samples_const : int
            Row count of the zero initial output/cell state.
            NOTE(review): presumably has to equal the actual batch size --
            confirm against callers.
        n_features : int
            Size of the feature axis of ``x``.
        n_hidden_units : int
            Number of hidden units; exposed as ``self.n_out``.
        prefix : str
            Prefix passed to ``_p`` when naming parameters.
        forget : bool
            True allocates a dedicated forget gate (four gate blocks); False
            uses ``1 - input_gate`` instead (three gate blocks).
        """
        # Unpack the symbolic mask shape and put the step axis first.
        n_samples, n_steps = x_mask.shape
        self.x = x.reshape(
            (n_samples, n_features, n_steps)).dimshuffle(2, 0, 1)
        self.x_mask = x_mask.T

        # init the parameters
        self.params = dict()
        # input gate, output gate, candidate, and optionally a forget gate
        num_activations = 3 + forget
        W = self.stacked_ortho_wts(n_features, n_hidden_units, num_activations)
        self.params[_p(prefix, 'W')] = shared(W)
        U = self.stacked_ortho_wts(n_hidden_units, n_hidden_units,
                                   num_activations)
        self.params[_p(prefix, 'U')] = shared(U)
        # Create the bias as floatX, matching out0/cell0 below.  A float64
        # bias upcasts the step outputs when floatX is float32, and scan then
        # fails because they no longer match the dtype of outputs_info.
        b = np.zeros(num_activations * n_hidden_units,
                     dtype=theano.config.floatX)
        self.params[_p(prefix, 'b')] = shared(b)

        out0 = shared(
            np.zeros((n_samples_const,
                      n_hidden_units)).astype(theano.config.floatX))
        cell0 = shared(
            np.zeros((n_samples_const,
                      n_hidden_units)).astype(theano.config.floatX))

        def _step(m_, x_, out_tmp, cell_tmp):
            """One step: mask row, projected input, previous output and cell."""
            preact = T.dot(out_tmp, self.params[_p(prefix, 'U')]) + x_

            # Column layout of preact: [input | output | forget (optional) | candidate]
            inn_gate = T.nnet.sigmoid(preact[:, :n_hidden_units])
            out_gate = T.nnet.sigmoid(
                preact[:, n_hidden_units:2 * n_hidden_units])
            if forget:
                fgt_gate = T.nnet.sigmoid(
                    preact[:, 2 * n_hidden_units:3 * n_hidden_units])
            else:
                # Coupled gates: the forget gate mirrors the input gate.
                fgt_gate = 1 - inn_gate

            # pre activation, tanh
            cell_val = T.tanh(preact[:, -n_hidden_units:])

            cell_val = fgt_gate * cell_tmp + inn_gate * cell_val
            # Masked-out samples keep their previous cell state.
            cell_val = m_[:, None] * cell_val + (1. - m_)[:, None] * cell_tmp

            # after activation, linear
            out = out_gate * cell_val
            # Masked-out samples keep their previous output.
            out = m_[:, None] * out + (1. - m_)[:, None] * out_tmp

            return out, cell_val

        self.param_w = [self.params[_p(prefix, 'W')]]
        self.param_b = [self.params[_p(prefix, 'b')]]
        # Project the full input sequence once, before entering the scan.
        self.x_prime = T.dot(self.x, self.params[_p(
            prefix, 'W')]) + self.params[_p(prefix, 'b')][None, None, :]

        # The updates returned by scan are not used here.
        rval, _updates = theano.scan(_step,
                                     sequences=[self.x_mask, self.x_prime],
                                     outputs_info=[out0, cell0],
                                     name=_p(prefix, '_layers'))
        # First scan output: the hidden outputs collected over all steps.
        self.output = rval[0]
        self.n_out = n_hidden_units