def __init__(self, inputs, mask, load_from=None, rand_init_params=None):
        '''Build a sigmoid recurrent layer over a masked input sequence.

        inputs: symbolic tensor whose shape unpacks into
            (n_steps, n_samples, emb_dim); it is scanned along its first axis.
        mask: scanned in lockstep with inputs. Each per-step slice is indexed
            with [:, None], so it is expected to be a vector over samples:
            1 keeps the freshly computed state, 0 carries the previous one.
        load_from: if not None, W, U and b are unpickled from it, in that
            order (so it must be something pickle.load can read from).
        rand_init_params: (rng, (n_in, n_out)), used when load_from is None
            n_in = emb_dim (* context window size)
            n_out = n_hidden

        Raises Exception if both load_from and rand_init_params are None.
        '''
        self.inputs = inputs
        self.mask = mask

        if load_from is None and rand_init_params is None:
            raise Exception('Invalid initial inputs!')

        if load_from is not None:
            # NOTE(review): pickle.load executes arbitrary code from the
            # stream; only feed it parameter files you trust.
            # The three arrays are read back in the order W, U, b.
            in_weights, rec_weights, bias = [
                pickle.load(load_from) for _ in range(3)
            ]
            n_hidden = in_weights.shape[1]
        else:
            rng, (n_in, n_hidden) = rand_init_params

            # Same bound is used for both the input and recurrent matrices.
            bound = 4 * (6 / (n_in + n_hidden))**0.5

            # Draw W before U so the rng is consumed in a fixed order.
            in_weights = rand_matrix(rng, bound, (n_in, n_hidden))
            rec_weights = rand_matrix(rng, bound, (n_hidden, n_hidden))
            bias = np.zeros(n_hidden, dtype=theano.config.floatX)

        self.W = theano.shared(value=in_weights, name='rnn_W', borrow=True)
        self.U = theano.shared(value=rec_weights, name='rnn_U', borrow=True)
        self.b = theano.shared(value=bias, name='rnn_b', borrow=True)

        self.params = [self.W, self.U, self.b]

        def _advance(mask_t, x_t, h_prev):
            # Candidate state combines the current input x(t), the previous
            # hidden state h(t-1) and the hidden bias b.
            pre_activation = (
                T.dot(x_t, self.W) + T.dot(h_prev, self.U) + self.b)
            candidate = T.nnet.sigmoid(pre_activation)
            # Where the mask is 0 the previous state is passed through.
            keep = mask_t[:, None]
            return keep * candidate + (1 - mask_t)[:, None] * h_prev

        # Only the sample count is needed; the unpacking still requires a
        # three-dimensional input.
        _n_steps, n_samples, _emb_dim = inputs.shape
        initial_hidden = T.alloc(
            np.asarray(0., dtype=theano.config.floatX), n_samples, n_hidden)
        hidden_seq, _scan_updates = theano.scan(
            fn=_advance,
            sequences=[mask, inputs],
            outputs_info=[initial_hidden])

        self.outputs = hidden_seq
    def __init__(self,
                 inputs,
                 load_from=None,
                 rand_init_params=None,
                 gensim_w2v=None,
                 dic=None):
        '''Build an embedding lookup layer: outputs = W[inputs].

        inputs: symbolic index variable used to select rows of W.
        load_from: if not None, W is unpickled from it and used as is
            (no word2vec substitution and no normalize_matrix call).
        rand_init_params: (rng, (voc_dim, emb_dim)), used when load_from
            is None.
        gensim_w2v, dic: optional, only used with rand_init_params and only
            when both are given. Every (idx, word) in dic._idx2word whose
            word is found in gensim_w2v.wv gets row idx of W replaced by
            the word2vec vector.

        Raises Exception if both load_from and rand_init_params are None.
        '''
        self.inputs = inputs

        if load_from is None and rand_init_params is None:
            raise Exception('Invalid initial inputs!')

        if load_from is not None:
            # NOTE(review): pickle.load executes arbitrary code from the
            # stream; only feed it parameter files you trust.
            table = pickle.load(load_from)
        else:
            rng, (voc_dim, emb_dim) = rand_init_params
            table = rand_matrix(rng, 1, (voc_dim, emb_dim))

            have_pretrained = not (gensim_w2v is None or dic is None)
            if have_pretrained:
                assert gensim_w2v.vector_size == emb_dim

                replaced = 0
                for idx, word in dic._idx2word.items():
                    if word not in gensim_w2v.wv:
                        continue
                    table[idx] = gensim_w2v.wv[word]
                    replaced += 1
                print('Substituted words by word2vec: %d/%d' %
                      (replaced, voc_dim))

            # Applied after substitution, so pretrained rows go through
            # normalize_matrix as well.
            table = normalize_matrix(table)

        self.W = theano.shared(value=table, name='emb_W', borrow=True)

        self.params = [self.W]
        self.outputs = self.W[inputs]
示例#3
0
    def __init__(self, inputs, activation=T.tanh, load_from=None, rand_init_params=None):
        '''Build a fully connected layer: outputs = activation(inputs . W + b).

        inputs: symbolic input, right-multiplied by W.
        activation: callable applied to the affine result, or None to leave
            the output linear. When it is T.nnet.sigmoid the random initial
            weights are scaled by 4.
        load_from: if not None, W and b are unpickled from it, in that order.
        rand_init_params: (rng, (n_in, n_out)), used when load_from is None.

        Raises Exception if both load_from and rand_init_params are None.
        '''
        self.inputs = inputs
        self.activation = activation

        if load_from is None and rand_init_params is None:
            raise Exception('Invalid initial inputs!')

        if load_from is not None:
            # NOTE(review): pickle.load executes arbitrary code from the
            # stream; only feed it parameter files you trust.
            weights = pickle.load(load_from)
            bias = pickle.load(load_from)
        else:
            rng, (n_in, n_out) = rand_init_params

            bound = (6 / (n_in + n_out)) ** 0.5
            weights = rand_matrix(rng, bound, (n_in, n_out))
            if activation is T.nnet.sigmoid:
                # Sigmoid units start from a four times wider range.
                weights *= 4
            bias = np.zeros(n_out, dtype=theano.config.floatX)

        self.W = theano.shared(value=weights, name='hidden_W', borrow=True)
        self.b = theano.shared(value=bias, name='hidden_b', borrow=True)

        self.params = [self.W, self.b]

        pre_activation = T.dot(inputs, self.W) + self.b
        if activation is None:
            self.outputs = pre_activation
        else:
            self.outputs = activation(pre_activation)
    def __init__(self,
                 inputs,
                 image_shape,
                 load_from=None,
                 rand_init_params=None):
        '''Build a tanh convolution layer on top of conv.conv2d.

        inputs: (batch size, stack size, n_words/steps, emb_dim)
        load_from: if not None, W and b are unpickled from it, in that
            order; filter_shape is then taken from W's shape.
        rand_init_params: (rng, filter_shape), used when load_from is None.

        filter_shape:
            (output stack size, input stack size, filter height, filter width)
            output stack size: not fixed here
            input stack size:  1
            filter height:     not fixed here
            filter width:      emb_dim (* context window size)

        image_shape (shape of inputs):
            (batch_size, input stack size, feature map height, feature map width)
            batch_size:         not fixed here
            input stack size:   1
            feature map height: n_words/steps
            feature map width:  emb_dim (* context window size)

        Shape of self.outputs:
            (batch size, output stack size, feature map height, feature map width)
            feature map height: n_words/steps - filter height + 1
            feature map width:  1

        Raises Exception if both load_from and rand_init_params are None.
        '''
        self.inputs = inputs

        if load_from is None and rand_init_params is None:
            raise Exception('Invalid initial inputs!')

        if load_from is not None:
            # NOTE(review): pickle.load executes arbitrary code from the
            # stream; only feed it parameter files you trust.
            kernels = pickle.load(load_from)
            bias = pickle.load(load_from)

            filter_shape = kernels.shape
        else:
            rng, filter_shape = rand_init_params

            # Units covered by one filter position, shared by both fans.
            receptive_field = filter_shape[2] * filter_shape[3]
            fan_in = filter_shape[1] * receptive_field
            fan_out = filter_shape[0] * receptive_field
            bound = (6 / (fan_in + fan_out))**0.5

            kernels = rand_matrix(rng, bound, filter_shape)
            # One bias per output stack (feature map).
            bias = np.zeros(filter_shape[0], dtype=theano.config.floatX)

        self.W = theano.shared(value=kernels, name='conv_W', borrow=True)
        self.b = theano.shared(value=bias, name='conv_b', borrow=True)
        self.params = [self.W, self.b]

        feature_maps = conv.conv2d(input=self.inputs,
                                   filters=self.W,
                                   filter_shape=filter_shape,
                                   image_shape=image_shape)
        # Insert broadcast axes so the bias lines up with the stack axis.
        bias_4d = self.b[None, :, None, None]
        self.outputs = T.tanh(feature_maps + bias_4d)
示例#5
0
    def __init__(self, inputs, mask, load_from=None, rand_init_params=None):
        '''Build an LSTM layer over a masked input sequence.

        The four gates share one weight matrix each for the input (W), the
        recurrent connection (U) and the bias (b); the column blocks are
        ordered [input, forget, output, cell candidate].

        inputs: symbolic tensor whose shape unpacks into
            (n_steps, n_samples, emb_dim); it is scanned along its first axis.
        mask: scanned in lockstep with inputs; 1 keeps the newly computed
            state, 0 carries the previous hidden and cell state forward.
        load_from: if not None, W, U and b are unpickled from it, in that
            order; n_hidden is a quarter of W's column count.
        rand_init_params: (rng, (n_in, n_out)), used when load_from is None
            n_in = emb_dim (* context window size)
            n_out = n_hidden

        Only the hidden-state sequence is exposed, as self.outputs.
        Raises Exception if both load_from and rand_init_params are None.
        '''
        self.inputs = inputs
        self.mask = mask

        if load_from is None and rand_init_params is None:
            raise Exception('Invalid initial inputs!')

        if load_from is not None:
            # NOTE(review): pickle.load executes arbitrary code from the
            # stream; only feed it parameter files you trust.
            # The three arrays are read back in the order W, U, b.
            in_weights, rec_weights, bias = [
                pickle.load(load_from) for _ in range(3)
            ]
            n_hidden = in_weights.shape[1] // 4
        else:
            rng, (n_in, n_hidden) = rand_init_params

            tanh_bound = (6 / (n_in + n_hidden * 2))**0.5
            sigmoid_bound = 4 * tanh_bound
            # The last block feeds a tanh, so it is scaled back down from
            # the sigmoid bound to the tanh bound.
            cell_cols = slice(3 * n_hidden, 4 * n_hidden)

            # [Wi, Wf, Wo, Wc]
            in_weights = rand_matrix(rng, sigmoid_bound, (n_in, 4 * n_hidden))
            in_weights[:, cell_cols] /= 4
            # [Ui, Uf, Uo, Uc]
            rec_weights = rand_matrix(rng, sigmoid_bound,
                                      (n_hidden, 4 * n_hidden))
            rec_weights[:, cell_cols] /= 4
            # [bi, bf, bo, bc]
            bias = np.zeros(4 * n_hidden, dtype=theano.config.floatX)

        self.W = theano.shared(value=in_weights, name='lstm_W', borrow=True)
        self.U = theano.shared(value=rec_weights, name='lstm_U', borrow=True)
        self.b = theano.shared(value=bias, name='lstm_b', borrow=True)

        self.params = [self.W, self.U, self.b]

        def _block(pre_activation, k):
            # Columns belonging to the k-th gate (0=i, 1=f, 2=o, 3=c).
            return pre_activation[:, (k * n_hidden):((k + 1) * n_hidden)]

        def _advance(mask_t, x_t, h_prev, c_prev):
            # x_t holds the embeddings of the words at one position across
            # the minibatch: a matrix of shape (n_samples, n_emb).
            # mask_t is one row of the mask matrix: a vector (n_samples, ).
            # Hidden and cell states are all (n_samples, n_hidden).
            pre_activation = (
                T.dot(x_t, self.W) + T.dot(h_prev, self.U) + self.b)

            in_gate = T.nnet.sigmoid(_block(pre_activation, 0))
            forget_gate = T.nnet.sigmoid(_block(pre_activation, 1))
            out_gate = T.nnet.sigmoid(_block(pre_activation, 2))
            candidate = T.tanh(_block(pre_activation, 3))

            keep = mask_t[:, None]
            carry = (1 - mask_t)[:, None]

            c_new = forget_gate * c_prev + in_gate * candidate
            c_new = keep * c_new + carry * c_prev

            h_new = out_gate * T.tanh(c_new)
            h_new = keep * h_new + carry * h_prev
            return h_new, c_new

        def _zero_state():
            # Fresh all-zero (n_samples, n_hidden) initial state.
            return T.alloc(np.asarray(0., dtype=theano.config.floatX),
                           n_samples, n_hidden)

        # Only the sample count is needed; the unpacking still requires a
        # three-dimensional input.
        _n_steps, n_samples, _emb_dim = inputs.shape
        (hidden_seq, _cell_seq), _scan_updates = theano.scan(
            fn=_advance,
            sequences=[mask, inputs],
            outputs_info=[_zero_state(), _zero_state()])
        self.outputs = hidden_seq