Example #1
    def prediction(self, y_emb, state, context, keep_prob=1.0):
        """
        maxout -> readout -> softmax
        p(y_j) \propto f(y_{j-1}, s_{j-1}, c_{j})
        :param y_emb: 
        :param state: 
        :param context: 
        :param keep_prob: 
        :return: 
        """

        features = [state, y_emb, context]
        maxhid = nn.maxout(
            features,
            [[self.dim_hid, self.dim_y, self.dim_value], self.dim_maxout],
            self.max_part, True)
        readout = nn.linear(maxhid, [self.dim_maxout, self.dim_readout],
                            False,
                            scope="readout")

        if keep_prob < 1.0:
            readout = nn.dropout(readout, keep_prob=keep_prob)

        logits = nn.linear(readout, [self.dim_readout, self.n_y_vocab],
                           True,
                           scope="logits")

        if logits.ndim == 3:
            # flatten (time, batch) into one axis so softmax sees a 2-D matrix
            new_shape = [logits.shape[0] * logits.shape[1], -1]
            logits = logits.reshape(new_shape)

        probs = T.nnet.softmax(logits)

        return probs
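
The nn.maxout and nn.linear helpers are defined elsewhere in the project and are not shown here. As a minimal sketch of what the maxout -> readout -> softmax pipeline computes, assuming flat weight matrices and the function name below (both illustrative, not the project's API):

import numpy as np

def maxout_readout_softmax_sketch(state, y_emb, context,
                                  W_max, W_read, W_out, b_out, k):
    # maxout: project the concatenated features to k * dim_maxout units,
    # then keep the maximum over each group of k pieces
    h = np.concatenate([state, y_emb, context], axis=-1) @ W_max
    h = h.reshape(h.shape[0], -1, k).max(axis=-1)
    # readout: a bias-free linear layer
    r = h @ W_read
    # logits, then a numerically stable row-wise softmax over the vocabulary
    z = r @ W_out + b_out
    z = z - z.max(axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)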
Example #2
File: rnnsearch.py Project: Playinf/nmt
        def prediction(prev_inputs, prev_state, context, keep_prob=1.0):
            features = [prev_state, prev_inputs, context]
            maxhid = nn.maxout(features, [[thdim, tedim, 2 * shdim], maxdim],
                               maxpart, True)
            readout = nn.linear(maxhid, [maxdim, deephid], False,
                                scope="deepout")

            if keep_prob < 1.0:
                readout = nn.dropout(readout, keep_prob=keep_prob)

            logits = nn.linear(readout, [deephid, tvsize], True,
                               scope="logits")

            if logits.ndim == 3:
                new_shape = [logits.shape[0] * logits.shape[1], -1]
                logits = logits.reshape(new_shape)

            probs = theano.tensor.nnet.softmax(logits)

            return probs
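
nn.dropout is also external to this snippet. A common implementation that matches the keep_prob interface is inverted dropout, sketched below under that assumption (the function name and rng argument are illustrative):

import numpy as np

def dropout_sketch(x, keep_prob, rng=np.random):
    # inverted dropout: zero each unit with probability 1 - keep_prob and
    # scale the survivors by 1 / keep_prob so the expectation is unchanged
    if keep_prob >= 1.0:
        return x
    mask = rng.binomial(1, keep_prob, size=x.shape).astype(x.dtype)
    return x * mask / keep_prob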
Example #3
    def __init__(self,
                 emb_size,
                 shidden_size,
                 thidden_size,
                 ahidden_size,
                 mhidden_size,
                 maxpart,
                 dhidden_size,
                 voc_size,
                 config=decoder_config()):
        scope = config.scope
        ctx_size = 2 * shidden_size

        with variable_scope(scope):
            init_transform = feedforward(shidden_size, thidden_size,
                                         config.init_transform)
            annotation_transform = linear(ctx_size, ahidden_size,
                                          config.annotation_transform)
            state_transform = linear(thidden_size, ahidden_size,
                                     config.state_transform)
            context_transform = linear(ahidden_size, 1,
                                       config.context_transform)
            rnn = gru([emb_size, ctx_size], thidden_size, config.rnn)
            maxout_transform = maxout([thidden_size, emb_size, ctx_size],
                                      mhidden_size, maxpart, config.maxout)
            deepout_transform = linear(mhidden_size, dhidden_size,
                                       config.deepout)
            classify_transform = linear(dhidden_size, voc_size,
                                        config.classify)

        params = []
        params.extend(init_transform.parameter)
        params.extend(annotation_transform.parameter)
        params.extend(state_transform.parameter)
        params.extend(context_transform.parameter)
        params.extend(rnn.parameter)
        params.extend(maxout_transform.parameter)
        params.extend(deepout_transform.parameter)
        params.extend(classify_transform.parameter)

        def attention(state, xmask, mapped_annotation):
            # additive attention: score = v^T tanh(W s + U h)
            mapped_state = state_transform(state)
            hidden = theano.tensor.tanh(mapped_state + mapped_annotation)
            score = context_transform(hidden)
            score = score.reshape((score.shape[0], score.shape[1]))
            # numerically stable softmax over the masked source positions
            alpha = theano.tensor.exp(score - score.max(0, keepdims=True))
            alpha = alpha * xmask
            alpha = alpha / theano.tensor.sum(alpha, 0)
            return alpha

        def compute_initstate(annotation):
            # initialize the decoder from the backward encoder state at the
            # first source position (the second half of the feature axis);
            # use integer division so the slice index stays an integer
            hb = annotation[0, :, -annotation.shape[2] // 2:]
            inis = init_transform(hb)
            mapped_annotation = annotation_transform(annotation)

            return inis, mapped_annotation

        def compute_context(state, xmask, annotation, mapped_annotation):
            alpha = attention(state, xmask, mapped_annotation)
            context = theano.tensor.sum(alpha[:, :, None] * annotation, 0)
            return [alpha, context]

        def compute_probability(yemb, state, context):
            # maxout -> readout -> softmax, as in the prediction examples above
            maxhid = maxout_transform([state, yemb, context])
            readout = deepout_transform(maxhid)
            preact = classify_transform(readout)
            prob = theano.tensor.nnet.softmax(preact)

            return prob

        def compute_state(yemb, ymask, state, context):
            new_state, states = rnn([yemb, context], state)
            # keep the previous state wherever the target mask marks padding
            ymask = ymask[:, None]
            new_state = (1.0 - ymask) * state + ymask * new_state

            return new_state

        def compute_attention_score(yseq, xmask, ymask, annotation):
            initstate, mapped_annotation = compute_initstate(annotation)

            def step(yemb, ymask, state, xmask, annotation, mannotation):
                outs = compute_context(state, xmask, annotation, mannotation)
                alpha, context = outs
                new_state = compute_state(yemb, ymask, state, context)
                return [new_state, alpha]

            seq = [yseq, ymask]
            oinfo = [initstate, None]
            nonseq = [xmask, annotation, mapped_annotation]
            (states, alpha), updates = theano.scan(step, seq, oinfo, nonseq)

            return alpha

        def forward(yseq, xmask, ymask, annotation):
            # teacher-forced training pass: shift the targets right so that
            # step j conditions on y_{j-1}
            yshift = theano.tensor.zeros_like(yseq)
            yshift = theano.tensor.set_subtensor(yshift[1:], yseq[:-1])

            initstate, mapped_annotation = compute_initstate(annotation)

            def step(yemb, ymask, state, xmask, annotation, mannotation):
                outs = compute_context(state, xmask, annotation, mannotation)
                alpha, context = outs
                new_state = compute_state(yemb, ymask, state, context)
                return [new_state, context]

            seq = [yseq, ymask]
            oinfo = [initstate, None]
            nonseq = [xmask, annotation, mapped_annotation]
            (states, contexts), updates = theano.scan(step, seq, oinfo, nonseq)

            # prepend the initial state so that step j reads s_{j-1}
            inis = initstate[None, :, :]
            all_states = theano.tensor.concatenate([inis, states], 0)
            prev_states = all_states[:-1]

            maxhid = maxout_transform([prev_states, yshift, contexts])
            readout = deepout_transform(maxhid)
            preact = classify_transform(readout)
            preact = preact.reshape((preact.shape[0] * preact.shape[1], -1))
            prob = theano.tensor.nnet.softmax(preact)

            return prob

        self.name = scope
        self.config = config
        self.forward = forward
        self.parameter = params
        self.compute_initstate = compute_initstate
        self.compute_context = compute_context
        self.compute_probability = compute_probability
        self.compute_state = compute_state
        self.compute_attention_score = compute_attention_score
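
To make the attention arithmetic in Example #3 concrete, here is a self-contained NumPy sketch of one compute_context step: additive scores, a masked softmax over the source axis, and the expected annotation. The parameter names Wa, Ua, and va and the shapes are illustrative assumptions, not the project's API:

import numpy as np

def attention_step_sketch(state, annotation, xmask, Wa, Ua, va):
    # state:      (batch, thidden)     previous decoder state s_{j-1}
    # annotation: (srclen, batch, ctx) bidirectional encoder states h_i
    # xmask:      (srclen, batch)      1.0 for real tokens, 0.0 for padding
    score = np.tanh(state @ Wa + annotation @ Ua) @ va  # (srclen, batch)
    score = score - score.max(axis=0, keepdims=True)    # numerical stability
    alpha = np.exp(score) * xmask                       # drop padded positions
    alpha = alpha / alpha.sum(axis=0, keepdims=True)    # masked softmax
    context = (alpha[:, :, None] * annotation).sum(0)   # (batch, ctx)
    return alpha, context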