Example #1
    def step(self, y_prev, mask, state, keys, values, key_mask, domain_keys,
             domain_annot):
        mask = mask[:, None]
        # s_j^{\prime} = GRU^1(y_{j-1}, s_{j-1})
        _, state_prime = self.cell1(y_prev, state, scope="gru1")
        state_prime = (1.0 - mask) * state + mask * state_prime
        # c_j = att(H, s_j^{\prime})
        alpha = attention(state_prime, keys, key_mask, self.dim_hid,
                          self.dim_key)
        context = T.sum(alpha[:, :, None] * values, 0)
        d_alpha = attention(state_prime,
                            domain_keys,
                            key_mask,
                            self.dim_hid,
                            self.dim_key,
                            scope="domain_context")
        d_context = T.sum(d_alpha[:, :, None] * domain_annot, 0)

        gate = nn.feedforward(
            [state_prime, context, d_context],
            [[self.dim_hid, self.dim_value, self.dim_value], self.dim_value],
            True,
            scope="context_gate")
        context = gate * context + (1 - gate) * d_context

        # s_j = GRU^2(c_j, s_j^{\prime})
        output, next_state = self.cell2(context, state_prime, scope="gru2")
        next_state = (1.0 - mask) * state + mask * next_state

        return next_state, context
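
The context gate above interpolates between the source context c_j and the domain context: g_j = sigmoid(W [s'_j; c_j; c^d_j] + b), then c_j <- g_j * c_j + (1 - g_j) * c^d_j. A minimal sketch of what nn.feedforward could compute here, assuming a sigmoid activation and freshly initialized weights (the real helper reads its parameters from the "context_gate" variable scope):

    import numpy as np
    import theano
    import theano.tensor as T

    def context_gate(state_prime, context, d_context, dim_hid, dim_value):
        rng = np.random.RandomState(0)
        def shared(*shape):
            return theano.shared(rng.randn(*shape).astype("float32"))
        w_s = shared(dim_hid, dim_value)    # weights for s'_j
        w_c = shared(dim_value, dim_value)  # weights for c_j
        w_d = shared(dim_value, dim_value)  # weights for the domain context
        b = theano.shared(np.zeros(dim_value, "float32"))
        gate = T.nnet.sigmoid(T.dot(state_prime, w_s) + T.dot(context, w_c) +
                              T.dot(d_context, w_d) + b)
        # element-wise interpolation between the two contexts
        return gate * context + (1.0 - gate) * d_context
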
Example #2
 def step(self, y_prev, mask, state, keys, values, key_mask):
     mask = mask[:, None]
     # alpha_j = att(H, s_{j-1})
     alpha = attention(state, keys, key_mask, self.dim_hid, self.dim_key)
     # c_j: expected annotation under the attention weights
     context = T.sum(alpha[:, :, None] * values, 0)
     output, next_state = self.cell([y_prev, context], state)
     # keep the previous state on padded timesteps
     next_state = (1.0 - mask) * state + mask * next_state
     return next_state, context
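
Every example calls an attention helper that is not shown on this page. A compatible additive (Bahdanau-style) scorer could look like the sketch below; its internals are an assumption, with shared variables standing in for the repo's scoped parameters. It expects keys already projected to dim_key, query of shape (batch, dim_hid), and keys/key_mask of shapes (time, batch, dim_key) and (time, batch):

    import numpy as np
    import theano
    import theano.tensor as T

    def attention(query, keys, key_mask, dim_hid, dim_key, scope=None):
        rng = np.random.RandomState(0)
        w_q = theano.shared(rng.randn(dim_hid, dim_key).astype("float32"))
        v_a = theano.shared(rng.randn(dim_key).astype("float32"))
        # energies: broadcast the projected query over the time axis
        hidden = T.tanh(keys + T.dot(query, w_q)[None, :, :])
        scores = T.dot(hidden, v_a)                     # (time, batch)
        scores = scores - scores.max(0, keepdims=True)  # numerical stability
        exp_scores = T.exp(scores) * key_mask           # zero out padding
        return exp_scores / exp_scores.sum(0, keepdims=True)

The returned alpha has shape (time, batch), which is why every caller reduces with T.sum(alpha[:, :, None] * values, 0).
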
Example #3
        def attention_loop(inputs, mask, state, keys, values, key_mask):
            mask = mask[:, None]
            alpha = attention(state, keys, key_mask, self.dim_hid,
                              self.dim_key)
            context = T.sum(alpha[:, :, None] * values, 0)
            output, next_state = self.cell([inputs, context], state)
            next_state = (1.0 - mask) * state + mask * next_state

            return [alpha, next_state]
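
attention_loop is written to be driven by theano.scan. A minimal driver sketch, assuming the loop function is in scope and using hypothetical placeholder tensors for the decoder inputs and encoder annotations:

    import theano
    import theano.tensor as T

    y_emb = T.ftensor3("y_emb")          # (time, batch, dim_emb)
    mask = T.fmatrix("mask")             # (time, batch)
    keys = T.ftensor3("keys")            # (src_time, batch, dim_key)
    values = T.ftensor3("values")        # (src_time, batch, dim_value)
    key_mask = T.fmatrix("key_mask")     # (src_time, batch)
    initial_state = T.fmatrix("state0")  # (batch, dim_hid)

    (alphas, states), updates = theano.scan(
        attention_loop,
        sequences=[y_emb, mask],
        outputs_info=[None, initial_state],  # alpha is output-only
        non_sequences=[keys, values, key_mask])

scan passes sequences first, then the recurrent state, then the non-sequences, which matches the loop's (inputs, mask, state, keys, values, key_mask) signature.
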
Example #4
    def forward(self,
                y_seq,
                y_emb,
                mask,
                keys,
                key_mask,
                values,
                initial_state,
                domain_keys,
                domain_annot,
                tag_seq,
                keep_prob=1.0):
        # shift embedding
        y_shifted = T.zeros_like(y_emb)
        y_shifted = T.set_subtensor(y_shifted[1:], y_emb[:-1])
        y_emb = y_shifted
        # feed
        states, contexts = Decoder.scan(self, y_emb, mask, keys, key_mask,
                                        values, initial_state, domain_keys,
                                        domain_annot)

        with ops.variable_scope("DSAdec"):
            newmask = T.set_subtensor(
                mask[T.cast(T.sum(mask, 0) - 1, 'int32'),
                     T.arange(mask.shape[1])], 0.0)
            # domain_alpha = domain_sensitive_attention(states, newmask, self.dim_hid, self.dim_domain)
            domain_alpha = attention(states[-1], states, newmask, self.dim_hid,
                                     self.dim_hid)
            domain_states = states * domain_alpha[:, :, None]

            # batch * (shdim * 2)
            domain_context = T.sum(domain_states, 0)
            # batch * feadim1
            feature = nn.feedforward(domain_context,
                                     [self.dim_hid, self.feadim],
                                     True,
                                     activation=T.tanh,
                                     scope="feature")

            dscores = nn.feedforward(feature, [self.feadim, self.dnum],
                                     True,
                                     activation=T.tanh,
                                     scope="score")
            # (batch, 4)
            dprobs = T.nnet.softmax(dscores)
            pred_tag = T.argmax(dprobs, 1)
            didx = T.arange(tag_seq.flatten().shape[0])
            dce = -T.log(dprobs[didx, tag_seq.flatten()])
            domaincost = T.mean(dce)

        # p(y_j) \propto f(y_{j-1}, s_{j}, c_{j})
        probs = self.prediction(y_emb, states, contexts, keep_prob)

        # compute cost
        cost, snt_cost = self.get_cost(y_seq, mask, probs, domain_alpha)
        return states, contexts, cost, domaincost, pred_tag, snt_cost
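
The newmask expression above zeroes the last real position of each column, so the sentence-level attention ignores the end-of-sentence token. A small NumPy check of the same indexing trick, on a hypothetical toy mask:

    import numpy as np

    mask = np.array([[1, 1],
                     [1, 1],
                     [1, 0]], dtype="float32")  # (time, batch)
    last = mask.sum(0).astype("int32") - 1       # index of each last token
    newmask = mask.copy()
    newmask[last, np.arange(mask.shape[1])] = 0.0
    print(newmask)  # [[1. 1.] [1. 0.] [0. 0.]]
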
Example #5
    def step(self, y_prev, mask, state, *args):
        n_src = self.n_src
        assert len(args) == self.n_src * 3
        src_keys = args[:n_src]
        src_values = args[n_src:2 * n_src]
        src_masks = args[2 * n_src:]

        mask = mask[:, None]
        # s_j^{\prime} = GRU^1(y_{j-1}, s_{j-1})
        _, state_prime = self.cell1(y_prev, state, scope="gru1")
        state_prime = (1.0 - mask) * state + mask * state_prime
        # c_j = att(H, s_j^{\prime})
        contexts = []
        for i, (_key, _val, _mask) in enumerate(zip(src_keys, src_values,
                                                    src_masks)):
            alpha = attention(state_prime,
                              _key,
                              _mask,
                              self.dim_hid,
                              self.dim_key,
                              scope='attn_alpha_%d' % i)
            context = T.sum(alpha[:, :, None] * _val, 0)
            contexts.append(context)
        if self.method == "attn":
            contexts = T.reshape(T.concatenate(contexts, 0),
                                 [n_src] + list(contexts[0].shape))
            with ops.variable_scope("beta"):
                beta_keys = map_key(contexts, self.dim_value, self.dim_key)

            beta = attention(state_prime,
                             beta_keys,
                             T.ones(contexts.shape[:2]),
                             self.dim_hid,
                             self.dim_key,
                             scope='beta')
            context = T.sum(beta[:, :, None] * contexts, 0)
        elif self.method == "concat":
            context = T.concatenate(contexts, -1)

        # s_j = GRU^2(c_j, s_j^{\prime})
        output, next_state = self.cell2(context, state_prime, scope="gru2")
        next_state = (1.0 - mask) * state + mask * next_state
        return next_state, context
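
In the "attn" branch, the per-source contexts are stacked into an (n_src, batch, dim_value) tensor and combined with a second attention over the source axis. A quick NumPy shape check of that combination, with toy sizes and uniform weights in place of the learned beta:

    import numpy as np

    n_src, batch, dim_value = 2, 3, 4
    contexts = np.random.rand(n_src, batch, dim_value).astype("float32")
    beta = np.full((n_src, batch), 1.0 / n_src, dtype="float32")
    combined = (beta[:, :, None] * contexts).sum(0)
    assert combined.shape == (batch, dim_value)
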
Example #6
        def sampling_loop(inputs, state, keys, values, key_mask):
            alpha = attention(state, keys, key_mask, self.dim_hid,
                              self.dim_key)
            context = T.sum(alpha[:, :, None] * values, 0)
            # p(y_j) \propto f(y_{j-1}, s_{j-1}, c_j)
            probs = self.prediction(inputs, state, context)
            # sample the next word and embed it for the following step
            next_words = ops.random.multinomial(probs).argmax(axis=1)
            new_inputs = nn.embedding_lookup(target_embedding, next_words)
            new_inputs = new_inputs + target_bias
            output, next_state = self.cell([inputs, context], state)

            return [next_words, new_inputs, next_state]
Example #7
 def attention_loop(inputs, mask, state, keys, values, key_mask):
     mask = mask[:, None]
     # s_j^{\prime} = GRU^1(y_{j-1}, s_{j-1})
     _, state_prime = self.cell1(inputs, state, scope="gru1")
     # c_j = att(H, s_j^{\prime})
     alpha = attention(state_prime, keys, key_mask, self.dim_hid,
                       self.dim_key)
     context = T.sum(alpha[:, :, None] * values, 0)
     # s_j = GRU^2(c_j, s_j^{\prime})
     output, next_state = self.cell2(context, state_prime, scope="gru2")
     next_state = (1.0 - mask) * state + mask * next_state
     return [alpha, next_state]
Example #8
        def sampling_loop(inputs, state, keys, values, key_mask):
            _, state_prime = self.cell1(inputs, state, scope="gru1")
            alpha = attention(state_prime, keys, key_mask, self.dim_hid,
                              self.dim_key)
            context = T.sum(alpha[:, :, None] * values, 0)
            output, next_state = self.cell2(context, state_prime, scope="gru2")
            probs = self.prediction(
                inputs, next_state,
                context)  # p(y_j) \propto f(y_{j-1}, c_j, s_j)
            next_words = ops.random.multinomial(probs).argmax(axis=1)
            new_inputs = nn.embedding_lookup(target_embedding, next_words)
            new_inputs = new_inputs + target_bias

            return [next_words, new_inputs, next_state]
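
The ops.random.multinomial helper used by the sampling loops is not shown either. One way it could be realized with Theano's MRG random streams, assuming probs holds a normalized (batch, vocab) distribution; multinomial returns one-hot rows, and argmax recovers the sampled word indices:

    from theano.sandbox.rng_mrg import MRG_RandomStreams

    trng = MRG_RandomStreams(seed=1234)

    def sample_words(probs):
        draws = trng.multinomial(pvals=probs, dtype="float32")  # one-hot rows
        return draws.argmax(axis=1)                             # word indices
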