Example 1
def predict(x):
    # Build and run the inference graph for a single source sentence (batch size 1).
    with nn.auto_forward():
        x = x.reshape((1, sentence_length_source))
        enc_input = nn.Variable.from_numpy_array(x)
        # embed the source word ids
        enc_input = time_distributed(PF.embed)(enc_input,
                                               vocab_size_source,
                                               embedding_size,
                                               name='enc_embeddings')

        # encoder
        with nn.parameter_scope('encoder'):
            output, c, h = LSTMEncoder(enc_input,
                                       hidden,
                                       return_sequences=True,
                                       return_state=True)

        # decoder
        # parameters the decoder reuses at inference time
        # (target embeddings and the output projection)
        params = [
            nn.get_parameters()['dec_embeddings/embed/W'],
            nn.get_parameters()['output/affine/W'],
            nn.get_parameters()['output/affine/b']
        ]
        ret = LSTMAttentionDecoder(encoder_output=output,
                                   initial_state=(c, h),
                                   inference_params=params,
                                   name='decoder')

        return ret
Example 2
    def compute_context(prev_state):
        # Score each encoder position against the previous decoder state,
        # softmax over the source positions, and return the weighted sum of
        # the encoder outputs `hs` as the context vector.
        batch_size = prev_state.shape[0]
        ht = PF.affine(prev_state,
                       attention_units,
                       with_bias=False,
                       name='Waht')
        # -> (batch_size, attention_units)
        ht = F.reshape(ht, (batch_size, 1, attention_units))
        # -> (batch_size, 1, attention_units)
        ht = F.broadcast(ht,
                         (batch_size, sentence_length_source, attention_units))
        # -> (batch_size, sentence_length_source, attention_units)

        attention = F.tanh(hs + ht)
        # -> (batch_size, sentence_length_source, attention_units)
        attention = time_distributed(PF.affine)(attention,
                                                1,
                                                with_bias=False,
                                                name='attention')
        # -> (batch_size, sentence_length_source, 1)
        attention = F.softmax(attention, axis=1)
        # -> (batch_size, sentence_length_source, 1)

        context = F.batch_matmul(hs, attention, transpose_a=True)
        context = F.reshape(context, (batch_size, attention_units))

        return context
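
A minimal, self-contained shape sketch of the final weighting step above, using illustrative sizes (batch_size=2, sentence_length_source=5, attention_units=8) and random values in place of the real encoder outputs and attention scores:

import numpy as np
import nnabla as nn
import nnabla.functions as F

batch_size, sentence_length_source, attention_units = 2, 5, 8  # illustrative only

with nn.auto_forward():
    # stand-ins for the encoder outputs `hs` and the pre-softmax attention scores
    hs = nn.Variable.from_numpy_array(
        np.random.randn(batch_size, sentence_length_source, attention_units))
    attention = nn.Variable.from_numpy_array(
        np.random.randn(batch_size, sentence_length_source, 1))
    attention = F.softmax(attention, axis=1)
    # -> (2, 5, 1); weights sum to 1 over the source positions
    context = F.batch_matmul(hs, attention, transpose_a=True)
    # -> (2, 8, 1); weighted sum of the encoder outputs
    context = F.reshape(context, (batch_size, attention_units))

print(context.shape)  # (2, 8)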
Example 3
def build_model():
    x = nn.Variable((batch_size, sentence_length_source))
    # mask of non-pad source positions (computed but unused below; see the
    # commented-out multiplication on enc_input)
    input_mask = F.sign(
        F.reshape(F.slice(x), (batch_size, sentence_length_source, 1)))
    y = nn.Variable((batch_size, sentence_length_target))

    enc_input = time_distributed(PF.embed)(x,
                                           vocab_size_source,
                                           embedding_size,
                                           name='enc_embeddings')  #*input_mask
    # -> (batch_size, sentence_length_source, embedding_size)
    dec_input = time_distributed(PF.embed)(y,
                                           vocab_size_target,
                                           embedding_size,
                                           name='dec_embeddings')
    # -> (batch_size, sentence_length_target, embedding_size)

    # encoder
    with nn.parameter_scope('encoder'):
        output, c, h = LSTMEncoder(enc_input,
                                   hidden,
                                   return_sequences=True,
                                   return_state=True)
        # -> (batch_size, sentence_length_source, hidden), (batch_size, hidden), (batch_size, hidden)

    # decoder
    output = LSTMAttentionDecoder(dec_input,
                                  output,
                                  initial_state=(c, h),
                                  return_sequences=True,
                                  name='decoder')
    # -> (batch_size, sentence_length_target, hidden)
    output = time_distributed(PF.affine)(output,
                                         vocab_size_target,
                                         name='output')
    # -> (batch_size, sentence_length_target, vocab_size_target)

    t = F.reshape(F.slice(y), (batch_size, sentence_length_target, 1))

    entropy = time_distributed_softmax_cross_entropy(output, t)

    mask = F.sum(F.sign(t), axis=2)  # do not predict 'pad'.
    count = F.sum(mask, axis=1)

    entropy *= mask
    loss = F.mean(F.sum(entropy, axis=1) / count)
    return x, y, loss
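
A hedged usage sketch for build_model above: assuming the project's hyperparameters and a seq2seq data iterator are in scope (the solver choice and the iterator name train_data_iter are illustrative, not taken from the original script), one training step might look like this:

import nnabla as nn
import nnabla.solvers as S

x, y, loss = build_model()

solver = S.Adam()  # illustrative solver choice
solver.set_parameters(nn.get_parameters())

x_batch, y_batch = train_data_iter.next()  # assumed iterator yielding (source, target) ids
x.d, y.d = x_batch, y_batch
loss.forward()
solver.zero_grad()
loss.backward()
solver.update()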
Example 4
def build_model(get_embeddings=False):
    # Character-aware language model: character-CNN word embeddings, two
    # highway layers, two LSTM layers, and an affine output over the word vocabulary.
    x = nn.Variable((batch_size, sentence_length, word_length))

    with nn.parameter_scope('char_embedding'):
        h = PF.embed(x, char_vocab_size, char_embedding_dim)
    h = F.transpose(h, (0, 3, 1, 2))
    output = []
    for f, f_size in zip(filters, filster_sizes):
        _h = PF.convolution(h,
                            f,
                            kernel=(1, f_size),
                            pad=(0, f_size // 2),
                            name='conv_{}'.format(f_size))
        _h = F.max_pooling(_h, kernel=(1, word_length))
        output.append(_h)
    h = F.concatenate(*output, axis=1)
    h = F.transpose(h, (0, 2, 1, 3))

    embeddings = F.reshape(h, (batch_size, sentence_length, sum(filters)))

    if get_embeddings:
        return x, embeddings

    with nn.parameter_scope('highway1'):
        h = time_distributed(highway)(embeddings)
    with nn.parameter_scope('highway2'):
        h = time_distributed(highway)(h)
    with nn.parameter_scope('lstm1'):
        h = lstm(h, lstm_size, return_sequences=True)
    with nn.parameter_scope('lstm2'):
        h = lstm(h, lstm_size, return_sequences=True)
    with nn.parameter_scope('hidden'):
        h = time_distributed(PF.affine)(h, lstm_size)
    with nn.parameter_scope('output'):
        y = time_distributed(PF.affine)(h, word_vocab_size)
    t = nn.Variable((batch_size, sentence_length, 1))

    mask = F.sum(F.sign(t), axis=2)  # do not predict 'pad'.
    entropy = time_distributed_softmax_cross_entropy(y, t) * mask
    count = F.sum(mask, axis=1)
    loss = F.mean(F.div2(F.sum(entropy, axis=1), count))
    return x, t, loss
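
A hedged way to sanity-check the character-CNN embedding branch above, assuming the globals build_model relies on (batch_size, sentence_length, word_length, char_vocab_size, filters, ...) are defined as in the surrounding project:

import numpy as np

x, embeddings = build_model(get_embeddings=True)
x.d = np.random.randint(0, char_vocab_size,
                        (batch_size, sentence_length, word_length))
embeddings.forward()
print(embeddings.shape)  # (batch_size, sentence_length, sum(filters))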
Example 5
# training iterator (assumed to mirror the validation iterator below)
train_data_iter = data_iterator_simple(load_train_func,
                                       len(x_train),
                                       batch_size,
                                       shuffle=True,
                                       with_file_cache=False)
valid_data_iter = data_iterator_simple(load_valid_func,
                                       len(x_valid),
                                       batch_size,
                                       shuffle=True,
                                       with_file_cache=False)

# Network: embedding -> simple RNN -> hidden affine -> output affine over the vocabulary.
x = nn.Variable((batch_size, sentence_length))
t = nn.Variable((batch_size, sentence_length, 1))
with nn.parameter_scope('embedding'):
    h = PF.embed(x, vocab_size, embedding_size)
with nn.parameter_scope('rnn1'):
    h = simple_rnn(h, hidden_size, return_sequences=True)
with nn.parameter_scope('hidden'):
    h = time_distributed(PF.affine)(h, hidden_size)
with nn.parameter_scope('output'):
    y = time_distributed(PF.affine)(h, vocab_size)

mask = F.sum(F.greater_scalar(t, 0), axis=2)  # do not predict 'pad'.
# mask = F.sum(F.sign(t), axis=2) # do not predict 'pad'.
entropy = time_distributed_softmax_cross_entropy(y, t) * mask
count = F.sum(mask, axis=1)
loss = F.mean(F.div2(F.sum(entropy, axis=1), count))

# Create solver.
solver = S.Momentum(1e-2, momentum=0.9)
solver.set_parameters(nn.get_parameters())

# Create monitor.
from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
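
From here, a training loop could be wired up roughly as follows; max_epoch, the monitor path, and the reporting interval are illustrative assumptions rather than values from the original script:

monitor = Monitor('tmp.monitor')
monitor_loss = MonitorSeries('training loss', monitor, interval=100)
monitor_time = MonitorTimeElapsed('time', monitor, interval=100)

max_epoch = 10                                        # assumed
iters_per_epoch = train_data_iter.size // batch_size

for epoch in range(max_epoch):
    for i in range(iters_per_epoch):
        x.d, t.d = train_data_iter.next()
        loss.forward()
        solver.zero_grad()
        loss.backward()
        solver.update()
        step = epoch * iters_per_epoch + i
        monitor_loss.add(step, loss.d.copy())
        monitor_time.add(step)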