Пример #1
0
def build_embedding_decoder(
    model,
    decoder_layer_configs,
    inputs,
    input_lengths,
    encoder_lengths,
    encoder_outputs,
    weighted_encoder_outputs,
    final_encoder_hidden_states,
    final_encoder_cell_states,
    encoder_units_per_layer,
    vocab_size,
    embeddings,
    embedding_size,
    attention_type,
    forward_only,
    num_gpus=0,
    scope=None,
):
    """Build a multi-layer LSTM attention decoder over embedded inputs.

    The function looks up `inputs` in `embeddings` with a Gather op, stacks
    one LSTMCell per entry of `decoder_layer_configs` (optionally wrapped in
    a DropoutCell), runs an LSTMWithAttentionDecoder over the embedded
    sequence and finally reshapes the decoder output to two dimensions.

    Args:
        model: model helper whose `net` receives the operators.
        decoder_layer_configs: sequence of dicts, one per decoder layer.
            Each must contain 'num_units'; 'dropout_keep_prob' is optional
            and, when present and not None, enables dropout for that layer.
        inputs: blob of indices gathered from `embeddings`.
        input_lengths: passed as `seq_lengths` to the decoder.
        encoder_lengths: forwarded to LSTMWithAttentionDecoder.
        encoder_outputs: forwarded to LSTMWithAttentionDecoder.
        weighted_encoder_outputs: forwarded to LSTMWithAttentionDecoder.
        final_encoder_hidden_states: forwarded to
            build_initial_rnn_decoder_states.
        final_encoder_cell_states: forwarded to
            build_initial_rnn_decoder_states.
        encoder_units_per_layer: per-layer encoder sizes; the last entry is
            used as the encoder output dimension.
        vocab_size: forwarded to LSTMWithAttentionDecoder.
        embeddings: embedding table blob used by Gather.
        embedding_size: input size of the first decoder layer.
        attention_type: forwarded to LSTMWithAttentionDecoder; attention is
            considered enabled for the initial states unless it equals
            'none'.
        forward_only: forwarded to every cell.
        num_gpus: when 0 the Gather runs in the current device scope;
            otherwise it runs under a CPU device scope and the result is
            copied with CopyCPUToGPU.
        scope: optional name scope; also used as the decoder name.

    Returns:
        Tuple `(decoder_outputs, decoder_output_dim)` where
        `decoder_outputs` is the decoder output reshaped to
        `[-1, decoder_output_dim]` and `decoder_output_dim` is
        `attention_decoder.get_output_dim()`.
    """
    with core.NameScope(scope or ''):
        if num_gpus == 0:
            embedded_decoder_inputs = model.net.Gather(
                [embeddings, inputs],
                ['embedded_decoder_inputs'],
            )
        else:
            # Do the embedding lookup under a CPU device scope, then copy
            # the gathered rows over to the GPU.
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                embedded_decoder_inputs_cpu = model.net.Gather(
                    [embeddings, inputs],
                    ['embedded_decoder_inputs_cpu'],
                )
            embedded_decoder_inputs = model.CopyCPUToGPU(
                embedded_decoder_inputs_cpu,
                'embedded_decoder_inputs',
            )

    decoder_cells = []
    decoder_units_per_layer = []
    for i, layer_config in enumerate(decoder_layer_configs):
        num_units = layer_config['num_units']
        decoder_units_per_layer.append(num_units)

        # The first layer consumes the embeddings; every later layer
        # consumes the output of the cell stacked just before it.
        if i == 0:
            input_size = embedding_size
        else:
            input_size = decoder_cells[-1].get_output_dim()

        cell = rnn_cell.LSTMCell(
            forward_only=forward_only,
            input_size=input_size,
            hidden_size=num_units,
            forget_bias=0.0,
            memory_optimization=False,
        )

        dropout_keep_prob = layer_config.get('dropout_keep_prob', None)
        if dropout_keep_prob is not None:
            # layer_config is a dict, so reuse the value fetched above
            # instead of attribute access (which raises AttributeError).
            dropout_ratio = 1.0 - dropout_keep_prob
            cell = rnn_cell.DropoutCell(
                internal_cell=cell,
                dropout_ratio=dropout_ratio,
                forward_only=forward_only,
                is_test=False,
                name=get_layer_scope(scope, 'decoder_dropout', i),
            )

        decoder_cells.append(cell)

    states = build_initial_rnn_decoder_states(
        model=model,
        encoder_units_per_layer=encoder_units_per_layer,
        decoder_units_per_layer=decoder_units_per_layer,
        final_encoder_hidden_states=final_encoder_hidden_states,
        final_encoder_cell_states=final_encoder_cell_states,
        use_attention=(attention_type != 'none'),
    )
    attention_decoder = LSTMWithAttentionDecoder(
        encoder_outputs=encoder_outputs,
        encoder_output_dim=encoder_units_per_layer[-1],
        encoder_lengths=encoder_lengths,
        vocab_size=vocab_size,
        attention_type=attention_type,
        embedding_size=embedding_size,
        decoder_num_units=decoder_units_per_layer[-1],
        decoder_cells=decoder_cells,
        weighted_encoder_outputs=weighted_encoder_outputs,
        name=scope,
    )
    decoder_outputs, _ = attention_decoder.apply_over_sequence(
        model=model,
        inputs=embedded_decoder_inputs,
        seq_lengths=input_lengths,
        initial_states=states,
    )

    # we do softmax over the whole sequence
    # (max_length in the batch * batch_size) x decoder embedding size
    # -1 because we don't know max_length yet
    decoder_output_dim = attention_decoder.get_output_dim()
    decoder_outputs_flattened, _ = model.net.Reshape(
        [decoder_outputs],
        [
            'decoder_outputs_flattened',
            'decoder_outputs_and_contexts_combination_old_shape',
        ],
        shape=[-1, decoder_output_dim],
    )

    return (decoder_outputs_flattened, decoder_output_dim)
Пример #2
0
def rnn_unidirectional_layer(
    model,
    inputs,
    input_lengths,
    input_size,
    num_units,
    dropout_keep_prob,
    forward_only,
    return_sequence_output,
    return_final_state,
    scope=None,
):
    """Run a single LSTM layer (optionally with dropout) over `inputs`.

    Zero-filled initial hidden and cell states of shape `[num_units]` are
    created in `model.param_init_net` under `scope`. When
    `dropout_keep_prob` is not None the LSTM cell is wrapped in a
    DropoutCell with ratio `1.0 - dropout_keep_prob`.

    Gradients are requested for output 0 when `return_sequence_output` is
    truthy and for outputs 1 and 3 when `return_final_state` is truthy
    (presumably the final hidden and cell states, matching the positions
    unpacked below -- confirm against the cell implementation).

    Returns:
        Tuple `(outputs, final_hidden_state, final_cell_state)`.
    """

    def _zero_state(blob_name):
        # Constant zero blob of shape [num_units] in the init net.
        return model.param_init_net.ConstantFill(
            [],
            blob_name,
            shape=[num_units],
            value=0.0,
        )

    with core.NameScope(scope):
        initial_cell_state = _zero_state('initial_cell_state')
        initial_hidden_state = _zero_state('initial_hidden_state')

    # Cell names are prefixed with the scope only when one is given.
    name_prefix = scope + '/' if scope else ''

    cell = rnn_cell.LSTMCell(
        input_size=input_size,
        hidden_size=num_units,
        forget_bias=0.0,
        memory_optimization=False,
        name=name_prefix + 'lstm',
        forward_only=forward_only,
    )

    if dropout_keep_prob is not None:
        cell = rnn_cell.DropoutCell(
            internal_cell=cell,
            dropout_ratio=1.0 - dropout_keep_prob,
            name=name_prefix + 'dropout',
            forward_only=forward_only,
            is_test=False,
        )

    grad_output_indices = [0] if return_sequence_output else []
    if return_final_state:
        grad_output_indices += [1, 3]

    outputs, final_states = cell.apply_over_sequence(
        model=model,
        inputs=inputs,
        seq_lengths=input_lengths,
        initial_states=(initial_hidden_state, initial_cell_state),
        outputs_with_grads=grad_output_indices,
    )
    # Exactly four state outputs are expected; only the 2nd and 4th are used.
    _, final_hidden_state, _, final_cell_state = final_states
    return outputs, final_hidden_state, final_cell_state
Пример #3
0
def _prepare_attention(t,
                       n,
                       dim_in,
                       encoder_dim,
                       forward_only=False,
                       T=None,
                       dim_out=None,
                       residual=False,
                       final_dropout=False):
    """Build a test net with a stacked MILSTM decoder and recurrent attention.

    Creates a ModelHelper named 'external', registers and feeds random
    initial hidden/cell states for every entry of `dim_out` plus an initial
    attention-weighted encoder context, builds the cells inside the
    "test_name_scope" name scope, runs the param init net once and feeds
    random sequence lengths in `[1, t]` for a batch of `n`.

    Args:
        t: upper bound (inclusive) for the random sequence lengths.
        n: batch size used for the fed state and length blobs.
        dim_in: input size of the first decoder layer.
        encoder_dim: last dimension of the fed initial attention context;
            also passed as `encoder_output_dim`.
        forward_only: forwarded to the cells.
        T: when not None, the final cell is wrapped in UnrolledCell(cell, T).
        dim_out: hidden sizes per decoder layer; defaults to `[dim_in]`.
        residual: when truthy, layers 1..len-1 are residual output layers.
        final_dropout: when truthy, wrap the attention cell in a DropoutCell
            with ratio 0.0.

    Returns:
        Dict with keys 'final_output', 'net', 'initial_states',
        'input_blob', 'encoder_outputs', 'weighted_encoder_outputs' and
        'outputs_with_grads'.
    """
    dim_out = [dim_in] if dim_out is None else dim_out
    print("Dims: t={} n={} dim_in={} dim_out={}".format(t, n, dim_in, dim_out))

    model = ModelHelper(name='external')

    def _random_float32(shape):
        return np.random.random(shape).astype(np.float32)

    # One (hidden, cell) pair of external inputs per decoder layer.
    initial_states = []
    for layer_idx, hidden_dim in enumerate(dim_out):
        hidden_blob, cell_blob = model.net.AddExternalInputs(
            "hidden_init_{}".format(layer_idx),
            "cell_init_{}".format(layer_idx),
        )
        initial_states.extend([hidden_blob, cell_blob])
        workspace.FeedBlob(hidden_blob, _random_float32((1, n, hidden_dim)))
        workspace.FeedBlob(cell_blob, _random_float32((1, n, hidden_dim)))

    awec_init = model.net.AddExternalInputs([
        'initial_attention_weighted_encoder_context',
    ])
    initial_states.append(awec_init)
    workspace.FeedBlob(awec_init, _random_float32((1, n, encoder_dim)))

    # Due to convoluted RNN scoping logic we make sure that things
    # work from a namescope
    with scope.NameScope("test_name_scope"):
        scoped_inputs = model.net.AddScopedExternalInputs(
            'input_blob',
            'seq_lengths',
            'encoder_outputs',
            'weighted_encoder_outputs',
        )
        (input_blob, seq_lengths,
         encoder_outputs, weighted_encoder_outputs) = scoped_inputs

        # Layer k reads dim_in for k == 0 and dim_out[k - 1] otherwise.
        layer_input_dims = [dim_in] + list(dim_out[:-1])
        cells = [
            rnn_cell.MILSTMCell(
                name='decoder_{}'.format(layer_idx),
                forward_only=forward_only,
                input_size=in_dim,
                hidden_size=out_dim,
                forget_bias=0.0,
                memory_optimization=False,
            )
            for layer_idx, (in_dim, out_dim) in enumerate(
                zip(layer_input_dims, dim_out))
        ]

        residual_layers = range(1, len(cells)) if residual else None
        decoder_cell = rnn_cell.MultiRNNCell(
            cells,
            name='decoder',
            residual_output_layers=residual_layers,
        )

        attention_cell = rnn_cell.AttentionCell(
            encoder_output_dim=encoder_dim,
            encoder_outputs=encoder_outputs,
            encoder_lengths=None,
            decoder_cell=decoder_cell,
            decoder_state_dim=dim_out[-1],
            name='attention_decoder',
            attention_type=AttentionType.Recurrent,
            weighted_encoder_outputs=weighted_encoder_outputs,
            attention_memory_optimization=True,
        )
        if final_dropout:
            # dropout ratio of 0.0 used to test mechanism but not interfere
            # with numerical tests
            attention_cell = rnn_cell.DropoutCell(
                internal_cell=attention_cell,
                dropout_ratio=0.0,
                name='dropout',
                forward_only=forward_only,
            )

        if T is not None:
            attention_cell = rnn_cell.UnrolledCell(attention_cell, T)

        # NOTE(review): this appends to the object returned by
        # decoder_cell.output_indices in place, exactly as before; whether
        # the cell relies on that mutation is not visible here -- confirm.
        grad_state_indices = decoder_cell.output_indices
        grad_state_indices.append(2 * len(cells))
        outputs_with_grads = [2 * idx for idx in grad_state_indices]

        final_output, _ = attention_cell.apply_over_sequence(
            model=model,
            inputs=input_blob,
            seq_lengths=seq_lengths,
            initial_states=initial_states,
            outputs_with_grads=outputs_with_grads,
        )

    workspace.RunNetOnce(model.param_init_net)

    random_lengths = np.random.randint(1, t + 1, size=(n, )).astype(np.int32)
    workspace.FeedBlob(seq_lengths, random_lengths)

    return {
        'final_output': final_output,
        'net': model.net,
        'initial_states': initial_states,
        'input_blob': input_blob,
        'encoder_outputs': encoder_outputs,
        'weighted_encoder_outputs': weighted_encoder_outputs,
        'outputs_with_grads': outputs_with_grads,
    }