# Example #1
def test_transformer_decoder_block2():
    """Default settings: an encoder block's output is fed into a decoder block."""
    seq_a = C.sequence.input_variable(10)
    seq_b = C.sequence.input_variable(10)

    encoder_block = TransformerEncoderBlock(num_heads=2, model_dim=10,
                                            intermediate_dim=30, dropout_rate=0.1)
    decoder_block = TransformerDecoderBlock(num_heads=2, model_dim=10,
                                            intermediate_dim=30, dropout_rate=0.1,
                                            obey_sequence_order=True,
                                            max_seq_len=100)

    decoded = decoder_block(encoder_block(seq_a), seq_b)

    # Static (per-step) output shape must match model_dim.
    assert decoded.shape == (10, )

    # Variable-length batch: sequences of 2, 4 and 6 time steps.
    lengths = (2, 4, 6)
    n = [np.random.random((t, 10)).astype(np.float32) for t in lengths]
    m = [np.random.random((t, 10)).astype(np.float32) for t in lengths]

    # Smoke test: evaluation must complete without raising.
    results = decoded.eval({seq_a: n, seq_b: m})
# Example #2
def test_transformer_decoder_block3():
    """Typical use case: one encoder feeding a stack of two decoder blocks."""
    seq_a = C.sequence.input_variable(10)
    seq_b = C.sequence.input_variable(10)

    encoder_block = TransformerEncoderBlock(num_heads=2, model_dim=10,
                                            intermediate_dim=30, dropout_rate=0.1)
    decoder_block1 = TransformerDecoderBlock(num_heads=2, model_dim=10,
                                             intermediate_dim=30, dropout_rate=0.1,
                                             obey_sequence_order=True, max_seq_len=100)
    decoder_block2 = TransformerDecoderBlock(num_heads=2, model_dim=10,
                                             intermediate_dim=30, dropout_rate=0.1,
                                             obey_sequence_order=True, max_seq_len=100)

    # Both decoder layers attend to the same encoded representation.
    encoded = encoder_block(seq_a)
    decoded = decoder_block2(encoded, decoder_block1(encoded, seq_b))

    assert decoded.shape == (10, )

    # Variable-length batch: sequences of 2, 4 and 6 time steps.
    lengths = (2, 4, 6)
    n = [np.random.random((t, 10)).astype(np.float32) for t in lengths]
    m = [np.random.random((t, 10)).astype(np.float32) for t in lengths]

    # Smoke test: evaluation must complete without raising.
    results = decoded.eval({seq_a: n, seq_b: m})
# Example #3
def test_transformer_encoder_block1a():
    """Default settings: input is a sequence, output shape is per-step (non-sequence)."""
    seq_in = C.sequence.input_variable(10)
    block = TransformerEncoderBlock(num_heads=2, model_dim=10,
                                    intermediate_dim=30, dropout_rate=0.1)
    attended = block(seq_in)

    assert attended.shape == (10, )

    # Two sequences of different lengths (3 and 6 steps).
    batch = [np.random.random((t, 10)).astype(np.float32) for t in (3, 6)]

    # Smoke test: evaluation must complete without raising.
    results = attended.eval({seq_in: batch})
# Example #4
def test_initialisation_transformer_encoder_block():
    """Custom-initialise every parameter of an encoder block from numpy arrays."""
    model_dim = 768
    intermediate_dim = 3072
    num_heads = 12

    def rand(*shape):
        # Uniform float32 tensor of the requested shape.
        return np.random.random(shape).astype(np.float32)

    bias = rand(model_dim)
    kernel = rand(model_dim, model_dim)
    intermediate_bias = rand(intermediate_dim)
    intermediate_kernel = rand(model_dim, intermediate_dim)
    final_kernel = rand(intermediate_dim, model_dim)

    # Construction alone is the test: it must accept numpy initialisers
    # for every projection, layer-norm scale/bias and feed-forward weight.
    TransformerEncoderBlock(num_heads=num_heads,
                            model_dim=model_dim,
                            intermediate_dim=intermediate_dim,
                            dropout_rate=0.1,
                            obey_sequence_order=False,
                            max_seq_len=None,
                            key_init=kernel,
                            key_init_bias=bias,
                            query_init=kernel,
                            query_init_bias=bias,
                            value_init=kernel,
                            value_init_bias=bias,
                            mha_init=kernel,
                            mha_init_bias=bias,
                            mha_initial_scale=bias,
                            mha_initial_bias=bias,
                            intermediate_init=intermediate_kernel,
                            intermediate_init_bias=intermediate_bias,
                            init=final_kernel,
                            init_bias=bias,
                            initial_scale=bias,
                            initial_bias=bias)