示例#1
0
def test_to_sequence_basic(device_id):
    """Check ``C.to_sequence`` for a dense input and a sparse input with lengths.

    Dense part: a ``(FreeDimension, 2)`` input is converted without explicit
    lengths; the result must have two dynamic axes and evaluate to the input
    data unchanged.

    Sparse part: a ``(FreeDimension, 2, 3)`` sparse input is converted with
    per-sample lengths ``[2, 1]``; the all-zero padding entry of the second
    sample must be absent from the densified result.
    """
    target_device = cntk_device(device_id)

    # Dense input, no sequence lengths supplied.
    dense_input = C.input_variable((C.FreeDimension, 2))
    dense_as_seq = C.to_sequence(dense_input)
    assert len(dense_as_seq.dynamic_axes) == 2

    dense_batch = np.asarray(
        [[[1, 2], [-1000, -1000]],
         [[3, 4], [5, 6]]],
        dtype=np.float32)
    dense_output = dense_as_seq.eval({dense_input: dense_batch}, device=target_device)
    assert np.array_equal(dense_output, dense_batch)

    # Sparse input together with an explicit lengths operand.
    sparse_input = C.input_variable((C.FreeDimension, 2, 3), is_sparse=True)
    lengths_input = C.input_variable(())
    sparse_as_seq = C.to_sequence(sparse_input, lengths_input)

    full_sample = [[[0, 1, 1], [0, 1, 0]], [[1, 0, 0], [1, 0, 1]]]
    full_view = C.NDArrayView.from_csr(_to_csr(full_sample), shape=(2, 2, 3), device=C.cpu())
    short_entry = [[0, 1, 1], [1, 1, 0]]
    zero_entry = [[0, 0, 0], [0, 0, 0]]
    # The shorter sample is padded with an all-zero entry to the common shape.
    padded_view = C.NDArrayView.from_csr(_to_csr([short_entry, zero_entry]), shape=(2, 2, 3), device=C.cpu())

    batch_template = C.input_variable((2, 2, 3), is_sparse=True)
    sparse_batch = C.Value.create(batch_template, [full_view, padded_view], device=target_device).data
    lengths_batch = np.asarray([2, 1], dtype=np.float32)
    feed = {sparse_input: sparse_batch, lengths_input: lengths_batch}
    sparse_output = sparse_as_seq.eval(feed, device=target_device, as_numpy=False)

    densified = _to_dense(sparse_output, True)
    assert np.array_equal(densified[0], full_sample)
    assert np.array_equal(densified[1], [short_entry])
示例#2
0
def test_sequence_unpack_backprop(device_id):
    """Check that two equivalent sequence reductions give matching loss and gradients.

    The per-step classifier output is collapsed to one vector in two ways:
    ``sequence.last`` applied to ``Recurrence(C.plus)``, and
    ``sequence.reduce_sum``.  The cross-entropy loss and every non-sparse
    parameter gradient of both variants must agree under ``np.allclose``.
    """
    dev = cntk_device(device_id)  # evaluated only; grad() below runs without an explicit device
    vocab_size = 3
    embedding_dim = 2
    lstm_dim = 2
    label_count = 2

    features = C.sequence.input_variable(vocab_size, is_sparse=True, name='features')
    labels = C.input_variable(label_count, is_sparse=True, name='labels')
    with C.default_options(initial_state=0.1):
        embedded = C.layers.Embedding(embedding_dim, name='embed')(features)
        encoded = C.layers.Recurrence(C.layers.LSTM(lstm_dim), go_backwards=False)(embedded)
        per_step_scores = C.layers.Dense(label_count, name='classify')(encoded)

    # Variant 1: last element of the recurrent sum over the sequence.
    summed_last = C.sequence.last(C.layers.Recurrence(C.plus)(per_step_scores))
    loss_via_last = C.cross_entropy_with_softmax(summed_last, labels)

    first_seq = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    second_seq = [[0, 0, 1], [0, 1, 1]]
    label_batch = _to_csr([[0, 1], [1, 0]])

    def _grads_and_loss(criterion):
        # Fresh CSR matrices are built for every evaluation.
        feed = {features: [_to_csr(first_seq), _to_csr(second_seq)], labels: label_batch}
        return criterion.grad(feed, wrt=criterion.parameters, outputs=[criterion], as_numpy=False)

    grads_via_last, value_via_last = _grads_and_loss(loss_via_last)

    # Variant 2: direct sum reduction over the sequence axis.
    summed_direct = C.sequence.reduce_sum(per_step_scores)
    loss_via_sum = C.cross_entropy_with_softmax(summed_direct, labels)
    grads_via_sum, value_via_sum = _grads_and_loss(loss_via_sum)

    assert np.allclose(value_via_last.asarray(), value_via_sum.asarray())

    for param in grads_via_last:
        expected_grad = grads_via_last[param]
        if expected_grad.is_sparse:
            # Sparse gradients are skipped.
            continue
        expected_dense = expected_grad.asarray()
        actual_dense = grads_via_sum[param].asarray()
        assert np.allclose(expected_dense, actual_dense)
示例#3
0
def test_sequence_unpack_backprop(device_id):
    """Verify that ``sequence.last`` of a recurrent sum matches ``sequence.reduce_sum``.

    Both reductions are applied to the same Embedding -> LSTM -> Dense model
    and fed into a softmax cross-entropy criterion.  The loss values and all
    non-sparse parameter gradients are compared with ``np.allclose``.
    """
    dev = cntk_device(device_id)  # evaluated only; grad() below runs without an explicit device
    n_vocab = 3
    n_embed = 2
    n_hidden = 2
    n_labels = 2

    seq_in = C.sequence.input_variable(n_vocab, is_sparse=True, name='features')
    label_in = C.input_variable(n_labels, is_sparse=True, name='labels')
    with C.default_options(initial_state=0.1):
        net = C.layers.Embedding(n_embed, name='embed')(seq_in)
        net = C.layers.Recurrence(C.layers.LSTM(n_hidden), go_backwards=False)(net)
        net = C.layers.Dense(n_labels, name='classify')(net)

    # Reference criterion: recurrent accumulation followed by taking the last step.
    accumulated = C.layers.Recurrence(C.plus)(net)
    reference_ce = C.cross_entropy_with_softmax(C.sequence.last(accumulated), label_in)

    sample_a = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    sample_b = [[0, 0, 1], [0, 1, 1]]
    label_csr = _to_csr([[0, 1], [1, 0]])

    def _evaluate(criterion):
        # Returns (gradient dict, loss value) for the fixed two-sample batch.
        batch = {seq_in: [_to_csr(sample_a), _to_csr(sample_b)], label_in: label_csr}
        return criterion.grad(batch, wrt=criterion.parameters, outputs=[criterion], as_numpy=False)

    reference_grads, reference_loss = _evaluate(reference_ce)

    # Candidate criterion: built-in sequence sum reduction.
    candidate_ce = C.cross_entropy_with_softmax(C.sequence.reduce_sum(net), label_in)
    candidate_grads, candidate_loss = _evaluate(candidate_ce)

    assert np.allclose(reference_loss.asarray(), candidate_loss.asarray())

    for param in reference_grads:
        if reference_grads[param].is_sparse:
            continue  # only dense gradients are compared
        want = reference_grads[param].asarray()
        got = candidate_grads[param].asarray()
        assert np.allclose(want, got)
示例#4
0
def test_to_sequence_basic(device_id):
    """Exercise ``C.to_sequence`` with and without a sequence-lengths operand.

    Without lengths, a dense ``(FreeDimension, 2)`` input must come back
    unchanged and carry two dynamic axes.  With lengths ``[2, 1]``, a sparse
    ``(FreeDimension, 2, 3)`` input must keep both entries of the first
    sample and only the first entry of the second sample.
    """
    device = cntk_device(device_id)

    # Case 1: dense data, lengths not given.
    plain_var = C.input_variable((C.FreeDimension, 2))
    plain_seq = C.to_sequence(plain_var)
    assert len(plain_seq.dynamic_axes) == 2

    plain_data = np.asarray([[[1, 2], [-1000, -1000]], [[3, 4], [5, 6]]], dtype=np.float32)
    plain_result = plain_seq.eval({plain_var: plain_data}, device=device)
    assert np.array_equal(plain_result, plain_data)

    # Case 2: sparse data, lengths given as a second input.
    sparse_var = C.input_variable((C.FreeDimension, 2, 3), is_sparse=True)
    lens_var = C.input_variable(())
    sparse_seq = C.to_sequence(sparse_var, lens_var)

    long_sample = [[[0, 1, 1], [0, 1, 0]], [[1, 0, 0], [1, 0, 1]]]
    long_csr = _to_csr(long_sample)
    long_view = C.NDArrayView.from_csr(long_csr, shape=(2, 2, 3), device=C.cpu())

    short_sample = [[0, 1, 1], [1, 1, 0]]
    padding = [[0, 0, 0], [0, 0, 0]]
    short_csr = _to_csr([short_sample, padding])
    short_view = C.NDArrayView.from_csr(short_csr, shape=(2, 2, 3), device=C.cpu())

    holder = C.input_variable((2, 2, 3), is_sparse=True)
    sparse_data = C.Value.create(holder, [long_view, short_view], device=device).data
    lens_data = np.asarray([2, 1], dtype=np.float32)
    sparse_result = sparse_seq.eval(
        {sparse_var: sparse_data, lens_var: lens_data},
        device=device,
        as_numpy=False)

    dense_result = _to_dense(sparse_result, True)
    assert np.array_equal(dense_result[0], long_sample)
    assert np.array_equal(dense_result[1], [short_sample])
def test_2d_sparse_sequences_value(device_id):
    """Check ``C.Value.create`` on two CSR-backed sequences of different length.

    The first sequence has two ``(2, 3)`` elements and the second has one.
    The densified batch data must equal the first sequence followed by the
    second sequence padded with one all-zero ``(2, 3)`` element.
    """
    target_device = cntk_device(device_id)

    two_step = [[[0, 1, 1], [0, 1, 0]], [[1, 0, 0], [1, 0, 1]]]
    two_step_view = C.NDArrayView.from_csr(_to_csr(two_step), shape=(2, 2, 3), device=C.cpu())

    one_step = [[0, 1, 1], [1, 1, 0]]
    one_step_view = C.NDArrayView.from_csr(_to_csr(one_step), shape=(1, 2, 3), device=C.cpu())

    seq_var = C.sequence.input_variable((2, 3))
    batch_value = C.Value.create(seq_var, [two_step_view, one_step_view], device=target_device)

    zero_step = [[0, 0, 0], [0, 0, 0]]
    expected = [two_step, [one_step, zero_step]]
    actual = _to_dense(batch_value.data)
    assert np.array_equal(actual, expected)
示例#6
0
def test_to_sequence_backprop(device_id):
    """Compare a sequence-input model with a clone fed through ``C.to_sequence``.

    An Embedding -> LSTM -> Dense model with a per-step cross-entropy loss is
    evaluated once on a native sequence input.  The loss is then cloned with
    shared parameters so that its features come from a non-sequence
    ``(FreeDimension, vocab)`` input plus a lengths input converted by
    ``C.to_sequence``.  Per-sequence losses and all non-sparse parameter
    gradients of both versions must be exactly equal.
    """
    dev = cntk_device(device_id)  # evaluated only; grad() below runs without an explicit device
    vocab_size = 3
    embedding_dim = 2
    lstm_dim = 2
    label_count = 2

    seq_features = C.sequence.input_variable(vocab_size, is_sparse=True, name='features')
    with C.default_options(initial_state=0.1):
        net = C.layers.Embedding(embedding_dim, name='embed')(seq_features)
        net = C.layers.Recurrence(C.layers.LSTM(lstm_dim), go_backwards=False)(net)
        net = C.layers.Dense(label_count, name='classify')(net)

    seq_labels = C.sequence.input_variable(label_count, is_sparse=True, name='labels')
    reference_loss = C.cross_entropy_with_softmax(net, seq_labels)

    first_seq = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    second_seq = [[0, 0, 1], [0, 1, 1]]
    first_labels = [[0, 1], [0, 1], [1, 0]]
    second_labels = [[1, 0], [0, 1]]
    label_batch = [_to_csr(first_labels), _to_csr(second_labels)]

    reference_feed = {
        seq_features: [_to_csr(first_seq), _to_csr(second_seq)],
        seq_labels: label_batch,
    }
    reference_grads, reference_value = reference_loss.grad(
        reference_feed, wrt=reference_loss.parameters, outputs=[reference_loss], as_numpy=False)

    # Build the clone: a non-sequence input plus lengths, turned into a sequence.
    flat_features = C.input_variable((C.FreeDimension, vocab_size), is_sparse=True, name='non_seq_features')
    lengths = C.input_variable((), name='sequence_lengths')
    as_sequence = C.to_sequence(flat_features, lengths)
    # Multiplying by the identity matrix leaves the values unchanged.
    identity = np.eye(vocab_size, dtype=np.float32)
    reconciled = C.reconcile_dynamic_axes(C.times(as_sequence, identity), seq_labels)
    cloned_loss = reference_loss.clone('share', {seq_features: reconciled})

    # The shorter sequence is padded with one all-zero row to length 3.
    padded_batch = [first_seq, second_seq + [[0, 0, 0]]]
    flat_data = C.NDArrayView.from_csr(_to_csr(padded_batch), shape=(2, 3, 3))
    lengths_data = np.asarray([3, 2], dtype=np.float32)

    def _clone_argument(wanted_name):
        # Look the input up on the clone by its name.
        return next(argument for argument in cloned_loss.arguments if argument.name == wanted_name)

    clone_features = _clone_argument('non_seq_features')
    clone_labels = _clone_argument('labels')
    clone_lengths = _clone_argument('sequence_lengths')
    clone_feed = {
        clone_features: flat_data,
        clone_lengths: lengths_data,
        clone_labels: label_batch,
    }
    clone_grads, clone_value = cloned_loss.grad(
        clone_feed, wrt=cloned_loss.parameters, outputs=[cloned_loss], as_numpy=False)

    for seq_index in (0, 1):
        assert np.array_equal(reference_value.as_sequences()[seq_index],
                              clone_value.as_sequences()[seq_index])

    for param in reference_grads:
        if reference_grads[param].is_sparse:
            continue  # sparse gradients are not compared
        expected_grad = reference_grads[param].asarray()
        actual_grad = clone_grads[param].asarray()
        assert np.array_equal(expected_grad, actual_grad)
示例#7
0
def test_to_sequence_backprop(device_id):
    """Check that backprop through ``C.to_sequence`` matches a native sequence input.

    The same cross-entropy criterion is evaluated twice: on a sparse sequence
    input, and on a parameter-sharing clone whose features are a padded
    non-sequence batch converted with ``C.to_sequence`` and explicit lengths
    ``[3, 2]``.  Losses per sequence and non-sparse parameter gradients must
    match exactly (``np.array_equal``).
    """
    dev = cntk_device(device_id)  # evaluated only; grad() below runs without an explicit device
    n_vocab = 3
    n_embed = 2
    n_hidden = 2
    n_labels = 2

    native_in = C.sequence.input_variable(n_vocab, is_sparse=True, name='features')
    with C.default_options(initial_state=0.1):
        scores = C.layers.Embedding(n_embed, name='embed')(native_in)
        scores = C.layers.Recurrence(C.layers.LSTM(n_hidden), go_backwards=False)(scores)
        scores = C.layers.Dense(n_labels, name='classify')(scores)

    label_in = C.sequence.input_variable(n_labels, is_sparse=True, name='labels')
    native_ce = C.cross_entropy_with_softmax(scores, label_in)

    sample_a = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    sample_b = [[0, 0, 1], [0, 1, 1]]
    labels_a = [[0, 1], [0, 1], [1, 0]]
    labels_b = [[1, 0], [0, 1]]
    label_csrs = [_to_csr(labels_a), _to_csr(labels_b)]

    native_grads, native_loss = native_ce.grad(
        {native_in: [_to_csr(sample_a), _to_csr(sample_b)], label_in: label_csrs},
        wrt=native_ce.parameters,
        outputs=[native_ce],
        as_numpy=False)

    # Replacement feature path: non-sequence input -> to_sequence -> identity
    # product -> dynamic axes reconciled with the label input.
    packed_in = C.input_variable((C.FreeDimension, n_vocab), is_sparse=True, name='non_seq_features')
    lens_in = C.input_variable((), name='sequence_lengths')
    unpacked = C.to_sequence(packed_in, lens_in)
    unpacked = C.times(unpacked, np.eye(n_vocab, dtype=np.float32))
    unpacked = C.reconcile_dynamic_axes(unpacked, label_in)
    cloned_ce = native_ce.clone('share', {native_in: unpacked})

    packed_data = C.NDArrayView.from_csr(_to_csr([sample_a, sample_b + [[0, 0, 0]]]), shape=(2, 3, 3))
    lens_data = np.asarray([3, 2], dtype=np.float32)

    # Re-resolve the inputs from the clone's own argument list, by name.
    by_name = lambda name: next(argument for argument in cloned_ce.arguments if argument.name == name)
    packed_arg = by_name('non_seq_features')
    label_arg = by_name('labels')
    lens_arg = by_name('sequence_lengths')

    cloned_grads, cloned_loss = cloned_ce.grad(
        {packed_arg: packed_data, lens_arg: lens_data, label_arg: label_csrs},
        wrt=cloned_ce.parameters,
        outputs=[cloned_ce],
        as_numpy=False)

    assert np.array_equal(native_loss.as_sequences()[0], cloned_loss.as_sequences()[0])
    assert np.array_equal(native_loss.as_sequences()[1], cloned_loss.as_sequences()[1])

    for param in native_grads:
        native_grad = native_grads[param]
        if not native_grad.is_sparse:
            assert np.array_equal(native_grad.asarray(), cloned_grads[param].asarray())
示例#8
0
def test_lstm_over_lstm_thought_vectors_2(device_id):
    """Check per-step losses of a two-level LSTM built via ``UtteranceBatchReshape``.

    A first LSTM runs over each utterance and its last output is taken.  The
    result, together with the conversation lengths, goes through the
    ``UtteranceBatchReshape`` user function (defined outside this block) and
    ``C.to_sequence_like`` against the label input, followed by a second LSTM
    and a Dense classifier.  The cross-entropy losses of the three label
    sequences are compared with hard-coded reference values (``atol=0.01``).
    """
    # NOTE(review): dev is never passed on; Value.create uses C.cpu() and grad() gets no device.
    dev = cntk_device(device_id)
    input_vocab_size=3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    utterances_input = C.sequence.input_variable((input_vocab_size), is_sparse=True, name='utterances')
    conversation_lengths_input = C.input_variable((), name='conversation_sequence_lengths')
    # Labels use their own named sequence axis.
    label_input = C.sequence.input_variable(num_labels, is_sparse=True, sequence_axis=C.Axis('label_sequence'), name='labels')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(utterances_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.sequence.last(model)
        # presumably regroups the per-utterance vectors by conversation - TODO confirm against UtteranceBatchReshape
        model = C.user_function(UtteranceBatchReshape(model, conversation_lengths_input))
        model = C.to_sequence_like(model, label_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    ce = C.cross_entropy_with_softmax(z, label_input)

    # One-step placeholder utterance used to fill unused slots in the batch below.
    sentinel_utt_data = C.NDArrayView.from_csr(_to_csr([[0, 0, 1]]), device=C.cpu())
    c1_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1], [0, 1, 0], [1, 0, 0]]), device=C.cpu())
    c1_utt2_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0], [0, 1, 1]]), device=C.cpu())
    c1_utt3_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1], [0, 1, 0]]), device=C.cpu())
    c2_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1]]), device=C.cpu())
    c3_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0], [0, 1, 1], [1, 0, 0]]), device=C.cpu())
    c3_utt2_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0]]), device=C.cpu())

    # Nine utterances: three slots per conversation, with sentinels in the unused slots.
    all_utt_data = C.Value.create(C.sequence.input_variable((input_vocab_size), is_sparse=True), [c1_utt1_data, c1_utt2_data, c1_utt3_data, c2_utt1_data, sentinel_utt_data, sentinel_utt_data, c3_utt1_data, c3_utt2_data, sentinel_utt_data], device=C.cpu()).data
    # Number of real (non-sentinel) utterances in each of the three conversations.
    conversation_lengths_data = np.asarray([3, 1, 2], dtype=np.float32)
    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0]]
    seq3_label_data = [[1, 0], [0, 1]]
    label_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data), _to_csr(seq3_label_data)]
    # param_grads is computed but not checked; only the loss values are asserted.
    param_grads, loss_result = ce.grad({utterances_input : all_utt_data, label_input : label_data, conversation_lengths_input : conversation_lengths_data},
                                       wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    # Hard-coded reference losses, one row per label step.
    absolute_tolerance = 0.01
    assert np.allclose(loss_result[0], [[0.678914], [0.668076], [0.728129]], atol=absolute_tolerance)
    assert np.allclose(loss_result[1], [[0.679029]], atol=absolute_tolerance)
    assert np.allclose(loss_result[2], [[0.705393], [0.674243]], atol=absolute_tolerance)
示例#9
0
def test_lstm_over_lstm_thought_vectors_2(device_id):
    """Verify losses of an utterance-level LSTM feeding a conversation-level LSTM.

    Each utterance is embedded and run through an LSTM, keeping the last
    output.  These outputs and a conversation-lengths input pass through the
    ``UtteranceBatchReshape`` user function (not defined in this block) and
    ``C.to_sequence_like`` with the label input, then a second LSTM and a
    Dense layer.  Per-step cross-entropy losses for three conversations are
    checked against fixed values within an absolute tolerance of 0.01.
    """
    # NOTE(review): dev is unused below; all data is created on C.cpu() and grad() takes no device.
    dev = cntk_device(device_id)
    input_vocab_size=3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    utterances_input = C.sequence.input_variable((input_vocab_size), is_sparse=True, name='utterances')
    conversation_lengths_input = C.input_variable((), name='conversation_sequence_lengths')
    label_input = C.sequence.input_variable(num_labels, is_sparse=True, sequence_axis=C.Axis('label_sequence'), name='labels')
    with C.default_options(initial_state=0.1):
        # Utterance level: embed, run the LSTM, keep the last step.
        model = C.layers.Embedding(emb_dim, name='embed')(utterances_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.sequence.last(model)
        # Semantics of UtteranceBatchReshape are not visible here - verify against its definition.
        model = C.user_function(UtteranceBatchReshape(model, conversation_lengths_input))
        model = C.to_sequence_like(model, label_input)
        # Conversation level: second LSTM and the classifier.
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    ce = C.cross_entropy_with_softmax(z, label_input)

    # Filler utterance (a single step) for batch slots that hold no real utterance.
    sentinel_utt_data = C.NDArrayView.from_csr(_to_csr([[0, 0, 1]]), device=C.cpu())
    c1_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1], [0, 1, 0], [1, 0, 0]]), device=C.cpu())
    c1_utt2_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0], [0, 1, 1]]), device=C.cpu())
    c1_utt3_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1], [0, 1, 0]]), device=C.cpu())
    c2_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1]]), device=C.cpu())
    c3_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0], [0, 1, 1], [1, 0, 0]]), device=C.cpu())
    c3_utt2_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0]]), device=C.cpu())

    # Batch layout: 3 conversations x 3 utterance slots; sentinels occupy the empty slots.
    all_utt_data = C.Value.create(C.sequence.input_variable((input_vocab_size), is_sparse=True), [c1_utt1_data, c1_utt2_data, c1_utt3_data, c2_utt1_data, sentinel_utt_data, sentinel_utt_data, c3_utt1_data, c3_utt2_data, sentinel_utt_data], device=C.cpu()).data
    # Count of real utterances per conversation, matching the layout above.
    conversation_lengths_data = np.asarray([3, 1, 2], dtype=np.float32)
    # One label per real utterance in each conversation.
    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0]]
    seq3_label_data = [[1, 0], [0, 1]]
    label_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data), _to_csr(seq3_label_data)]
    # The gradients (param_grads) are not inspected afterwards.
    param_grads, loss_result = ce.grad({utterances_input : all_utt_data, label_input : label_data, conversation_lengths_input : conversation_lengths_data},
                                       wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    # Fixed reference values for the loss of every label step.
    absolute_tolerance = 0.01
    assert np.allclose(loss_result[0], [[0.678914], [0.668076], [0.728129]], atol=absolute_tolerance)
    assert np.allclose(loss_result[1], [[0.679029]], atol=absolute_tolerance)
    assert np.allclose(loss_result[2], [[0.705393], [0.674243]], atol=absolute_tolerance)
def test_ndarrayview_from_csr(device_id):
    """Check ``C.NDArrayView.from_csr`` round-trips data and rejects wrong shapes.

    A ``(2, 2, 3)`` nested list is converted to CSR and back; the densified
    view must equal the input.  Requesting shape ``(3, 2, 3)`` or
    ``(2, 2, 4)`` for the same CSR data must raise ``ValueError``.
    """
    dev = cntk_device(device_id)  # evaluated only; from_csr is called without a device

    dense_data = [[[0, 1, 1], [0, 1, 0]], [[1, 0, 0], [1, 0, 1]]]
    csr = _to_csr(dense_data)

    good_view = C.NDArrayView.from_csr(csr, shape=(2, 2, 3))
    assert np.array_equal(_to_dense(good_view), dense_data)

    # Shapes that do not match the CSR contents must be rejected.
    for mismatched_shape in ((3, 2, 3), (2, 2, 4)):
        with pytest.raises(ValueError):
            C.NDArrayView.from_csr(csr, shape=mismatched_shape)
示例#11
0
def test_lstm_over_lstm_thought_vectors(device_id):
    """Check per-step losses of an LSTM stacked over last-step LSTM outputs.

    The feature input is a sparse sequence whose elements have shape
    ``(FreeDimension, input_vocab_size)``.  It is embedded, run through an
    LSTM, reduced with ``C.sequence.last``, converted with
    ``C.to_sequence_like`` against the label input, and passed through a
    second LSTM and a Dense classifier.  The cross-entropy losses of both
    label sequences are compared with hard-coded values (``atol=0.02``).
    """
    # NOTE(review): dev is never used; the data lives on C.cpu() and grad() gets no device.
    dev = cntk_device(device_id)
    input_vocab_size=3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    x_seq_input = C.sequence.input_variable((C.FreeDimension, input_vocab_size), is_sparse=True, name='features')
    # Labels use their own named sequence axis.
    label_seq_input = C.sequence.input_variable(num_labels, is_sparse=True, sequence_axis=C.Axis('label_sequence'), name='labels')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.sequence.last(model)
        model = C.to_sequence_like(model, label_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    ce = C.cross_entropy_with_softmax(z, label_seq_input)

    # First sample: three 3x3 elements.
    seq1_data = [[[0, 1, 1], [0, 1, 0], [1, 0, 0]], [[1, 1, 0], [0, 0, 1], [1, 0, 1]], [[1, 0, 0], [0, 0, 1], [1, 1, 0]]]
    csr_seq1 = _to_csr(seq1_data)
    ndarrayview1 = C.NDArrayView.from_csr(csr_seq1, shape=(3, 3, 3), device=C.cpu())
    # Second sample: two 3x3 elements; the last row of its second element is all zeros.
    seq2_data = [[[0, 0, 1], [0, 1, 1], [1, 0, 1]], [[0, 1, 0], [1, 0, 1], [0, 0, 0]]]
    csr_seq2 = _to_csr(seq2_data)
    ndarrayview2 = C.NDArrayView.from_csr(csr_seq2, shape=(2, 3, 3), device=C.cpu())
    x_seq_data = C.Value.create(C.sequence.input_variable((3, 3), is_sparse=True), [ndarrayview1, ndarrayview2], device=C.cpu()).data

    # Label sequences have 3 and 2 steps, matching the element counts of the two samples.
    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0], [0, 1]]
    label_seq_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data)]
    # param_grads is computed but not checked; only the loss values are asserted.
    param_grads, loss_result = ce.grad({x_seq_input : x_seq_data, label_seq_input : label_seq_data},
                                       wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    # Hard-coded reference losses.
    # NOTE(review): no random seed is set here, so these presumably depend on parameter initialization - confirm.
    absolute_tolerance = 0.02
    assert np.allclose(loss_result[0], [[0.67126], [0.676331], [0.765814]], atol=absolute_tolerance)
    assert np.allclose(loss_result[1], [[0.685199], [0.681736]], atol=absolute_tolerance)
示例#12
0
def test_lstm_over_lstm_thought_vectors(device_id):
    """Verify losses of a second LSTM running over last-step outputs of a first LSTM.

    Sparse sequence features with element shape
    ``(FreeDimension, input_vocab_size)`` go through Embedding, an LSTM and
    ``C.sequence.last``; ``C.to_sequence_like`` is then applied against the
    label input before a second LSTM and a Dense layer.  Cross-entropy
    losses for two samples are asserted against fixed values with an
    absolute tolerance of 0.02.
    """
    # NOTE(review): dev is unused below; Value.create targets C.cpu() and grad() takes no device.
    dev = cntk_device(device_id)
    input_vocab_size=3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    x_seq_input = C.sequence.input_variable((C.FreeDimension, input_vocab_size), is_sparse=True, name='features')
    label_seq_input = C.sequence.input_variable(num_labels, is_sparse=True, sequence_axis=C.Axis('label_sequence'), name='labels')
    with C.default_options(initial_state=0.1):
        # Inner stage: embed, run the LSTM, keep the last step.
        model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.sequence.last(model)
        # Outer stage: convert using the label input, then a second LSTM and the classifier.
        model = C.to_sequence_like(model, label_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    ce = C.cross_entropy_with_softmax(z, label_seq_input)

    # Sample 1 is given shape (3, 3, 3) and sample 2 shape (2, 3, 3) when wrapped below.
    seq1_data = [[[0, 1, 1], [0, 1, 0], [1, 0, 0]], [[1, 1, 0], [0, 0, 1], [1, 0, 1]], [[1, 0, 0], [0, 0, 1], [1, 1, 0]]]
    csr_seq1 = _to_csr(seq1_data)
    ndarrayview1 = C.NDArrayView.from_csr(csr_seq1, shape=(3, 3, 3), device=C.cpu())
    seq2_data = [[[0, 0, 1], [0, 1, 1], [1, 0, 1]], [[0, 1, 0], [1, 0, 1], [0, 0, 0]]]
    csr_seq2 = _to_csr(seq2_data)
    ndarrayview2 = C.NDArrayView.from_csr(csr_seq2, shape=(2, 3, 3), device=C.cpu())
    # The batch is created against a fixed (3, 3) element shape rather than x_seq_input itself.
    x_seq_data = C.Value.create(C.sequence.input_variable((3, 3), is_sparse=True), [ndarrayview1, ndarrayview2], device=C.cpu()).data

    # One label per element of each sample (3 and 2 labels).
    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0], [0, 1]]
    label_seq_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data)]
    # The gradients (param_grads) are not inspected afterwards.
    param_grads, loss_result = ce.grad({x_seq_input : x_seq_data, label_seq_input : label_seq_data},
                                       wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    # Fixed reference values for the loss of every label step.
    # NOTE(review): the test sets no random seed; whether these values are stable across runs should be confirmed.
    absolute_tolerance = 0.02
    assert np.allclose(loss_result[0], [[0.67126], [0.676331], [0.765814]], atol=absolute_tolerance)
    assert np.allclose(loss_result[1], [[0.685199], [0.681736]], atol=absolute_tolerance)
示例#13
0
def test_lstm_over_lstm_thought_vectors(device_id):
    """Check per-step losses of an LSTM stacked over last-step LSTM outputs.

    The global CNTK random seed is reset to 0 for the duration of the test.
    The model embeds a sparse sequence input whose elements have shape
    ``(FreeDimension, input_vocab_size)``, runs an LSTM, keeps the last
    output, applies ``C.to_sequence_like`` against the label input, and adds
    a second LSTM and a Dense classifier.  The cross-entropy losses for two
    samples are compared with hard-coded reference values (``atol=0.02``).

    The previous random seed is restored in a ``finally`` clause so that a
    failing assertion or an exception in this test does not leave seed 0
    active for tests that run later in the same process.
    """
    previous_random_seed = C.cntk_py.get_random_seed()
    C.cntk_py.reset_random_seed(0)
    try:
        # NOTE(review): dev is not passed on; data is created on C.cpu() and grad() gets no device.
        dev = cntk_device(device_id)
        input_vocab_size = 3
        emb_dim = 2
        hidden_dim = 2
        num_labels = 2
        x_seq_input = C.sequence.input((C.FreeDimension, input_vocab_size),
                                       is_sparse=True,
                                       name='features')
        # Labels use their own named sequence axis.
        label_seq_input = C.sequence.input(num_labels,
                                           is_sparse=True,
                                           sequence_axis=Axis('label_sequence'),
                                           name='labels')
        with C.default_options(initial_state=0.1):
            model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
            model = C.layers.Recurrence(C.layers.LSTM(hidden_dim),
                                        go_backwards=False)(model)
            model = C.sequence.last(model)
            model = C.to_sequence_like(model, label_seq_input)
            model = C.layers.Recurrence(C.layers.LSTM(hidden_dim),
                                        go_backwards=False)(model)
            model = C.layers.Dense(num_labels, name='classify')(model)

        z = model
        ce = C.cross_entropy_with_softmax(z, label_seq_input)

        # First sample: three 3x3 elements.
        seq1_data = [[[0, 1, 1], [0, 1, 0], [1, 0, 0]],
                     [[1, 1, 0], [0, 0, 1], [1, 0, 1]],
                     [[1, 0, 0], [0, 0, 1], [1, 1, 0]]]
        csr_seq1 = _to_csr(seq1_data)
        ndarrayview1 = C.NDArrayView.from_csr(csr_seq1,
                                              shape=(3, 3, 3),
                                              device=C.cpu())
        # Second sample: two 3x3 elements.
        seq2_data = [[[0, 0, 1], [0, 1, 1], [1, 0, 1]],
                     [[0, 1, 0], [1, 0, 1], [0, 0, 0]]]
        csr_seq2 = _to_csr(seq2_data)
        ndarrayview2 = C.NDArrayView.from_csr(csr_seq2,
                                              shape=(2, 3, 3),
                                              device=C.cpu())
        x_seq_data = C.Value.create(C.sequence.input((3, 3), is_sparse=True),
                                    [ndarrayview1, ndarrayview2],
                                    device=C.cpu()).data

        # One label per element of each sample (3 and 2 labels).
        seq1_label_data = [[0, 1], [0, 1], [1, 0]]
        seq2_label_data = [[1, 0], [0, 1]]
        label_seq_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data)]
        # param_grads is computed but not checked; only the losses are asserted.
        param_grads, loss_result = ce.grad(
            {
                x_seq_input: x_seq_data,
                label_seq_input: label_seq_data
            },
            wrt=ce.parameters,
            outputs=[ce],
            as_numpy=False)

        loss_result = loss_result.as_sequences()

        # TODO: The tolerance here is inordinately high due to the non-determinism in initialization
        # of parameters as the individual tests are not run in separate processes resulting in the
        # addition or removal of tests to affect the random initialization of parameters in all other
        # tests that do not explicitly specify the random seed. The tolerance should be lowered to
        # 0.01 after this issue in the test infrastructure has been fixed.
        absolute_tolerance = 0.02
        assert np.allclose(loss_result[0], [[0.63504], [0.673343], [0.698446]],
                           atol=absolute_tolerance)
        assert np.allclose(loss_result[1], [[0.772344], [0.64295]],
                           atol=absolute_tolerance)
    finally:
        # Always restore the seed, including when an assertion above fails.
        C.cntk_py.reset_random_seed(previous_random_seed)