Example No. 1
def test_ffnet_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

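    # Run the feed-forward network example end-to-end and compare its reported
    # average error against the recorded baseline (TOLERANCE_ABSOLUTE is defined
    # at module level in the original test file).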
    avg_error = ffnet(debug_output=False)
    expected_avg_error = 0.12
    assert np.allclose(avg_error, expected_avg_error, atol=TOLERANCE_ABSOLUTE)
Example No. 2
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # to become invariant to initialization order, which is a valid change

    # test of the example itself
    # this emulates the main code in the PY file
    reader = create_reader(data_dir + "/atis.train.ctf")
    model = create_model()
    loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg],
                       expected_avg,
                       atol=TOLERANCE_ABSOLUTE)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        reader = create_reader(data_dir + "/atis.train.ctf")
        model = create_test_model()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        log_number_of_parameters(model, trace_level=1)
        print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg],
                           expected_avg,
                           atol=TOLERANCE_ABSOLUTE)
Example No. 3
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    evaluation_avg, loss_avg = slu_hands_on()

    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
Example No. 4
def test_sequence_to_sequence(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    error = sequence_to_sequence_translator()

    expected_error = 0.8596881547969316
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
Example No. 5
def test_simple_mnist_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    test_error = simple_mnist()
    expected_test_error = 0.09

    assert np.allclose(test_error, expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
Example No. 6
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

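    # Train the LSTM sequence-classification example and check the final
    # evaluation and loss averages against the expected values.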
    evaluation_avg, loss_avg = train_sequence_classifier()

    expected_avg = [0.55, 1.53099]
    assert np.allclose([evaluation_avg, loss_avg],
                       expected_avg,
                       atol=TOLERANCE_ABSOLUTE)
Example No. 7
def test_initializer_init(device_id):
    from cntk.utils import cntk_device
    from cntk import DeviceDescriptor, cntk_py
    cntk_py.always_allow_setting_default_device()
    DeviceDescriptor.set_default_device(cntk_device(device_id))

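    # Exercise each built-in initializer; _check (defined elsewhere in this
    # test module) is assumed to create a parameter with the given initializer
    # and validate the resulting values.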
    _check(uniform(scale=10), 'uniform')
    _check(gaussian(output_rank=1, filter_rank=2, scale=10), 'gaussian')
    _check(xavier(output_rank=1, filter_rank=2, scale=10), 'xavier')
    _check(glorot_uniform(output_rank=1, filter_rank=2, scale=10),
           'glorot_uniform')
    _check(glorot_normal(output_rank=1, filter_rank=2, scale=10),
           'glorot_normal')
    _check(he_uniform(output_rank=1, filter_rank=2, scale=10), 'he_uniform')
    _check(he_normal(output_rank=1, filter_rank=2, scale=10), 'he_normal')
Example No. 8
def test_cifar_resnet_error(device_id):
    target_device = DeviceDescriptor.gpu_device(0)
    DeviceDescriptor.set_default_device(target_device)

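    # Locate the CIFAR-10 data: prefer the external test-data directory when
    # the environment variable is set, otherwise fall back to the in-repo
    # example location.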
    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/Miscellaneous/CIFAR-10/cifar-10-batches-py".split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    test_error = cifar_resnet(base_path)
    expected_test_error = 0.7

    assert np.allclose(test_error,
                       expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
Example No. 9
def test_cifar_resnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/Datasets/CIFAR-10/cifar-10-batches-py"
            .split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    test_error = cifar_resnet(base_path)
    expected_test_error = 0.7

    assert np.allclose(test_error,
                       expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
Example No. 10
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1) # to become invariant to initialization order, which is a valid change

    # test of the example itself
    # this emulates the main code in the PY file
    reader = create_reader(data_dir + "/atis.train.ctf")
    model = create_model()
    loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0: # BatchNormalization currently does not run on CPU
        reader = create_reader(data_dir + "/atis.train.ctf")
        model = create_test_model()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        log_number_of_parameters(model, trace_level=1) ; print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
Example No. 11
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {
            features: mb[features_si].m_data,
            label: mb[labels_si].m_data
        }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

        i += 1


if __name__ == '__main__':
    # Specify the target device to be used for computing
    target_device = DeviceDescriptor.cpu_device()
    DeviceDescriptor.set_default_device(target_device)

    train_sequence_classifier()
Example No. 12
def test_language_understanding(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed
    #set_computation_network_trace_level(1)
    set_fixed_random_seed(1) # to become invariant to initialization order, which is a valid change
    # BUGBUG: This ^^ currently seems to have no impact; the two BN models below should be identical in training

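    # Each run_model_test call below trains the listed model variant and
    # compares the resulting [metric, loss] averages against the recorded
    # expected values.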
    if device_id >= 0: # BatchNormalization currently does not run on CPU
        # change to intent classifier   --moved up here since this fails, as repro
        # BUGBUG: Broken, need to pass new criterion to train().
        #with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #    select_last = slice(Placeholder(), Axis.default_dynamic_axis(), -1, 0)
        #    # BUGBUG: Fails with "RuntimeError: The specified dynamic axis named defaultDynamicAxis does not match any of the dynamic axes of the operand"
        #    run_model_test('change to intent classifier', Sequential([
        #        Embedding(emb_dim),
        #        with_lookahead(),
        #        BatchNormalization(),
        #        BiRecurrence(LSTM(hidden_dim)),
        #        BatchNormalization(),
        #        select_last,  # fails here with an axis problem
        #        Dense(label_dim)
        #    ]), [0.084, 0.407364])


        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.0579573500457558, 0.3214986774820327])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
          #with default_options(dtype=np.float64):  # test this with double precision since single precision is too little for reproducible aggregation
          # ^^ This test requires to change the #if 1 in Functions.cpp PopulateNetworkInputs() to be changed to #if 0.
            run_model_test('replace lookahead by bidirectional model, with shared BN', Sequential([
                Embedding(emb_dim),
                BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=True),
                #BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(label_dim)
            ]), [0.0579573500457558, 0.3214986774820327])
            # values with normalization_time_constant=-1 and double precision:
            # [0.0583178503091983, 0.3199431143304898]
            """ with normalization_time_constant=-1:
             Minibatch[   1-   1]: loss = 5.945220 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.850601 * 63, metric = 79.4% * 63
             Minibatch[   3-   3]: loss = 3.816031 * 68, metric = 57.4% * 68
             Minibatch[   4-   4]: loss = 2.213172 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.615342 * 65, metric = 40.0% * 65
             Minibatch[   6-   6]: loss = 2.360896 * 62, metric = 25.8% * 62
             Minibatch[   7-   7]: loss = 1.452822 * 58, metric = 27.6% * 58
             Minibatch[   8-   8]: loss = 0.947210 * 70, metric = 10.0% * 70
             Minibatch[   9-   9]: loss = 0.595654 * 59, metric = 10.2% * 59
             Minibatch[  10-  10]: loss = 1.515479 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.686744 * 5654, metric = 10.4% * 5654
             Minibatch[ 101- 200]: loss = 0.289059 * 6329, metric = 5.8% * 6329
             Minibatch[ 201- 300]: loss = 0.218765 * 6259, metric = 4.7% * 6259
             Minibatch[ 301- 400]: loss = 0.182855 * 6229, metric = 3.5% * 6229
             Minibatch[ 401- 500]: loss = 0.156745 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.321413 * 36061, metric = 5.8% * 36061
            --> 0.057818696098277916 0.3214128415043278
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 2.5% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 2.8% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 4.0% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 3.0% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 3.8% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.5% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.5% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 1.6% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 1.6% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 7.9% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.2% * 10984
            --> 0.03159140568099053 0.0
            """

        # BatchNorm test case for global-corpus aggregation
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('BatchNorm global-corpus aggregation', Sequential([
                Embedding(emb_dim),
                BatchNormalization(normalization_time_constant=-1),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(label_dim)
            ]), [0.05662627214996811, 0.2968516879905391])
            """
             Minibatch[   1-   1]: loss = 5.745576 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.684151 * 63, metric = 90.5% * 63
             Minibatch[   3-   3]: loss = 3.957423 * 68, metric = 63.2% * 68
             Minibatch[   4-   4]: loss = 2.286908 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.733978 * 65, metric = 38.5% * 65
             Minibatch[   6-   6]: loss = 2.189765 * 62, metric = 30.6% * 62
             Minibatch[   7-   7]: loss = 1.427890 * 58, metric = 25.9% * 58
             Minibatch[   8-   8]: loss = 1.501557 * 70, metric = 18.6% * 70
             Minibatch[   9-   9]: loss = 0.632599 * 59, metric = 13.6% * 59
             Minibatch[  10-  10]: loss = 1.516047 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.580329 * 5654, metric = 9.8% * 5654
             Minibatch[ 101- 200]: loss = 0.280317 * 6329, metric = 5.6% * 6329
             Minibatch[ 201- 300]: loss = 0.188372 * 6259, metric = 4.1% * 6259
             Minibatch[ 301- 400]: loss = 0.170403 * 6229, metric = 3.9% * 6229
             Minibatch[ 401- 500]: loss = 0.159605 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.296852 * 36061, metric = 5.7% * 36061
            --> 0.05662627214996811 0.2968516879905391
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 1.8% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 3.4% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 3.9% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 4.1% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 4.0% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.2% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.8% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 2.9% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 2.0% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 8.2% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.5% * 10984
            --> 0.035050983248361256 0.0
            """


        # plus BatchNorm
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus BatchNorm', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.05662627214996811, 0.2968516879905391])

        # plus lookahead
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus lookahead', Sequential([
                Embedding(emb_dim),
                with_lookahead(),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.057901888466764646, 0.3044637752807047])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.0579573500457558, 0.3214986774820327])

        # test of a config like in the example but with additions to test many code paths
        with default_options(enable_self_stabilization=True, use_peepholes=True):
            run_model_test('alternate paths', Sequential([
                Stabilizer(),
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim, cell_shape=hidden_dim+50), go_backwards=True),
                BatchNormalization(map_rank=1),
                Dense(label_dim)
            ]), [0.08574360112032389, 0.41847621578367716])

    # test of the example itself
    # this emulates the main code in the PY file
    reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
    model = create_model()
    loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0: # BatchNormalization currently does not run on CPU
        # Create a path to TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_test_model()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1, tensorboard_logdir=tb_logdir)
        log_number_of_parameters(model, trace_level=1) ; print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

        # Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
        tb_files = 0
        for tb_file in os.listdir(tb_logdir):
            assert tb_file.startswith("events.out.tfevents")
            tb_files += 1
        assert tb_files == 1
Example No. 13
    lr = learning_rates_per_sample(0.007)
    momentum_time_constant = 1100
    momentum_per_sample = momentums_per_sample(math.exp(-1.0 / momentum_time_constant))
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True

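    # Build a momentum SGD learner: the per-sample momentum is derived from a
    # time constant via exp(-1/time_constant), with per-sample gradient clipping.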
    trainer = Trainer(z, ce, errs,
                      [momentum_sgd_learner(z.owner.parameters(), lr, momentum_per_sample,
                                            clipping_threshold_per_sample,
                                            gradient_clipping_with_truncation)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 72
    training_progress_output_freq = 10
    i = 0
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {raw_input : mb[features_si].m_data, raw_labels : mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

        i += 1

if __name__ == '__main__':
    # Specify the target device to be used for computing
    target_device = DeviceDescriptor.cpu_device()
    DeviceDescriptor.set_default_device(target_device)

    train_sequence_to_sequence_translator()
Example No. 14
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    evaluation_avg, loss_avg = train_sequence_classifier()
Example No. 15
def test_language_understanding(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed
    #set_computation_network_trace_level(1)
    set_fixed_random_seed(1) # to become invariant to initialization order, which is a valid change
    # BUGBUG: This ^^ currently seems to have no impact; the two BN models below should be identical in training

    if device_id >= 0: # BatchNormalization currently does not run on CPU
        # change to intent classifier   --moved up here since this fails, as repro
        # BUGBUG: Broken, need to pass new criterion to train().
        #with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #    select_last = slice(Placeholder(), Axis.default_dynamic_axis(), -1, 0)
        #    # BUGBUG: Fails with "RuntimeError: The specified dynamic axis named defaultDynamicAxis does not match any of the dynamic axes of the operand"
        #    run_model_test('change to intent classifier', Sequential([
        #        Embedding(emb_dim),
        #        with_lookahead(),
        #        BatchNormalization(),
        #        BiRecurrence(LSTM(hidden_dim)),
        #        BatchNormalization(),
        #        select_last,  # fails here with an axis problem
        #        Dense(num_labels)
        #    ]), [0.084, 0.407364])


        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
          #with default_options(dtype=np.float64):  # test this with double precision since single precision is too little for reproducible aggregation
          # ^^ This test requires to change the #if 1 in Functions.cpp PopulateNetworkInputs() to be changed to #if 0.
            run_model_test('replace lookahead by bidirectional model, with shared BN', Sequential([
                Embedding(emb_dim),
                BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=True),
                #BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])
            # values with normalization_time_constant=-1 and double precision:
            # [0.0583178503091983, 0.3199431143304898]
            """ with normalization_time_constant=-1:
             Minibatch[   1-   1]: loss = 5.945220 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.850601 * 63, metric = 79.4% * 63
             Minibatch[   3-   3]: loss = 3.816031 * 68, metric = 57.4% * 68
             Minibatch[   4-   4]: loss = 2.213172 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.615342 * 65, metric = 40.0% * 65
             Minibatch[   6-   6]: loss = 2.360896 * 62, metric = 25.8% * 62
             Minibatch[   7-   7]: loss = 1.452822 * 58, metric = 27.6% * 58
             Minibatch[   8-   8]: loss = 0.947210 * 70, metric = 10.0% * 70
             Minibatch[   9-   9]: loss = 0.595654 * 59, metric = 10.2% * 59
             Minibatch[  10-  10]: loss = 1.515479 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.686744 * 5654, metric = 10.4% * 5654
             Minibatch[ 101- 200]: loss = 0.289059 * 6329, metric = 5.8% * 6329
             Minibatch[ 201- 300]: loss = 0.218765 * 6259, metric = 4.7% * 6259
             Minibatch[ 301- 400]: loss = 0.182855 * 6229, metric = 3.5% * 6229
             Minibatch[ 401- 500]: loss = 0.156745 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.321413 * 36061, metric = 5.8% * 36061
            --> 0.057818696098277916 0.3214128415043278
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 2.5% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 2.8% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 4.0% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 3.0% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 3.8% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.5% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.5% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 1.6% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 1.6% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 7.9% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.2% * 10984
            --> 0.03159140568099053 0.0
            """

        # BatchNorm test case for global-corpus aggregation
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('BatchNorm global-corpus aggregation', Sequential([
                Embedding(emb_dim),
                BatchNormalization(normalization_time_constant=-1),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])
            """
             Minibatch[   1-   1]: loss = 5.745576 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.684151 * 63, metric = 90.5% * 63
             Minibatch[   3-   3]: loss = 3.957423 * 68, metric = 63.2% * 68
             Minibatch[   4-   4]: loss = 2.286908 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.733978 * 65, metric = 38.5% * 65
             Minibatch[   6-   6]: loss = 2.189765 * 62, metric = 30.6% * 62
             Minibatch[   7-   7]: loss = 1.427890 * 58, metric = 25.9% * 58
             Minibatch[   8-   8]: loss = 1.501557 * 70, metric = 18.6% * 70
             Minibatch[   9-   9]: loss = 0.632599 * 59, metric = 13.6% * 59
             Minibatch[  10-  10]: loss = 1.516047 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.580329 * 5654, metric = 9.8% * 5654
             Minibatch[ 101- 200]: loss = 0.280317 * 6329, metric = 5.6% * 6329
             Minibatch[ 201- 300]: loss = 0.188372 * 6259, metric = 4.1% * 6259
             Minibatch[ 301- 400]: loss = 0.170403 * 6229, metric = 3.9% * 6229
             Minibatch[ 401- 500]: loss = 0.159605 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.296852 * 36061, metric = 5.7% * 36061
            --> 0.05662627214996811 0.2968516879905391
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 1.8% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 3.4% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 3.9% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 4.1% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 4.0% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.2% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.8% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 2.9% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 2.0% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 8.2% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.5% * 10984
            --> 0.035050983248361256 0.0
            """


        # plus BatchNorm
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus BatchNorm', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])

        # plus lookahead
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus lookahead', Sequential([
                Embedding(emb_dim),
                with_lookahead(),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.057901888466764646, 0.3044637752807047])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # test of a config like in the example but with additions to test many code paths
        with default_options(enable_self_stabilization=True, use_peepholes=True):
            run_model_test('alternate paths', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim, cell_shape=hidden_dim+50), go_backwards=True),
                BatchNormalization(map_rank=1),
                Dense(num_labels)
            ]), [0.08574360112032389, 0.41847621578367716])

    # test of the example itself
    # this emulates the main code in the PY file
    if device_id >= 0: # sparse Adam currently does not run on CPU
        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_model_function()
        #loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        #expected_avg = [0.15570838301766451, 0.7846451368305728]
        #assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

        # test
        reader = create_reader(data_dir + "/atis.test.ctf", is_training=False)
        evaluate(reader, model)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0: # BatchNormalization currently does not run on CPU
        # Create a path to TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_test_model()
        # TODO: update example to support tensorboard, or decide to not show it in all examples (in upcoming update of examples)
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1) #, tensorboard_logdir=tb_logdir)
        log_number_of_parameters(model, trace_level=1) ; print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
Example No. 16
def test_sequence_to_sequence(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    error = sequence_to_sequence_translator()