示例#1
0
def test_native_user_function(tmpdir):
    """Round-trip a native user function with attributes through save/load.

    Registers ``NativeUserTimesOp`` when it is not registered yet, builds the
    op with a dictionary of custom attributes, saves it under ``tmpdir`` and
    checks that both the attributes and the evaluation result are intact on
    the reloaded function.
    """
    op_id = 'NativeUserTimesOp'
    if not C.cntk_py.is_native_user_function_registered(op_id):
        module_name = 'Cntk.ExtensibilityExamples-' + C.__version__.rstrip('+')
        C.ops.register_native_user_function(
            op_id, module_name, 'CreateUserTimesFunction')

    cpu_device = C.cpu()
    features = C.input_variable((2))
    weights_init = np.asarray([[0.5, 2], [-0.5, 1.5]], dtype=np.float32)
    weights = C.parameter((2, 2), init=weights_init, device=cpu_device)

    # One entry per attribute value type exercised by the test.
    expected_attrs = {
        'param_rank': 2,
        'padding': True,
        'none': None,
        'nested lists': [[1, 2, 3], [4, 5, 6]],
        'string': 'string',
        'some data': np.arange(1, 10, dtype=np.float32).reshape((3, 3)),
    }

    def check_attributes(udf):
        """Assert that ``udf.attributes`` matches every expected entry."""
        for key, expected in expected_attrs.items():
            if isinstance(expected, np.ndarray):
                # Array comparison is element-wise, so reduce it with all().
                assert (udf.attributes[key] == expected).all()
            else:
                assert udf.attributes[key] == expected

    native_op = C.ops.native_user_function(
        op_id, [weights, features], expected_attrs,
        'native_user_times_function')

    check_attributes(native_op.owner)

    saved_path = str(tmpdir / 'test_native_user_function.dat')
    native_op.save(saved_path)

    restored = Function.load(saved_path, device=cpu_device)
    batch = C.NDArrayView.from_dense(
        np.asarray([[0.1, 0.2], [-0.1, 0.3]], dtype=np.float32),
        device=cpu_device)
    output = restored.eval({restored.arguments[0]: batch}, device=cpu_device)

    assert np.allclose(output, [[-0.05, 0.5], [-0.2, 0.25]])

    # The attributes must also be present on the primitive found by name.
    restored_primitive = restored.find_by_name('native_user_times_function')

    check_attributes(restored_primitive)
示例#2
0
def test_override_serialize(tmpdir):
    """Check that nested ``MyPlusPlus`` functions evaluate the same after reload.

    Builds three levels of ``MyPlusPlus`` on top of two constants, evaluates
    the wrapped function, saves it under ``tmpdir``, loads it back and
    asserts that the reloaded function yields an equal result.
    """
    cpu_device = C.cpu()
    lhs, rhs = 1.2322341, -0.29084

    udf = MyPlusPlus([C.constant(lhs), C.constant(rhs)], '++')
    # Each further level feeds the previous function into both inputs.
    for level_name in ('+++', '++++'):
        udf = MyPlusPlus([udf, udf], level_name)
    wrapped = C.user_function(udf)
    expected = wrapped.eval({}, device=cpu_device)

    saved_path = str(tmpdir / 'test_udf_with_renamed_deserialize.dat')
    wrapped.save(saved_path)

    restored = Function.load(saved_path, device=cpu_device)

    assert expected == restored.eval({}, device=cpu_device)
示例#3
0
def test_override_serialize(tmpdir):
    """Save and reload a three-level ``MyPlusPlus`` graph and compare outputs.

    The function is evaluated once before saving and once after loading it
    back from ``tmpdir``; both evaluations must compare equal.
    """
    device = C.cpu()
    first, second = 1.2322341, -0.29084
    leaves = [C.constant(first), C.constant(second)]

    node = MyPlusPlus(leaves, '++')
    node = MyPlusPlus([node, node], '+++')
    node = MyPlusPlus([node, node], '++++')
    node = C.user_function(node)
    before_save = node.eval({}, device=device)

    target = str(tmpdir / 'test_udf_with_renamed_deserialize.dat')
    node.save(target)

    after_load = Function.load(target, device=device).eval({}, device=device)

    assert before_save == after_load
示例#4
0
def test_both_flavors_of_user_functions(tmpdir):
    """Reload the function from ``build_test_function`` and verify its output.

    The function is saved under ``tmpdir``, loaded back, and evaluated on
    five random 2x2 inputs. Each result must be close to
    ``matmul(x + c1, w) + c2`` computed with NumPy.
    """
    device, weights, c1, c2, model = build_test_function()

    saved_path = str(tmpdir / 'test_native_user_function.dat')
    model.save(saved_path)
    restored = Function.load(saved_path, device=device)

    # Fixed seed keeps the random inputs reproducible across runs.
    np.random.seed(1)

    for _ in range(5):
        sample = np.random.random((2, 2)).astype(np.float32)
        batch = C.NDArrayView.from_dense(sample, device=device)
        actual = restored.eval({restored.arguments[0]: batch}, device=device)
        shifted = sample + c1
        assert np.allclose(actual, np.matmul(shifted, weights) + c2)
def test_both_flavors_of_user_functions(tmpdir):
    """Check that the saved-and-reloaded test function matches a NumPy reference.

    Uses the device, constants and function returned by
    ``build_test_function``. After a save/load round trip through ``tmpdir``
    the function is evaluated on five seeded random 2x2 inputs and compared
    against ``matmul(x + c1, w) + c2``.
    """
    dev, w_ref, c1_ref, c2_ref, original = build_test_function()

    model_file = str(tmpdir / 'test_native_user_function.dat')
    original.save(model_file)
    reloaded = Function.load(model_file, device=dev)

    np.random.seed(1)

    def reference(x):
        """NumPy reference for the function's expected output."""
        return np.matmul((x + c1_ref), w_ref) + c2_ref

    for _ in range(5):
        x_np = np.random.random((2, 2)).astype(np.float32)
        x_view = C.NDArrayView.from_dense(x_np, device=dev)
        got = reloaded.eval({reloaded.arguments[0]: x_view}, device=dev)
        assert np.allclose(got, reference(x_np))
示例#6
0
def test_ext_train(tmpdir):
    """Train through a ``MyPlus`` user function and check its serialized state.

    Runs 100 minibatches through a graph containing ``MyPlus``, asserts that
    its forward and backward call counters both reached 100, then saves the
    model under ``tmpdir``, reloads it from an in-memory buffer and asserts
    that the counters stored in the serialized state are 100 as well.
    """
    dim = 4
    num_minibatches = 100

    p = C.parameter(shape=(dim, ), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0), [
        C.momentum_sgd(z.parameters,
                       lr_per_sample,
                       momentum_time_constant,
                       True,
                       minibatch_size=0)
    ])

    # A dedicated loop variable avoids clobbering the input variable ``i``.
    for _ in range(num_minibatches):
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == num_minibatches

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    # Read the saved model as raw bytes; the context manager closes the file
    # handle deterministically instead of leaving it to the garbage collector.
    with open(filepath, 'rb') as model_file:
        buf = model_file.read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == num_minibatches
示例#7
0
def test_ext_lambdafunc(tmpdir):
    """Check that a reloaded ``LambdaFunc`` still runs its callback conditionally.

    A ``LambdaFunc`` with a ``when`` predicate (sum of the argument > 1) and a
    counting callback is saved under ``tmpdir``. A deserialize callback for
    ``'conditional_exec_lambda'`` recreates it on load. Training on one
    minibatch of 0.1s must leave the counter at 0; a following minibatch of
    0.3s must raise it to 1.
    """
    dim = 4

    class CallbackCounter(object):
        """Counts how many times ``inc`` has been called."""

        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    # Loading needs a factory that rebuilds the LambdaFunc with the same
    # predicate and callback.
    Function.register_udf_deserialize_callback(
        'conditional_exec_lambda',
        lambda x, *unused: LambdaFunc(x,
                                      when=lambda arg: np.sum(arg) > 1,
                                      execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

    # 4 * 0.1 = 0.4 does not exceed 1, so the callback is expected to stay
    # silent (presumably ``when`` sees the forward value of ``i * p``).
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    # 4 * 0.3 = 1.2 exceeds 1, so the callback is expected to fire once.
    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
def test_ext_lambdafunc(tmpdir):
    """Check that a reloaded ``LambdaFunc`` still runs its callback conditionally.

    A ``LambdaFunc`` with a ``when`` predicate (sum of the argument > 1) and a
    counting callback is saved under ``tmpdir``. A deserialize callback for
    ``'conditional_exec_lambda'`` recreates it on load. Training on one
    minibatch of 0.1s must leave the counter at 0; a following minibatch of
    0.3s must raise it to 1.
    """
    dim = 4

    class CallbackCounter(object):
        """Counts how many times ``inc`` has been called."""

        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k,
                   when=lambda arg: np.sum(arg) > 1,
                   execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    # Loading needs a factory that rebuilds the LambdaFunc with the same
    # predicate and callback.
    Function.register_udf_deserialize_callback(
        'conditional_exec_lambda',
        lambda x, *unused: LambdaFunc(x,
                                      when=lambda arg: np.sum(arg) > 1,
                                      execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0), [C.momentum_sgd(z.parameters,
                                                           lr_per_sample,
                                                           momentum_time_constant,
                                                           True)])

    # 4 * 0.1 = 0.4 does not exceed 1, so the callback is expected to stay
    # silent (presumably ``when`` sees the forward value of ``i * p``).
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    # 4 * 0.3 = 1.2 exceeds 1, so the callback is expected to fire once.
    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
示例#9
0
def test_override_deserialize(tmpdir):
    """Reload a saved function with ``MyPlus`` deserialized as ``MyPlusPlus``.

    After saving the function from ``build_test_function`` under ``tmpdir``,
    a deserialize callback registered for ``MyPlus._op_name()`` constructs
    ``MyPlusPlus`` instead. The reloaded function is evaluated on five seeded
    random 2x2 inputs and must match
    ``2 * (matmul(2 * (x + c1), w) + c2)``.
    """
    device, weights, c1, c2, model = build_test_function()

    saved_path = str(tmpdir / 'test_override_deserialize.dat')
    model.save(saved_path)

    # Swap the implementation used when the saved op is deserialized.
    Function.register_udf_deserialize_callback(
        MyPlus._op_name(), lambda *args: MyPlusPlus(*args))

    restored = Function.load(saved_path, device=device)

    np.random.seed(1)

    for _ in range(5):
        sample = np.random.random((2, 2)).astype(np.float32)
        batch = C.NDArrayView.from_dense(sample, device=device)
        actual = restored.eval({restored.arguments[0]: batch}, device=device)
        # NOTE(review): the factors of 2 presumably reflect MyPlusPlus
        # doubling what MyPlus computes - confirm against its definition.
        inner = np.matmul(2 * (sample + c1), weights)
        assert np.allclose(actual, 2 * (inner + c2))
def test_override_deserialize(tmpdir):
    """Verify that a registered deserialize callback replaces ``MyPlus`` on load.

    The function from ``build_test_function`` is saved under ``tmpdir``; a
    callback keyed by ``MyPlus._op_name()`` then builds ``MyPlusPlus`` during
    loading. Five seeded random 2x2 inputs are evaluated and compared with
    the NumPy reference ``2 * (matmul(2 * (x + c1), w) + c2)``.
    """
    dev, w_ref, c1_ref, c2_ref, original = build_test_function()

    model_file = str(tmpdir / 'test_override_deserialize.dat')
    original.save(model_file)

    Function.register_udf_deserialize_callback(
        MyPlus._op_name(),
        lambda *ctor_args: MyPlusPlus(*ctor_args))

    reloaded = Function.load(model_file, device=dev)

    np.random.seed(1)

    def reference(x):
        """NumPy reference for the output of the reloaded function."""
        return 2 * (np.matmul(2 * (x + c1_ref), w_ref) + c2_ref)

    for _ in range(5):
        x_np = np.random.random((2, 2)).astype(np.float32)
        x_view = C.NDArrayView.from_dense(x_np, device=dev)
        got = reloaded.eval({reloaded.arguments[0]: x_view}, device=dev)
        assert np.allclose(got, reference(x_np))
示例#11
0
def test_ext_lambdafunc(tmpdir):
    """Check that a reloaded ``LambdaFunc`` still runs its callback conditionally.

    A ``LambdaFunc`` with a ``when`` predicate (sum of the argument > 1) and a
    counting callback is saved under ``tmpdir`` and loaded back with a
    ``udf_factory_callback_map`` entry for ``'conditional_exec_lambda'`` that
    recreates it. Training on one minibatch of 0.1s must leave the counter at
    0; a following minibatch of 0.3s must raise it to 1.
    """
    dim = 4

    class CallbackCounter(object):
        """Counts how many times ``inc`` has been called."""

        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = parameter(shape=(dim, ), init=1)
    i = input(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k, when=lambda arg: np.sum(arg) > 1, execute=cb.inc)
    m = user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)
    # Loading needs a factory that rebuilds the LambdaFunc with the same
    # predicate and callback.
    z = Function.load(
        filepath,
        udf_factory_callback_map={
            'conditional_exec_lambda':
            lambda x, *unused: LambdaFunc(
                x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc)
        })
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (z + 0, z + 0),
                      [momentum_sgd(z.parameters, lr_per_sample,
                                    momentum_time_constant, True)])

    # 4 * 0.1 = 0.4 does not exceed 1, so the callback is expected to stay
    # silent (presumably ``when`` sees the forward value of ``i * p``).
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    # 4 * 0.3 = 1.2 exceeds 1, so the callback is expected to fire once.
    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
示例#12
0
def test_ext_train(tmpdir):
    """Train through a ``MyPlus`` user function and check its serialized state.

    Runs 100 minibatches through a graph containing ``MyPlus``, asserts that
    its forward and backward call counters both reached 100, then saves the
    model under ``tmpdir``, reloads it from an in-memory buffer and asserts
    that the counters stored in the serialized state are 100 as well.
    """
    dim = 4
    num_minibatches = 100

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0),
                        [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                                        True)])

    # A dedicated loop variable avoids clobbering the input variable ``i``.
    for _ in range(num_minibatches):
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == num_minibatches

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    # Read the saved model as raw bytes; the context manager closes the file
    # handle deterministically instead of leaving it to the garbage collector.
    with open(filepath, 'rb') as model_file:
        buf = model_file.read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == num_minibatches
示例#13
0
def test_native_user_function(tmpdir):
    """Verify attributes and output of a native user function after reload.

    ``NativeUserTimesOp`` is registered on demand, instantiated with a set of
    custom attributes, saved under ``tmpdir`` and loaded back. The attributes
    are checked on the original owner and on the reloaded primitive, and the
    reloaded function's output is compared with known values.
    """
    native_name = 'NativeUserTimesOp'
    instance_name = 'native_user_times_function'

    already_registered = C.cntk_py.is_native_user_function_registered(native_name)
    if not already_registered:
        C.ops.register_native_user_function(
            native_name,
            'Cntk.ExtensibilityExamples-' + C.__version__.rstrip('+'),
            'CreateUserTimesFunction')

    device = C.cpu()
    inp = C.input_variable((2))
    weight = C.parameter(
        (2, 2),
        init=np.asarray([[0.5, 2], [-0.5, 1.5]], dtype=np.float32),
        device=device)
    attrs = {
        'param_rank': 2,
        'padding': True,
        'none': None,
        'nested lists': [[1, 2, 3], [4, 5, 6]],
        'string': 'string',
        'some data': np.arange(1, 10, dtype=np.float32).reshape((3, 3)),
    }

    def assert_attributes_match(udf):
        """Compare every expected attribute with the one stored on ``udf``."""
        for name, wanted in attrs.items():
            if isinstance(wanted, np.ndarray):
                # Arrays compare element-wise and need an explicit reduction.
                assert (udf.attributes[name] == wanted).all()
                continue
            assert udf.attributes[name] == wanted

    times_op = C.ops.native_user_function(
        native_name, [weight, inp], attrs, instance_name)

    assert_attributes_match(times_op.owner)

    model_file = str(tmpdir / 'test_native_user_function.dat')
    times_op.save(model_file)

    reloaded = Function.load(model_file, device=device)
    inp_values = np.asarray([[0.1, 0.2], [-0.1, 0.3]], dtype=np.float32)
    inp_view = C.NDArrayView.from_dense(inp_values, device=device)
    got = reloaded.eval({reloaded.arguments[0]: inp_view}, device=device)

    assert np.allclose(got, [[-0.05, 0.5], [-0.2, 0.25]])

    assert_attributes_match(reloaded.find_by_name(instance_name))
示例#14
0
    # Load the vocabulary file; get_vocab returns three values, bound here
    # as vocab, i2w and w2i.
    vocab, i2w, w2i = get_vocab(os.path.join(DATA_DIR, VOCAB_FILE))

    # Create the inputs and the model.
    model = create_model()

    # Build readers for the training and validation data, then train.
    # NOTE(review): the boolean passed to create_reader presumably marks a
    # training-mode reader - confirm against create_reader.
    train_reader = create_reader(os.path.join(DATA_DIR, TRAINING_DATA), True)
    valid_reader = create_reader(os.path.join(DATA_DIR, VALIDATION_DATA), True)
    train(train_reader,
          valid_reader,
          vocab,
          i2w,
          model,
          max_epochs=30,
          epoch_size=908241)

    # Replace the in-memory model with the one loaded from
    # model_path(test_epoch) (presumably the checkpoint of that epoch -
    # TODO confirm).
    test_epoch = 10
    model = Function.load(model_path(test_epoch))

    # Measure the string error rate on decoded output of the test data.
    test_reader = create_reader(os.path.join(DATA_DIR, TESTING_DATA), False)
    evaluate_decoding(test_reader, model, i2w)

    # Evaluate the test data with the same metric that is used in training;
    # a fresh reader is created for this second pass.
    test_reader = create_reader(os.path.join(DATA_DIR, TESTING_DATA), False)
    evaluate_metric(test_reader, model)

    # Try the model out in an interactive session.
    interactive_session(model, vocab, i2w, show_attention=True)
示例#15
0
if __name__ == '__main__':
    # To pin the default device to the CPU, enable the following call:
    # try_set_default_device(cpu())

    # Fix the random seed before anything else is created.
    from _cntk_py import set_fixed_random_seed
    set_fixed_random_seed(1)

    # Load the vocabulary.
    vocab_path = os.path.join(DATA_DIR, VOCAB_FILE)
    vocab, i2w, w2i = get_vocab(vocab_path)

    # Create the inputs and the model.
    model = create_model()

    # Build the training and validation readers, then train the model.
    training_path = os.path.join(DATA_DIR, TRAINING_DATA)
    validation_path = os.path.join(DATA_DIR, VALIDATION_DATA)
    train_reader = create_reader(training_path, True)
    valid_reader = create_reader(validation_path, True)
    train(
        train_reader,
        valid_reader,
        vocab,
        i2w,
        model,
        max_epochs=30,
        epoch_size=908241,
    )

    # Replace the in-memory model with the one stored for the chosen epoch.
    test_epoch = 10
    model = Function.load(model_path(test_epoch))

    testing_path = os.path.join(DATA_DIR, TESTING_DATA)

    # String error rate on decoded output of the test data.
    test_reader = create_reader(testing_path, False)
    evaluate_decoding(test_reader, model, i2w)

    # Same metric as in training, on the test data (with a fresh reader).
    test_reader = create_reader(testing_path, False)
    evaluate_metric(test_reader, model)

    # Try the model out in an interactive session.
    interactive_session(model, vocab, i2w, show_attention=True)