def test_serialize_custom_trainable_pipe():
    """Serialization of custom TrainablePipe subclasses.

    Subclasses that do not set a usable ``vocab``/``model`` are expected to
    raise ``ValueError`` from ``to_bytes`` and ``to_disk``. A subclass that
    sets both must round-trip through bytes and through disk unchanged.
    """

    class BadCustomPipe1(TrainablePipe):
        # Sets neither vocab nor model.
        def __init__(self, vocab):
            pass

    class BadCustomPipe2(TrainablePipe):
        # Sets the vocab but leaves the model as None.
        def __init__(self, vocab):
            self.vocab = vocab
            self.model = None

    class CustomPipe(TrainablePipe):
        def __init__(self, vocab, model):
            self.vocab = vocab
            self.model = model

    # Both broken variants must refuse to serialize, in memory and on disk.
    for broken_cls in (BadCustomPipe1, BadCustomPipe2):
        broken = broken_cls(Vocab())
        with pytest.raises(ValueError):
            broken.to_bytes()
        with make_tempdir() as tmp_dir, pytest.raises(ValueError):
            broken.to_disk(tmp_dir)

    original = CustomPipe(Vocab(), Linear())
    expected_bytes = original.to_bytes()

    # Round trip through bytes.
    restored = CustomPipe(Vocab(), Linear()).from_bytes(expected_bytes)
    assert restored.to_bytes() == expected_bytes

    # Round trip through disk.
    with make_tempdir() as tmp_dir:
        original.to_disk(tmp_dir)
        restored = CustomPipe(Vocab(), Linear()).from_disk(tmp_dir)
    assert restored.to_bytes() == expected_bytes
def test_issue208():
    """Test issue that was caused by trying to flatten nested chains."""
    first = Linear(nO=9, nI=3)
    second = Linear(nO=12, nI=9)
    third = Linear(nO=5, nI=12)
    # Nest the last two layers in their own chain before chaining with the first.
    nested = chain(second, third)
    model = chain(first, nested).initialize()
    # The output width of the whole chain must be that of the last layer.
    assert model.get_dim("nO") == 5
def test_noop():
    """noop must return its input on the forward pass and the gradient on the backward pass."""
    inputs = numpy.asarray([1, 2, 3], dtype="f")
    model = noop(Linear(), Linear())
    model.initialize(inputs, inputs)

    outputs, get_dX = model(inputs, is_train=True)
    assert numpy.array_equal(outputs, inputs)

    grads = get_dX(outputs)
    assert numpy.array_equal(grads, inputs)
def test_concatenate():
    """The concatenated output width equals the sum of the layers' output widths."""
    batch = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype="f")
    model = concatenate(Linear(), Linear())
    model.initialize(batch, batch)

    outputs, get_dX = model(batch, is_train=True)
    expected_width = 0
    for layer in model.layers:
        expected_width += layer.predict(batch).shape[1]
    assert outputs.shape[1] == expected_width

    # The input gradient must have the same shape as the input.
    grads = get_dX(outputs)
    assert grads.shape == batch.shape
def test_clone_changes_predictions(nH, nI):
    """A 10-fold clone must not predict the same totals as the single source layer."""
    base = Linear(nH)
    stacked = clone(base, 10)
    X = numpy.ones((10, nI), dtype="f")
    stacked.initialize(X=X)

    stacked_total = stacked.predict(X).sum()
    base_total = base.predict(X).sum()
    assert stacked_total != base_total
def test_add_edge_cases():
    """add() without arguments raises; with its layers emptied it passes data through."""
    inputs = numpy.asarray([[1, 2, 3, 4]], dtype="f")

    with pytest.raises(TypeError):
        add()

    model = add(Linear(), Linear())
    # Drop the child layers to exercise the empty-layers path.
    model._layers = []

    outputs, get_dX = model(inputs, is_train=True)
    assert numpy.array_equal(inputs, outputs)

    grads = get_dX(outputs)
    assert numpy.array_equal(grads, inputs)
def test_linear_dimensions_on_data():
    """Initializing Linear with sample data sets ``nI`` and calls ``max()`` on Y."""

    def fake_array(shape):
        # ndarray-spec'd mock exposing only the attributes this test sets.
        arr = MagicMock(shape=shape, spec=numpy.ndarray)
        arr.ndim = 2
        arr.dtype = "float32"
        return arr

    X = fake_array((5, 10))
    y = fake_array((8,))
    y.max = MagicMock()

    model = Linear()
    model.initialize(X, y)

    assert model.get_dim("nI") is not None
    y.max.assert_called_with()
def test_serialize_model_shims_roundtrip_bytes():
    """Shims attached to a chained model survive a to_bytes/from_bytes round trip."""

    def fwd(model, X, is_train):
        # Identity forward pass with an identity backprop callback.
        return X, lambda dY: dY

    source_shim = SerializableShim(None)
    source_shim_model = Model("shimmodel", fwd, shims=[source_shim])
    model = chain(Linear(2, 3), source_shim_model, Maxout(2, 3))
    model.initialize()
    assert model.layers[1].shims[0].value == "shimdata"

    model_bytes = model.to_bytes()

    # Loading the bytes into a model with a different structure must fail.
    with pytest.raises(ValueError):
        Linear(2, 3).from_bytes(model_bytes)

    target_shim = SerializableShim(None)
    target_shim_model = Model("shimmodel", fwd, shims=[target_shim])
    target = chain(Linear(2, 3), target_shim_model, Maxout(2, 3))
    new_model = target.from_bytes(model_bytes)
    assert new_model.layers[1].shims[0].value == "shimdata from bytes"
def test_pytorch_unwrapped(nN, nI, nO):
    """A plain Linear layer trained on random inputs against all-zero targets."""
    model = Linear(nO, nI).initialize()

    # Random float32 inputs: uniform noise added in place to a zero array.
    inputs = numpy.zeros((nN, nI), dtype="f")
    noise = numpy.random.uniform(size=inputs.size)
    inputs += noise.reshape(inputs.shape)

    optimizer = SGD(0.01)
    targets = numpy.zeros((nN, nO), dtype="f")
    check_learns_zero_output(model, optimizer, inputs, targets)
def test_tensorflow_wrapper_construction_requires_keras_model():
    """TensorFlowWrapper accepts a Keras model and rejects a non-Keras one."""
    import tensorflow as tf

    dense = tf.keras.layers.Dense(12, input_shape=(12,))
    keras_model = tf.keras.Sequential([dense])
    wrapped = TensorFlowWrapper(keras_model)
    assert isinstance(wrapped, Model)

    # Wrapping something that is not a Keras model must be refused.
    with pytest.raises(ValueError):
        TensorFlowWrapper(Linear(2, 3))
def test_add():
    """``add`` sums the outputs of its layers, including when it is nested."""
    data = numpy.asarray([[1, 2, 3, 4]], dtype="f")
    model = add(Linear(), Linear())
    model.initialize(data, data)
    Y, backprop = model(data, is_train=True)
    Y2 = sum(layer.predict(data) for layer in model.layers)
    assert numpy.array_equal(Y, Y2)
    dX = backprop(Y)
    assert dX.shape == data.shape
    # Test that nesting works
    model2 = add(model, Linear())
    assert len(model2.layers) == 3
    # Initialize the model that is actually used for prediction below, so the
    # newly added third layer is initialized without relying on `model2`
    # being the very same object as `model`.
    model2.initialize(data, data)
    Y = model2.predict(data)
    Y2 = sum(layer.predict(data) for layer in model2.layers)
    assert numpy.array_equal(Y, Y2)
def build_text_classifier_v2(
    tok2vec: Model[List[Doc], List[Floats2d]],
    linear_model: Model[List[Doc], Floats2d],
    nO: Optional[int] = None,
) -> Model[List[Doc], Floats2d]:
    """Build an ensemble text classifier.

    The outputs of `linear_model` are concatenated with those of an
    attention-pooled network built on top of `tok2vec`, and the result is fed
    to an output layer.

    tok2vec: The token-to-vector model feeding the attention network.
    linear_model: The second ensemble member. Its "multi_label" attribute
        selects the output non-linearity and its "output_layer" reference is
        re-exported on the returned model.
    nO: Number of output classes, or None to leave it unset here.
    RETURNS: The combined model, with `init` set to `init_ensemble_textcat`.
    """
    exclusive_classes = not linear_model.attrs["multi_label"]
    with Model.define_operators({">>": chain, "|": concatenate}):
        width = tok2vec.maybe_get_dim("nO")
        # TODO: benchmark performance difference of this layer
        attention_layer = ParametricAttention(width)
        maxout_layer = Maxout(nO=width, nI=width)
        norm_layer = LayerNorm(nI=width)
        cnn_model = (
            tok2vec
            >> list2ragged()
            >> attention_layer
            >> reduce_sum()
            >> residual(maxout_layer >> norm_layer >> Dropout(0.0))
        )

        # The output layer receives the concatenation of both members.
        nO_double = nO * 2 if nO else None
        if exclusive_classes:
            output_layer = Softmax(nO=nO, nI=nO_double)
        else:
            output_layer = Linear(nO=nO, nI=nO_double) >> Logistic()
        model = (linear_model | cnn_model) >> output_layer
        model.set_ref("tok2vec", tok2vec)
    # Only set the output dimension when one was actually provided: set_dim
    # expects an int, so an unknown nO is left untouched here.
    if model.has_dim("nO") is not False and nO is not None:
        model.set_dim("nO", nO)
    model.set_ref("output_layer", linear_model.get_ref("output_layer"))
    model.set_ref("attention_layer", attention_layer)
    model.set_ref("maxout_layer", maxout_layer)
    model.set_ref("norm_layer", norm_layer)
    model.attrs["multi_label"] = not exclusive_classes

    model.init = init_ensemble_textcat
    return model
def test_clone_gives_distinct_ids(nH, nI):
    """Every node of a 5-fold clone (the 5 layers plus the wrapper) has a unique id."""
    model = clone(Linear(nH), 5)
    assert len(model.layers) == 5

    node_ids = [node.id for node in model.walk()]
    unique_ids = set(node_ids)
    # No id may occur twice while walking the model tree.
    assert len(unique_ids) == len(node_ids)
    assert len(unique_ids) == 6
def test_predict_extensive(W_b_input):
    """Linear.predict must match an explicit einsum of inputs and weights plus bias."""
    W, b, input_ = W_b_input
    nr_out, nr_in = W.shape

    model = Linear(nr_out, nr_in)
    for name, value in (("W", W), ("b", b)):
        model.set_param(name, value)

    # Reference result, computed in float32 without einsum path optimization.
    inputs32 = numpy.asarray(input_, dtype="float32")
    weights32 = numpy.asarray(W, dtype="float32")
    expected_output = (
        numpy.einsum("bi,oi->bo", inputs32, weights32, optimize=False) + b
    )

    predicted_output = model.predict(input_)
    assert_allclose(predicted_output, expected_output, rtol=1e-04, atol=0.0001)
def build_nel_encoder(tok2vec: Model, nO: Optional[int] = None) -> Model:
    """Build an encoder: tok2vec, mean pooling, a residual Maxout block, then Linear.

    tok2vec: The token-to-vector model. Its "nO" dimension must already be set,
        because it is read with `get_dim`.
    nO: Output width of the final Linear layer, or None to leave it unset.
    RETURNS: The chained model, with "output_layer" and "tok2vec" references.
    """
    with Model.define_operators({">>": chain, "**": clone}):
        token_width = tok2vec.get_dim("nO")
        output_layer = Linear(nO=nO, nI=token_width)
        pooled = tok2vec >> list2ragged() >> reduce_mean()
        hidden = residual(
            Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0)
        )
        model = pooled >> hidden >> output_layer
        model.set_ref("output_layer", output_layer)
        model.set_ref("tok2vec", tok2vec)
    return model
def test_predict_weights(X, expected):
    """With identity weights and a zero bias, predictions must equal `expected`."""
    W = numpy.asarray([[1.0, 0.0], [0.0, 1.0]], dtype="f")
    bias = numpy.zeros((2,), dtype="f")
    nO, nI = W.shape

    model = Linear(nO, nI)
    model.set_param("W", W)
    model.set_param("b", bias)

    # Predict on a batch holding the single example X.
    single_row = X.reshape((1, -1))
    scores = model.predict(single_row)
    assert_allclose(scores.ravel(), expected)
def test_init_functions_are_called():
    """Initializing a nested chain must call the init function of every layer."""
    init_was_called = {}

    def register_init(name, model, X=None, Y=None):
        init_was_called[name] = True

    names = ("one", "two", "three")
    layers = [Linear(5) for _ in names]
    for layer, name in zip(layers, names):
        layer.init = partial(register_init, name)

    # This is the nesting we'll get from operators.
    first, second, third = layers
    model = chain(first, chain(second, third))
    assert not init_was_called

    model.initialize()
    for name in names:
        assert init_was_called[name]
def test_with_getitem():
    """with_getitem(1, ...) transforms only item 1 of a tuple, forward and backward."""
    untouched = numpy.asarray([[1, 2, 3, 4]], dtype="f")
    transformed = numpy.asarray([[5, 6, 7, 8]], dtype="f")
    data = (untouched, transformed)

    model = with_getitem(1, Linear())
    model.initialize(data, data)
    Y, backprop = model.begin_update(data)

    assert len(Y) == len(data)
    # Item 0 is passed through unchanged; item 1 went through the layer.
    assert numpy.array_equal(Y[0], data[0])
    assert not numpy.array_equal(Y[1], data[1])

    dX = backprop(Y)
    assert numpy.array_equal(dX[0], data[0])
    assert not numpy.array_equal(dX[1], data[1])
def build_nel_encoder(
    tok2vec: Model, nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
    """Build an encoder that pools over extracted spans.

    The ragged tok2vec output is paired with the output of
    `build_span_maker()`, then passed through span extraction, mean pooling,
    a residual Maxout block and a final Linear layer.

    tok2vec: The token-to-vector model. Its "nO" may still be unset.
    nO: Output width of the final Linear layer, or None to leave it unset.
    RETURNS: The model, with "output_layer" and "tok2vec" references and the
        "include_span_maker" attribute set to True.
    """
    with Model.define_operators({">>": chain, "&": tuplify}):
        token_width = tok2vec.maybe_get_dim("nO")
        output_layer = Linear(nO=nO, nI=token_width)
        ragged_and_spans = (tok2vec >> list2ragged()) & build_span_maker()
        model = (
            ragged_and_spans
            >> extract_spans()
            >> reduce_mean()
            >> residual(Maxout(nO=token_width, nI=token_width, nP=2, dropout=0.0))
            >> output_layer
        )
        model.set_ref("output_layer", output_layer)
        model.set_ref("tok2vec", tok2vec)
    # flag to show this isn't legacy
    model.attrs["include_span_maker"] = True
    return model
def build_text_classifier_lowdata(
    width: int, dropout: Optional[float], nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
    # Don't document this yet, I'm not sure it's right.
    # Note, before v.3, this was the default if setting "low_data" and "pretrained_dims"
    with Model.define_operators({">>": chain, "**": clone}):
        # `**` binds tighter than `>>`, so the residual block is cloned twice
        # before being chained.
        hidden = residual(Relu(width, width)) ** 2
        model = (
            StaticVectors(width)
            >> list2ragged()
            >> ParametricAttention(width)
            >> reduce_sum()
            >> hidden
            >> Linear(nO, width)
        )
        # Dropout is appended only for a truthy rate (not None, not 0).
        if dropout:
            model = model >> Dropout(dropout)
        model = model >> Logistic()
    return model
def test_chain(ops):
    """chain works with various dimension setups and rejects fewer than two layers."""
    sample = numpy.asarray([[1, 2, 3, 4]], dtype="f")

    # Forward and backward pass on the provided ops.
    model = chain(Linear(1), Dropout(), Linear(1))
    model.ops = ops
    model.initialize(sample, sample)
    outputs, get_dX = model(sample, is_train=True)
    get_dX(outputs)

    # Layers with and without nO/nI
    model = chain(Linear(1), Dropout(), Linear(1, 1))
    model.initialize(sample, sample)

    # Setting dim on model
    model = chain(Linear(1), Dropout(), Linear(1))
    model.set_dim("nO", 1)
    model.initialize(sample, None)

    model = chain(Linear(1, 1), Dropout(), Linear(1, 1))
    model.set_dim("nI", 1)
    model.initialize(None, sample)

    # Not enough arguments
    with pytest.raises(TypeError):
        chain(Linear())
    with pytest.raises(TypeError):
        chain()
def test_map_list():
    """map_list applies its layer to each list item, forward and backward."""
    nI = 4
    nO = 9
    Xs = [numpy.zeros((6, nI), dtype="f"), numpy.ones((3, nI), dtype="f")]
    sample_Ys = [numpy.zeros((X.shape[0], nO), dtype="f") for X in Xs]

    model = map_list(Linear())
    model.initialize(X=Xs, Y=sample_Ys)

    Ys, backprop = model(Xs, is_train=True)
    assert isinstance(Ys, list)
    assert len(Ys) == len(Xs)

    # Each output must equal what the wrapped layer predicts on its own.
    inner = model.layers[0]
    for X, Y in zip(Xs, Ys):
        assert_allclose(inner.predict(X), Y)

    dXs = backprop(Ys)
    assert isinstance(dXs, list)
    assert len(dXs) == len(Xs)
    for dX, X in zip(dXs, Xs):
        assert dX.shape == X.shape
def build_simple_cnn_text_classifier(
    tok2vec: Model, exclusive_classes: bool, nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
    """
    Build a simple CNN text classifier, given a token-to-vector model as inputs.
    If exclusive_classes=True, a softmax non-linearity is applied, so that the
    outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
    is applied instead, so that outputs are in the range [0, 1].

    The output layer is wrapped in a resizable layer, and a "resize_output"
    callback is stored in the model's attrs.

    tok2vec: The token-to-vector model.
    exclusive_classes: Whether to use Softmax (True) or Linear + Logistic (False).
    nO: Number of output classes, or None to leave it unset here.
    RETURNS: The classifier, with "output_layer" and "tok2vec" references.
    """
    fill_defaults = {"b": 0, "W": 0}
    with Model.define_operators({">>": chain}):
        cnn = tok2vec >> list2ragged() >> reduce_mean()
        nI = tok2vec.maybe_get_dim("nO")
        if exclusive_classes:
            output_layer = Softmax(nO=nO, nI=nI)
            # The softmax variant uses NEG_VALUE as the bias fill value.
            fill_defaults["b"] = NEG_VALUE
        else:
            output_layer = Linear(nO=nO, nI=nI)
        # Both variants wrap their output layer the same way, so build the
        # resizable layer once, after fill_defaults has its final values.
        resizable_layer: Model = resizable(
            output_layer,
            resize_layer=partial(resize_linear_weighted, fill_defaults=fill_defaults),
        )
        model = cnn >> resizable_layer
        if not exclusive_classes:
            model = model >> Logistic()
        model.set_ref("output_layer", output_layer)
        model.attrs["resize_output"] = partial(
            resize_and_set_ref,
            resizable_layer=resizable_layer,
        )
    model.set_ref("tok2vec", tok2vec)
    # set_dim expects an int, so only call it when nO is actually known.
    if nO is not None:
        model.set_dim("nO", nO)
    model.attrs["multi_label"] = not exclusive_classes
    return model
def build_cloze_multi_task_model(
    vocab: "Vocab", tok2vec: Model, maxout_pieces: int, hidden_size: int
) -> Model:
    """Wrap tok2vec plus a Maxout/Linear output head in a masked language model.

    vocab: The vocab. The second dimension of its vectors table is used as the
        output width.
    tok2vec: The token-to-vector model. Its "nO" dimension must be set.
    maxout_pieces: Number of pieces (nP) for the Maxout layer.
    hidden_size: Width of the Maxout layer.
    RETURNS: The model returned by `build_masked_language_model`, with
        "tok2vec" and "output_layer" references set.
    """
    vector_width = vocab.vectors.data.shape[1]
    to_array = list2array()
    hidden = Maxout(
        nO=hidden_size,
        nI=tok2vec.get_dim("nO"),
        nP=maxout_pieces,
        normalize=True,
        dropout=0.0,
    )
    # The projection back to vector width starts from zero weights.
    projection = Linear(nO=vector_width, nI=hidden_size, init_W=zero_init)
    output_layer = chain(to_array, hidden, projection)

    model = build_masked_language_model(vocab, chain(tok2vec, output_layer))
    model.set_ref("tok2vec", tok2vec)
    model.set_ref("output_layer", output_layer)
    return model
def test_with_debug():
    """with_debug fires each callback once, at its own stage and not before."""
    on_init = MagicMock()
    on_forward = MagicMock()
    on_backprop = MagicMock()
    model = with_debug(
        Linear(), on_init=on_init, on_forward=on_forward, on_backprop=on_backprop
    )

    # Nothing has run yet.
    for callback in (on_init, on_forward, on_backprop):
        callback.assert_not_called()

    X = model.ops.alloc2f(1, 1)
    Y = model.ops.alloc2f(1, 1)

    # Initialization triggers only on_init.
    model.initialize(X=X, Y=Y)
    on_init.assert_called_once_with(model, X, Y)
    on_forward.assert_not_called()
    on_backprop.assert_not_called()

    # The forward pass triggers only on_forward.
    _, get_dX = model(X, is_train=True)
    on_forward.assert_called_once_with(model, X, True)
    on_backprop.assert_not_called()

    # The backward pass triggers on_backprop with the gradient.
    get_dX(Y)
    on_backprop.assert_called_once_with(Y)
def build_simple_cnn_text_classifier(
    tok2vec: Model, exclusive_classes: bool, nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
    """
    Build a simple CNN text classifier, given a token-to-vector model as inputs.
    If exclusive_classes=True, a softmax non-linearity is applied, so that the
    outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
    is applied instead, so that outputs are in the range [0, 1].

    tok2vec: The token-to-vector model.
    exclusive_classes: Whether to use Softmax (True) or Linear + Logistic (False).
    nO: Number of output classes, or None to leave it unset here.
    RETURNS: The classifier, with "output_layer" and "tok2vec" references.
    """
    with Model.define_operators({">>": chain}):
        cnn = tok2vec >> list2ragged() >> reduce_mean()
        nI = tok2vec.maybe_get_dim("nO")
        if exclusive_classes:
            output_layer = Softmax(nO=nO, nI=nI)
            model = cnn >> output_layer
        else:
            output_layer = Linear(nO=nO, nI=nI)
            model = cnn >> output_layer >> Logistic()
        # In both variants the reference points at the layer before any
        # Logistic activation.
        model.set_ref("output_layer", output_layer)
    model.set_ref("tok2vec", tok2vec)
    # set_dim expects an int, so only call it when nO is actually known.
    if nO is not None:
        model.set_dim("nO", nO)
    model.attrs["multi_label"] = not exclusive_classes
    return model
def build_linear_logistic(nO=None, nI=None) -> Model[Floats2d, Floats2d]:
    """An output layer for multi-label classification. It uses a linear layer
    followed by a logistic activation.
    """
    # The linear weights use Glorot-uniform initialization.
    linear_layer = Linear(nO=nO, nI=nI, init_W=glorot_uniform_init)
    activation = Logistic()
    return chain(linear_layer, activation)
def linear():
    """Return an uninitialized Linear layer with 5 outputs and 3 inputs."""
    return Linear(nO=5, nI=3)
def model2(nO, nH):
    """Return an initialized Linear layer mapping `nH` inputs to `nO` outputs."""
    return Linear(nO, nH).initialize()
def create_classification_layer(nO: int = None, nI: int = None) -> Model[Floats2d, Floats2d]:
    """Build a Linear layer followed by a Logistic activation.

    nO: Output width of the Linear layer, or None to leave it unset.
    nI: Input width of the Linear layer, or None to leave it unset.
    RETURNS: The chained Linear >> Logistic model.
    """
    with Model.define_operators({">>": chain}):
        classifier = Linear(nO=nO, nI=nI) >> Logistic()
    return classifier