示例#1
0
def test_build_model():
    """Fit a multilabel CNN, then rebuild it for fewer outputs and refit.

    The prediction matrix must have one column per label column of the
    target: 4 after the first fit, and 3 after ``build_model`` is called
    explicitly and the model is refit on a 3-column target.
    """
    texts = ["One and two", "One only", "Two nothing else", "Two and three"]
    labels = np.array(
        [[1, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [0, 1, 1, 0]]
    )

    X_vec = KerasVectorizer().fit_transform(texts)

    batch_size = 2
    model = CNNClassifier(
        batch_size=batch_size, multilabel=True, learning_rate=1e-2
    )
    model.fit(X_vec, labels)
    assert model.predict(X_vec).shape[1] == 4

    # Keep only the first three label columns for the second round.
    labels = labels[:, :3]

    seq_len = X_vec.shape[1]
    n_tokens = X_vec.max() + 1
    n_labels = labels.shape[1]
    # True division: this is a float, not an int.
    steps = X_vec.shape[0] / batch_size

    model.build_model(seq_len, n_tokens, n_labels, steps)
    model.fit(X_vec, labels)
    assert model.predict(X_vec).shape[1] == 3
示例#2
0
def test_vocab_size():
    """Check the largest vectorized index when ``vocab_size`` is 1.

    Three distinct words are vectorized with ``vocab_size=1``; the maximum
    value in the output is expected to equal ``vocab_size``.
    """
    vocab_size = 1
    texts = ["One", "Two", "Three"]

    vectorized = KerasVectorizer(vocab_size=vocab_size).fit_transform(texts)

    assert vectorized.max() == vocab_size
示例#3
0
def test_sequence_length():
    """Check that the output width equals the requested ``sequence_length``.

    Every input here is a single word, yet the second dimension of the
    transformed array must equal the configured length of 5.
    """
    sequence_length = 5
    texts = ["One", "Two", "Three"]

    vectorizer = KerasVectorizer(sequence_length=sequence_length)
    vectorized = vectorizer.fit_transform(texts)

    n_columns = vectorized.shape[1]
    assert n_columns == sequence_length
示例#4
0
def test_vanilla():
    """Check output shape and max index of a default ``KerasVectorizer``.

    Three documents, the longest with two words, should yield a 3 x 2
    array.
    """
    texts = ["One", "Two", "Three Four"]

    vectorized = KerasVectorizer().fit_transform(texts)

    n_rows = vectorized.shape[0]
    n_cols = vectorized.shape[1]
    assert n_rows == 3
    assert n_cols == 2
    # NOTE(review): the corpus has four distinct tokens; the expected max of 5
    # presumably also accounts for an OOV index - confirm against
    # KerasVectorizer.
    assert vectorized.max() == 5
示例#5
0
def test_XY_dataset_sparse_y():
    """Fit a multilabel CNN from a ``tf.data`` dataset with ``sparse_y=True``.

    Training uses a dataset of (features, dense labels) pairs; scoring uses
    a features-only dataset together with a CSR copy of the labels.
    """
    texts = ["One and two", "One only", "Two nothing else", "Two and three"]
    labels = np.array(
        [[1, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [0, 1, 1, 0]]
    )

    X_vec = KerasVectorizer().fit_transform(texts)

    # The training set carries the dense labels; the test set is features only.
    train_data = tf.data.Dataset.from_tensor_slices((X_vec, labels))
    test_data = tf.data.Dataset.from_tensor_slices(X_vec)

    clf = CNNClassifier(batch_size=2, sparse_y=True, multilabel=True)
    clf.fit(train_data)

    labels_sparse = csr_matrix(labels)
    assert clf.score(test_data, labels_sparse) > 0.3
示例#6
0
def test_XY_dataset():
    """Fit and score a ``CNNClassifier`` from a shuffled ``tf.data`` dataset.

    The same (features, labels) dataset is used for fitting and for
    scoring; the score against ``Y`` must exceed 0.3.
    """
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    targets = np.array([0, 0, 1, 1])

    X_vec = KerasVectorizer().fit_transform(texts)

    # NOTE(review): the dataset is shuffled, yet it is scored against the
    # unshuffled targets below - confirm that score() is order-independent.
    dataset = tf.data.Dataset.from_tensor_slices((X_vec, targets)).shuffle(
        100, seed=42
    )

    clf = CNNClassifier(batch_size=2)
    clf.fit(dataset)

    assert clf.score(dataset, targets) > 0.3
示例#7
0
def test_save_load_attention():
    """Round-trip an attention ``BiLSTMClassifier`` through save and load.

    A model fitted with ``attention=True`` is saved to a temporary
    directory, then loaded into a fresh default-constructed classifier. The
    loaded classifier must expose a ``model`` attribute and score above 0.6
    on the training data.
    """
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    targets = np.array([0, 0, 1, 1])

    X_vec = KerasVectorizer().fit_transform(texts)

    fitted = BiLSTMClassifier(attention=True)
    fitted.fit(X_vec, targets)

    with tempfile.TemporaryDirectory() as tmp_dir:
        fitted.save(tmp_dir)

        # Load into a classifier built with default arguments.
        restored = BiLSTMClassifier()
        restored.load(tmp_dir)

        assert hasattr(restored, 'model')
        assert restored.score(X_vec, targets) > 0.6