def test_infer_from_data():
    X = ["One", "Two words", "Three words here"]

    keras_vectorizer = KerasVectorizer()
    keras_vectorizer.fit(X)

    assert keras_vectorizer.sequence_length == 3
def test_build_embedding_matrix_word_vectors():

    X = ["One", "Two", "Three"]

    vocab_size = 1
    keras_vectorizer = KerasVectorizer(vocab_size=vocab_size)
    keras_vectorizer.fit(X)

    embedding_matrix = keras_vectorizer.build_embedding_matrix(
        embeddings_name_or_path="glove-twitter-25")

    assert embedding_matrix.shape == (5, 25)
def test_build_embedding_matrix():

    X = ["One", "Two", "Three"]

    vocab_size = 1
    keras_vectorizer = KerasVectorizer(vocab_size=vocab_size)
    keras_vectorizer.fit(X)

    with tempfile.TemporaryDirectory() as tmp_dir:
        embeddings_path = os.path.join(tmp_dir, "embeddings.csv")
        embeddings = [
            "one 0 1 0 0 0",
            "two 0 0 1 0 0",
            "three 0 0 0 1 0",
            "four 0 0 0 0 1",
        ]
        with open(embeddings_path, "w") as embeddings_path_tmp:
            for line in embeddings:
                embeddings_path_tmp.write(line)
                embeddings_path_tmp.write("\n")
        embedding_matrix = keras_vectorizer.build_embedding_matrix(
            embeddings_name_or_path=embeddings_path)

        assert embedding_matrix.shape == (5, 5)