def test_vocab_size():
    """Check that the largest encoded index equals the requested vocab_size."""
    vocab_size = 1
    texts = ["One", "Two", "Three"]

    vectorizer = KerasVectorizer(vocab_size=vocab_size)
    encoded = vectorizer.fit_transform(texts)

    largest_index = encoded.max()
    assert largest_index == vocab_size
def test_sequence_length():
    """Check that the encoded output has sequence_length columns."""
    sequence_length = 5
    texts = ["One", "Two", "Three"]

    vectorizer = KerasVectorizer(sequence_length=sequence_length)
    encoded = vectorizer.fit_transform(texts)

    n_columns = encoded.shape[1]
    assert n_columns == sequence_length
def test_vanilla():
    """Check output shape and largest index of a default KerasVectorizer."""
    texts = ["One", "Two", "Three Four"]

    encoded = KerasVectorizer().fit_transform(texts)

    # Three input texts; the longest one ("Three Four") has two words.
    shape = encoded.shape
    assert shape[0] == 3
    assert shape[1] == 2
    # NOTE(review): presumably four distinct words plus a reserved OOV index
    # give a top index of 5 -- confirm against KerasVectorizer.
    assert encoded.max() == 5
def test_XY_dataset_sparse_y():
    """Fit a multilabel CNNClassifier on a tf.data.Dataset, score on sparse Y.

    The dataset is built from the dense label matrix, while scoring is done
    against its csr_matrix counterpart.
    """
    texts = [
        "One and two",
        "One only",
        "Two nothing else",
        "Two and three",
    ]
    labels = np.array(
        [
            [1, 1, 0, 0],
            [1, 0, 0, 0],
            [0, 1, 0, 0],
            [0, 1, 1, 0],
        ]
    )
    sparse_labels = csr_matrix(labels)

    encoded = KerasVectorizer().fit_transform(texts)

    # NOTE(review): the shuffled dataset may be iterated in a different order
    # than sparse_labels -- confirm how CNNClassifier.score aligns the two.
    dataset = tf.data.Dataset.from_tensor_slices((encoded, labels))
    dataset = dataset.shuffle(100, seed=42)

    classifier = CNNClassifier(batch_size=2, sparse_y=True, multilabel=True)
    classifier.fit(dataset)

    accuracy = classifier.score(dataset, sparse_labels)
    assert accuracy > 0.3
def test_XY_dataset():
    """Fit a CNNClassifier on a tf.data.Dataset of (X, Y) pairs and score it."""
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    encoded = KerasVectorizer().fit_transform(texts)

    # NOTE(review): the shuffled dataset may be iterated in a different order
    # than labels -- confirm how CNNClassifier.score aligns the two.
    dataset = tf.data.Dataset.from_tensor_slices((encoded, labels))
    dataset = dataset.shuffle(100, seed=42)

    classifier = CNNClassifier(batch_size=2)
    classifier.fit(dataset)

    accuracy = classifier.score(dataset, labels)
    assert accuracy > 0.3
def test_save_load():
    """Save a fitted CNNClassifier to a temp dir, reload it, and score it."""
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    encoded = KerasVectorizer().fit_transform(texts)

    fitted = CNNClassifier()
    fitted.fit(encoded, labels)

    # Everything touching the saved artefacts stays inside the context
    # manager so the temporary directory still exists while it is used.
    with tempfile.TemporaryDirectory() as save_dir:
        fitted.save(save_dir)

        restored = CNNClassifier()
        restored.load(save_dir)

        assert hasattr(restored, 'model')
        accuracy = restored.score(encoded, labels)
        assert accuracy > 0.6