Пример #1
0
def test_count_vectorizer_fit_transform():
    """Check the per-document counts returned by ``fit_transform``.

    A (1, 2) n-gram vectorizer is fitted on two two-token documents. Every
    uni- and bigram of a document must be counted once under its vocabulary
    index; the first document's expected row additionally maps the highest
    vocabulary index to a zero count.
    """
    vectorizer = CountVectorizer((1, 2))
    counts = vectorizer.fit_transform([["a", "b"], ["b", "c"]])
    vocab = vectorizer.vocabulary
    assert vocab

    first_row = {vocab[term]: 1 for term in ("a", "b", "a b")}
    # Explicit zero entry at the last vocabulary index (assigned last so it
    # wins over any earlier key, exactly like a dict literal would).
    first_row[len(vocab) - 1] = 0
    second_row = {vocab[term]: 1 for term in ("b", "c", "b c")}

    assert counts == [first_row, second_row]
Пример #2
0
def test_naive_bayes_from_file(tmp_path):
    """Load a scorer back from a bz2-compressed pickle written to disk.

    A scorer wrapping an unfitted unigram pipeline is pickled into a file
    under ``tmp_path``; ``NaiveBayesScorer.from_model_file`` must return a
    truthy object when given that file.
    """
    vectorizer = CountVectorizer((1, 1))
    pipeline = CTParsePipeline(vectorizer, MultinomialNaiveBayes())
    scorer = NaiveBayesScorer(pipeline)

    model_file = tmp_path / "model.pkl"
    with bz2.open(model_file, "w") as stream:
        pickle.dump(scorer, stream)

    restored = NaiveBayesScorer.from_model_file(model_file)
    assert restored
Пример #3
0
def train_naive_bayes(X: Sequence[Sequence[str]],
                      y: Sequence[bool]) -> CTParsePipeline:
    """Fit the naive Bayes pipeline used by NaiveBayesScorer.

    Each entry of ``y`` is converted to ``1`` when truthy and ``-1``
    otherwise. A pipeline made of a (1, 3) n-gram ``CountVectorizer`` and a
    ``MultinomialNaiveBayes`` with ``alpha=1.0`` is then fitted on ``X`` and
    the converted labels, and the result of ``fit`` is returned.
    """
    signed_labels = [1 if is_positive else -1 for is_positive in y]

    # Assemble the two pipeline stages, then fit them in one go.
    vectorizer = CountVectorizer(ngram_range=(1, 3))
    classifier = MultinomialNaiveBayes(alpha=1.0)
    return CTParsePipeline(vectorizer, classifier).fit(X, signed_labels)
Пример #4
0
def test_save_naive_bayes(tmp_path):
    """Smoke test: ``save_naive_bayes`` accepts an unfitted pipeline.

    Only the absence of an exception is checked; nothing is asserted about
    what ends up at the target path.
    """
    vectorizer = CountVectorizer((1, 1))
    pipeline = CTParsePipeline(vectorizer, MultinomialNaiveBayes())
    target = tmp_path / "model.pkl"
    save_naive_bayes(pipeline, target)
Пример #5
0
def test_count_vectorizer_transform_no_fit():
    """``transform`` on a vectorizer that was never fitted raises ValueError."""
    unfitted = CountVectorizer((1, 2))
    documents = [["a"]]
    with pytest.raises(ValueError):
        unfitted.transform(documents)
Пример #6
0
def test_count_vectorizer_fit_and_transform():
    """Check ``transform`` on a vectorizer fitted separately via ``fit``.

    The vectorizer is fitted on two documents with (1, 2) n-grams and then
    asked to transform a single one-token document. The expected row counts
    the token once and carries a zero count at the highest vocabulary index.
    """
    cv = CountVectorizer((1, 2))
    cv = cv.fit([["a", "b", "c"], ["c", "d"]])
    assert cv.vocabulary
    # Derive the highest vocabulary index from the vocabulary size instead of
    # hard-coding it. The two fitted documents are expected to yield 7
    # distinct uni-/bigrams, so this is the same index (6) the test used to
    # spell out literally.
    last_index = len(cv.vocabulary) - 1
    assert cv.transform([["b"]]) == [{cv.vocabulary["b"]: 1, last_index: 0}]
Пример #7
0
def test_ngrams(ngrams, doc, result):
    """Compare ``_create_ngrams`` output for one document with ``result``.

    ``doc`` is wrapped in a one-element list before the call, and the return
    value must equal ``[result]``. The arguments are presumably supplied by
    a parametrization that is not visible here.
    """
    produced = CountVectorizer._create_ngrams(ngrams, [doc])
    expected = [result]
    assert produced == expected