Примеры использования CountVectorizer в Python

Язык программирования: Python

Пространство имен/Пакет: ctparse.count_vectorizer

Класс/Тип: CountVectorizer

Примеров на hotexamples.com: 7

Найдено 7 примеров использования CountVectorizer на Python. Это лучшие примеры кода на Python для ctparse.count_vectorizer.CountVectorizer, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

CountVectorizer(6)

transform(2)

_create_ngrams(1)

fit(1)

fit_transform(1)

Пример #1

Показать файл

def test_count_vectorizer_fit_transform():
    """Check the count dicts returned by ``CountVectorizer.fit_transform``.

    Two token lists are vectorized with an n-gram range of ``(1, 2)`` and the
    result is compared with dicts keyed by the learned vocabulary indices.
    """
    vectorizer = CountVectorizer((1, 2))
    counts = vectorizer.fit_transform([["a", "b"], ["b", "c"]])
    assert vectorizer.vocabulary

    vocab = vectorizer.vocabulary
    # The first document is expected to carry an explicit zero entry for the
    # last vocabulary index in addition to its own n-gram counts.
    expected_first = {
        vocab["a"]: 1,
        vocab["b"]: 1,
        vocab["a b"]: 1,
        len(vocab) - 1: 0,
    }
    expected_second = {vocab["b"]: 1, vocab["c"]: 1, vocab["b c"]: 1}
    assert counts == [expected_first, expected_second]

Пример #2

Показать файл

def test_naive_bayes_from_file(tmp_path):
    """Pickle a scorer into a bz2 file and load it via ``from_model_file``.

    The test only checks that the object returned by
    ``NaiveBayesScorer.from_model_file`` is truthy.
    """
    pipeline = CTParsePipeline(CountVectorizer((1, 1)), MultinomialNaiveBayes())
    scorer = NaiveBayesScorer(pipeline)
    model_file = tmp_path / "model.pkl"
    with bz2.open(model_file, "w") as handle:
        pickle.dump(scorer, handle)
    loaded = NaiveBayesScorer.from_model_file(model_file)
    assert loaded

Пример #3

Показать файл

Файл: nb_scorer.py Проект: Acreom/quickadd

def train_naive_bayes(X: Sequence[Sequence[str]],
                      y: Sequence[bool]) -> CTParsePipeline:
    """Train a naive bayes model for NaiveBayesScorer.

    Truthy labels in ``y`` are encoded as ``1`` and falsy ones as ``-1``.
    A pipeline made of a ``CountVectorizer`` with ``ngram_range=(1, 3)`` and
    a ``MultinomialNaiveBayes`` with ``alpha=1.0`` is then fitted on ``X``
    and the encoded labels; the value returned by ``fit`` is handed back.
    """
    # Encode the boolean targets as +1 / -1 before fitting.
    signed_labels = []
    for label in y:
        signed_labels.append(1 if label else -1)

    vectorizer = CountVectorizer(ngram_range=(1, 3))
    classifier = MultinomialNaiveBayes(alpha=1.0)
    return CTParsePipeline(vectorizer, classifier).fit(X, signed_labels)

Пример #4

Показать файл

def test_save_naive_bayes(tmp_path):
    """Call ``save_naive_bayes`` on an unfitted pipeline.

    There are no assertions: the test passes as long as saving to
    ``tmp_path / "model.pkl"`` does not raise.
    """
    vectorizer = CountVectorizer((1, 1))
    pipeline = CTParsePipeline(vectorizer, MultinomialNaiveBayes())
    target = tmp_path / "model.pkl"
    save_naive_bayes(pipeline, target)

Пример #5

Показать файл

def test_count_vectorizer_transform_no_fit():
    """Expect ``transform`` to raise ``ValueError`` when ``fit`` was not called."""
    unfitted = CountVectorizer((1, 2))
    documents = [["a"]]
    with pytest.raises(ValueError):
        unfitted.transform(documents)

Пример #6

Показать файл

def test_count_vectorizer_fit_and_transform():
    """Fit on two documents, then check ``transform`` on a single-token one."""
    fitted = CountVectorizer((1, 2)).fit([["a", "b", "c"], ["c", "d"]])
    assert fitted.vocabulary

    # NOTE(review): 6 is presumably the last vocabulary index
    # (len(vocabulary) - 1) for this training data -- confirm.
    expected = [{fitted.vocabulary["b"]: 1, 6: 0}]
    transformed = fitted.transform([["b"]])
    assert transformed == expected

Пример #7

Показать файл

def test_ngrams(ngrams, doc, result):
    """Check ``CountVectorizer._create_ngrams`` for one document.

    ``doc`` is wrapped in a list before the call, and the output is compared
    with ``result`` wrapped the same way.

    NOTE(review): the arguments are presumably supplied by a pytest
    parametrization that is not visible here -- confirm.
    """
    produced = CountVectorizer._create_ngrams(ngrams, [doc])
    expected = [result]
    assert produced == expected