Exemplo n.º 1
0
def dispersion():
    """
    Fit a DispersionPlot for a fixed list of target words on the hobbies
    corpus and hand the fitted visualizer to ``savefig`` as "dispersion".
    """
    hobbies = load_hobbies()
    words = ["Game", "player", "score", "oil", "Man"]

    visualizer = DispersionPlot(words, ax=newfig())

    # The plot is fit on one whitespace-split token list per document.
    tokenized_docs = [text.split() for text in hobbies.data]
    visualizer.fit(tokenized_docs)

    savefig(visualizer, "dispersion")
Exemplo n.º 2
0
def tsne():
    """
    Fit a TSNEVisualizer on the TF-IDF vectorized hobbies corpus, passing the
    corpus targets as labels, and hand it to ``savefig`` as "corpus_tsne".
    """
    hobbies = load_hobbies()

    vectorizer = TfidfVectorizer()
    tfidf_docs = vectorizer.fit_transform(hobbies.data)

    visualizer = TSNEVisualizer(ax=newfig())
    visualizer.fit(tfidf_docs, hobbies.target)

    savefig(visualizer, "corpus_tsne")
Exemplo n.º 3
0
def freqdist():
    """
    Fit a FreqDistVisualizer on the count-vectorized hobbies corpus and hand
    the fitted visualizer to ``savefig`` as "freqdist".

    The feature names given to the visualizer come from the fitted
    CountVectorizer vocabulary.
    """
    corpus = load_hobbies()
    vecs = CountVectorizer()
    docs = vecs.fit_transform(corpus.data)

    # ``get_feature_names`` was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer ``get_feature_names_out`` and only fall back to the legacy
    # method on older releases that do not provide it.
    get_names = getattr(vecs, "get_feature_names_out", None)
    if get_names is None:
        get_names = vecs.get_feature_names

    # Normalise to a plain list, which is what the legacy method returned.
    features = list(get_names())

    oz = FreqDistVisualizer(features=features, ax=newfig())
    oz.fit(docs)
    savefig(oz, "freqdist")
    def test_quick_method(self):
        """
        Run the tsne quick method on the TF-IDF vectorized hobbies dataset
        and check the returned visualizer with assert_images_similar
        """
        hobbies = load_hobbies()

        features = TfidfVectorizer().fit_transform(hobbies.data)
        labels = hobbies.target

        # show=False is passed through to the quick method
        visualizer = tsne(features, labels, show=False)

        self.assert_images_similar(visualizer, tol=50)
Exemplo n.º 5
0
    def test_topic_modeling_k_means(self):
        """
        Fit a KElbowVisualizer wrapping KMeans with k=(4, 8) on the TF-IDF
        vectorized hobbies corpus and check the finalized result with
        assert_images_similar
        """
        hobbies = load_hobbies()
        tfidf_docs = TfidfVectorizer().fit_transform(hobbies.data)

        elbow = KElbowVisualizer(KMeans(), k=(4, 8))
        elbow.fit(tfidf_docs)
        elbow.finalize()

        self.assert_images_similar(elbow)
Exemplo n.º 6
0
    def test_quick_method(self):
        """
        Run the umap quick method on the TF-IDF vectorized hobbies dataset,
        check it returns a UMAPVisualizer, then check the result with
        assert_images_similar
        """
        hobbies = load_hobbies()

        features = TfidfVectorizer().fit_transform(hobbies.data)
        labels = hobbies.target

        # show=False is passed through to the quick method
        visualizer = umap(features, labels, show=False)
        assert isinstance(visualizer, UMAPVisualizer)

        self.assert_images_similar(visualizer, tol=50)
Exemplo n.º 7
0
## Imports
##########################################################################

import pytest
import matplotlib.pyplot as plt

from yellowbrick.exceptions import YellowbrickValueError
from yellowbrick.datasets import load_hobbies
from yellowbrick.text.dispersion import DispersionPlot, dispersion
from tests.base import VisualTestCase

##########################################################################
## Data
##########################################################################

# Loaded once at import time; the tests below read ``corpus.data``.
corpus = load_hobbies()

##########################################################################
## DispersionPlot Tests
##########################################################################


class TestDispersionPlot(VisualTestCase):
    def test_quick_method(self):
        """
        Assert no errors occur when using the quick method
        """
        _, ax = plt.subplots()

        text = [doc.split() for doc in corpus.data]
        search_terms = ["Game", "player", "score", "oil", "Man"]
Exemplo n.º 8
0
def load_document_representation():
    """
    Load the hobbies corpus and return a ``(docs, target)`` pair, where
    ``docs`` is the result of fitting a TfidfVectorizer on the corpus data
    and ``target`` is the corpus ``target`` attribute.
    """
    hobbies = load_hobbies()
    vectorized = TfidfVectorizer().fit_transform(hobbies.data)
    return vectorized, hobbies.target