Exemplo n.º 1
0
    # Queue CountVectorizer configurations that vary only max_features.
    for n in [2000, 5000, 10000, 15000, 20000]:
        model = ClassifierModel("Count Max Feature {}".format(n),
                                CountVectorizer(max_features=n))
        models.append(model)

    # Same caps with TfidfVectorizer. The label says "Tfidf" so these
    # entries do not share a name with the CountVectorizer entries above.
    for n in [2000, 5000, 10000, 15000, 20000]:
        model = ClassifierModel("Tfidf Max Feature {}".format(n),
                                TfidfVectorizer(max_features=n))
        models.append(model)

    # CountVectorizer with ngram_range (1, 2) and (1, 3) for each cap.
    for n in [500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000]:
        for ngram in [('Bigram', (1, 2)), ("Trigram", (1, 3))]:
            model = ClassifierModel(
                "Count {0} + Max Feature {1}".format(ngram[0], n),
                CountVectorizer(ngram_range=ngram[1], max_features=n))
            models.append(model)

    # TfidfVectorizer over the same grid; labelled "Tfidf" so each entry
    # is distinguishable from its CountVectorizer counterpart.
    for n in [500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000]:
        for ngram in [('Bigram', (1, 2)), ("Trigram", (1, 3))]:
            model = ClassifierModel(
                "Tfidf {0} + Max Feature {1}".format(ngram[0], n),
                TfidfVectorizer(ngram_range=ngram[1], max_features=n))
            models.append(model)

    # For every queued configuration: hand it the train/test split, call
    # fit_transform, train with a new MultinomialNB instance, then
    # evaluate under the name "MultinomialNB".
    for model in models:
        model.load_data(X_train, y_train, X_test, y_test)
        model.fit_transform()
        model.train(clf_model=MultinomialNB())
        model.evaluate(model_name="MultinomialNB")
Exemplo n.º 2
0
    models.append(model)

# TfidfVectorizer configurations that vary only the max_features cap.
for cap in (2000, 5000, 10000, 15000, 20000):
    models.append(
        ClassifierModel(
            "Tfidf Max Feature {}".format(cap),
            TfidfVectorizer(max_features=cap),
        )
    )

# N-gram grid: every Count configuration is queued first, then every
# Tfidf configuration; within a family the cap is the outer loop and the
# n-gram span the inner one.
ngram_caps = (500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000)
ngram_spans = (('Bigram', (1, 2)), ("Trigram", (1, 3)))
for label_fmt, vectorizer_cls in (
        ("Count {0} + Max Feature {1}", CountVectorizer),
        ("Tfidf {0} + Max Feature {1}", TfidfVectorizer)):
    for cap in ngram_caps:
        for span_name, span in ngram_spans:
            models.append(
                ClassifierModel(
                    label_fmt.format(span_name, cap),
                    vectorizer_cls(ngram_range=span, max_features=cap),
                )
            )

# For every queued configuration: hand it the train/test split, call
# fit_transform, train with a new LinearSVC instance, then evaluate
# under the name "LinearSVC".
for model in models:
    model.load_data(X_train, y_train, X_test, y_test)
    model.fit_transform()
    model.train(clf_model=LinearSVC())
    model.evaluate(model_name="LinearSVC")
Exemplo n.º 3
0
    # Cap-only sweep (no explicit ngram_range): every Count configuration
    # is queued before any Tfidf one.
    for label_fmt, vectorizer_cls in (
            ("Count Max Feature {}", CountVectorizer),
            ("Tfidf Max Feature {}", TfidfVectorizer)):
        for cap in (2000, 5000, 10000, 15000, 20000):
            models.append(
                ClassifierModel(label_fmt.format(cap),
                                vectorizer_cls(max_features=cap)))

    # N-gram grid, again Count family first; within a family the cap is
    # the outer loop and the n-gram span the inner one.
    ngram_spans = (('Bigram', (1, 2)), ("Trigram", (1, 3)))
    for label_fmt, vectorizer_cls in (
            ("Count {0} + Max Feature {1}", CountVectorizer),
            ("Tfidf {0} + Max Feature {1}", TfidfVectorizer)):
        for cap in (500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000):
            for span_name, span in ngram_spans:
                models.append(
                    ClassifierModel(
                        label_fmt.format(span_name, cap),
                        vectorizer_cls(ngram_range=span, max_features=cap)))

    # For every queued configuration: hand it the train/test split, call
    # fit_transform, train with a new SVC instance, then evaluate under
    # the name "SVC".
    for model in models:
        model.load_data(X_train, y_train, X_test, y_test)
        model.fit_transform()
        model.train(clf_model=SVC())
        model.evaluate(model_name="SVC")
Exemplo n.º 4
0
        models.append(model)

    # TfidfVectorizer configurations that vary only the max_features cap.
    for cap in (2000, 5000, 10000, 15000, 20000):
        label = "Tfidf Max Feature {}".format(cap)
        vectorizer = TfidfVectorizer(max_features=cap)
        models.append(ClassifierModel(label, vectorizer))

    # Shared grid for the two n-gram sweeps below.
    ngram_caps = (500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000)
    ngram_spans = (('Bigram', (1, 2)), ("Trigram", (1, 3)))

    # CountVectorizer over the grid (cap outer, n-gram span inner).
    for cap in ngram_caps:
        for span_name, span in ngram_spans:
            label = "Count {0} + Max Feature {1}".format(span_name, cap)
            vectorizer = CountVectorizer(ngram_range=span, max_features=cap)
            models.append(ClassifierModel(label, vectorizer))

    # TfidfVectorizer over the same grid, queued after all Count entries.
    for cap in ngram_caps:
        for span_name, span in ngram_spans:
            label = "Tfidf {0} + Max Feature {1}".format(span_name, cap)
            vectorizer = TfidfVectorizer(ngram_range=span, max_features=cap)
            models.append(ClassifierModel(label, vectorizer))

    # For every queued configuration: hand it the train/test split, call
    # fit_transform, train with a new LogisticRegression instance, then
    # evaluate under the name "LogisticRegression".
    for model in models:
        model.load_data(X_train, y_train, X_test, y_test)
        model.fit_transform()
        model.train(clf_model=LogisticRegression())
        model.evaluate(model_name="LogisticRegression")