# Vectorizer sweep evaluated with Multinomial Naive Bayes.
#
# Builds one ClassifierModel per vectorizer configuration and appends it to
# `models`; every entry in `models` is then given the train/test data,
# fit/transformed, trained with a fresh MultinomialNB() and evaluated under the
# name "MultinomialNB".
#
# NOTE(review): `models`, `ClassifierModel`, `X_train`, `y_train`, `X_test` and
# `y_test` are not defined in this section; they are presumably created earlier
# in the file. Confirm that `models` is empty at this point, otherwise models
# from earlier sections are re-trained here as well.

# Count vectors with the vocabulary capped at n features.
for n in [2000, 5000, 10000, 15000, 20000]:
    model = ClassifierModel(
        "Count Max Feature {}".format(n),
        CountVectorizer(max_features=n),
    )
    models.append(model)

# TF-IDF vectors with the vocabulary capped at n features.
# Labelled "Tfidf" (not "Count") so the results can be told apart from the
# CountVectorizer models above.
for n in [2000, 5000, 10000, 15000, 20000]:
    model = ClassifierModel(
        "Tfidf Max Feature {}".format(n),
        TfidfVectorizer(max_features=n),
    )
    models.append(model)

# Count vectors with n-gram ranges (1, 2) and (1, 3), capped at n features.
# Each `ngram` entry is (label used in the model name, ngram_range tuple).
for n in [500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000]:
    for ngram in [('Bigram', (1, 2)), ("Trigram", (1, 3))]:
        model = ClassifierModel(
            "Count {0} + Max Feature {1}".format(ngram[0], n),
            CountVectorizer(ngram_range=ngram[1], max_features=n),
        )
        models.append(model)

# TF-IDF vectors with the same n-gram ranges and caps.
# Labelled "Tfidf" so the names do not collide with the Count n-gram models.
for n in [500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000]:
    for ngram in [('Bigram', (1, 2)), ("Trigram", (1, 3))]:
        model = ClassifierModel(
            "Tfidf {0} + Max Feature {1}".format(ngram[0], n),
            TfidfVectorizer(ngram_range=ngram[1], max_features=n),
        )
        models.append(model)

# Run the full pipeline for every configured model.
for model in models:
    model.load_data(X_train, y_train, X_test, y_test)
    model.fit_transform()
    model.train(clf_model=MultinomialNB())
    model.evaluate(model_name="MultinomialNB")
# Vectorizer sweep evaluated with LinearSVC.
#
# NOTE(review): the leading `models.append(model)` is presumably the tail of a
# loop whose header precedes this line -- confirm against the preceding code.
# NOTE(review): the statements below sit on one line without line breaks or
# indentation; confirm the intended nesting before running.
#
# Appends to `models`:
#   * "Tfidf Max Feature n"           -- TfidfVectorizer(max_features=n),
#                                        n in 2000..20000
#   * "Count <label> + Max Feature n" -- CountVectorizer with ngram_range
#                                        (1, 2) ("Bigram") or (1, 3)
#                                        ("Trigram"), n in 500..5000
#   * "Tfidf <label> + Max Feature n" -- TfidfVectorizer with the same n-gram
#                                        ranges and caps
# Then, for every entry in `models`: load_data(X_train, y_train, X_test,
# y_test), fit_transform(), train with a new LinearSVC(), and evaluate under
# the name "LinearSVC".
models.append(model) for n in [2000, 5000, 10000, 15000, 20000]: model = ClassifierModel( "Tfidf Max Feature {}".format(n), TfidfVectorizer(max_features=n) ) models.append(model) for n in [500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000]: for ngram in [('Bigram', (1, 2)), ("Trigram", (1, 3))]: model = ClassifierModel( "Count {0} + Max Feature {1}".format(ngram[0], n), CountVectorizer(ngram_range=ngram[1], max_features=n) ) models.append(model) for n in [500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000]: for ngram in [('Bigram', (1, 2)), ("Trigram", (1, 3))]: model = ClassifierModel( "Tfidf {0} + Max Feature {1}".format(ngram[0], n), TfidfVectorizer(ngram_range=ngram[1], max_features=n) ) models.append(model) for model in models: model.load_data(X_train, y_train, X_test, y_test) model.fit_transform() model.train(clf_model=LinearSVC()) model.evaluate(model_name="LinearSVC")
# SVC benchmark over a grid of text vectorizers.
#
# Four families of ClassifierModel are appended to `models`, in this order:
# capped Count, capped Tfidf, Count n-gram, Tfidf n-gram. Afterwards each entry
# in `models` is loaded with the train/test data, fit/transformed, trained with
# a fresh SVC() and evaluated under the name "SVC".
#
# NOTE(review): `models`, `ClassifierModel` and the train/test arrays are
# defined outside this section; presumably `models` is an empty list here --
# confirm.

# Family 1: raw term counts, vocabulary limited to n features.
for n in (2000, 5000, 10000, 15000, 20000):
    models.append(
        ClassifierModel(
            "Count Max Feature {}".format(n),
            CountVectorizer(max_features=n),
        )
    )

# Family 2: TF-IDF weights, vocabulary limited to n features.
for n in (2000, 5000, 10000, 15000, 20000):
    models.append(
        ClassifierModel(
            "Tfidf Max Feature {}".format(n),
            TfidfVectorizer(max_features=n),
        )
    )

# Family 3: term counts over n-gram ranges; `ngram` is (label, ngram_range).
for n in (500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000):
    for ngram in (('Bigram', (1, 2)), ("Trigram", (1, 3))):
        models.append(
            ClassifierModel(
                "Count {0} + Max Feature {1}".format(ngram[0], n),
                CountVectorizer(max_features=n, ngram_range=ngram[1]),
            )
        )

# Family 4: TF-IDF weights over the same n-gram ranges and caps.
for n in (500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000):
    for ngram in (('Bigram', (1, 2)), ("Trigram", (1, 3))):
        models.append(
            ClassifierModel(
                "Tfidf {0} + Max Feature {1}".format(ngram[0], n),
                TfidfVectorizer(max_features=n, ngram_range=ngram[1]),
            )
        )

# Load data, vectorize, train and score every configuration.
for model in models:
    model.load_data(X_train, y_train, X_test, y_test)
    model.fit_transform()
    model.train(clf_model=SVC())
    model.evaluate(model_name="SVC")
# Vectorizer sweep evaluated with LogisticRegression.
#
# NOTE(review): the leading `models.append(model)` is presumably the tail of a
# loop whose header precedes this line -- confirm against the preceding code.
# NOTE(review): the statements below sit on one line without line breaks or
# indentation; confirm the intended nesting before running.
#
# Appends to `models`:
#   * "Tfidf Max Feature n"           -- TfidfVectorizer(max_features=n),
#                                        n in 2000..20000
#   * "Count <label> + Max Feature n" -- CountVectorizer with ngram_range
#                                        (1, 2) ("Bigram") or (1, 3)
#                                        ("Trigram"), n in 500..5000
#   * "Tfidf <label> + Max Feature n" -- TfidfVectorizer with the same n-gram
#                                        ranges and caps
# Then, for every entry in `models`: load_data(X_train, y_train, X_test,
# y_test), fit_transform(), train with a new LogisticRegression(), and
# evaluate under the name "LogisticRegression".
models.append(model) for n in [2000, 5000, 10000, 15000, 20000]: model = ClassifierModel( "Tfidf Max Feature {}".format(n), TfidfVectorizer(max_features=n) ) models.append(model) for n in [500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000]: for ngram in [('Bigram', (1, 2)), ("Trigram", (1, 3))]: model = ClassifierModel( "Count {0} + Max Feature {1}".format(ngram[0], n), CountVectorizer(ngram_range=ngram[1], max_features=n) ) models.append(model) for n in [500, 700, 800, 900, 1000, 2000, 3000, 4000, 5000]: for ngram in [('Bigram', (1, 2)), ("Trigram", (1, 3))]: model = ClassifierModel( "Tfidf {0} + Max Feature {1}".format(ngram[0], n), TfidfVectorizer(ngram_range=ngram[1], max_features=n) ) models.append(model) for model in models: model.load_data(X_train, y_train, X_test, y_test) model.fit_transform() model.train(clf_model=LogisticRegression()) model.evaluate(model_name="LogisticRegression")