Exemplo n.º 1
0
 def __init__(self):
   """Load the training set, then fit the vectorizer and the classifier.

   Training documents and labels are read through a DataManager opened on
   self.DATABASE. A "tfidf" vectorizer is fitted on the documents and the
   "nb" classifier is fitted on the transformed data.
   """
   manager = DataManager(self.DATABASE)
   corpus = manager.select_wikipedia_train()
   self.train_tweets, self.train_labels = corpus

   # Both models are created before any fitting takes place.
   self.vectorizer = get_vectorizer("tfidf", min_df=1)
   self.nb = Classifier(classifier="nb")

   # Vectorize the raw training documents, then train on the result.
   features = self.vectorizer.fit_transform(self.train_tweets)
   self.train_data = features
   self.nb.fit(self.train_data, self.train_labels)
Exemplo n.º 2
0
from features import *
from classifiers import Classifier

from db import DataManager

# Number of times the whole set-up below is repeated.
N_TIMES = 1

# Database path handed to DataManager. It never changes between repetitions,
# so it is defined once here instead of being re-assigned on every pass.
DATABASE = "us_twitter.db"

# Every pass reloads the data and builds a fresh vectorizer and classifiers.
for i in range(N_TIMES):
  # A single-argument print() emits the same text on Python 2 and Python 3.
  print("%d times" % (i + 1))

  split = 0.8  # not referenced in the statements that follow in this loop

  db_mgr = DataManager(DATABASE)

  train_tweets, train_labels = db_mgr.select_wikipedia_train()
  # NOTE(review): `state_fips` (the value passed as label=) is not defined in
  # this script; it has to come from one of the star imports -- confirm.
  test_tweets, test_labels, dummy1, dummy2 = db_mgr.select_tweets(
    limit=10, state_fips=True, table="us_tweets", label=state_fips)

  results = get("results.json")

  vectorizer = get_vectorizer("tfidf", min_df=1)

  # Classifiers under comparison, keyed by a display name.
  classifiers = {
    "BernoulliNB": Classifier(classifier="bnb"),
    "MultinomialNB": Classifier(classifier="nb"),
    "KNN-1000": Classifier(classifier="knn", k=1000),
    "KNN-2000": Classifier(classifier="knn", k=2000),
    # The SVC entry is loaded via load= rather than constructed from params:
    # "SVC": Classifier(classifier="svm", params={"C" : 1.0,"kernel" : 'linear','verbose':True})
    "SVC": Classifier(load="classifier-SVC")
  }
Exemplo n.º 3
0
import pickle

from lib import *
from labels import *
from metrics import *
from features import *

from db import DataManager
from classifiers import Classifier

# Load the training documents and their labels from the database.
DATABASE = "us_twitter.db"
db_mgr = DataManager(DATABASE)
train_data, train_labels = db_mgr.select_wikipedia_train()

# One vectorizer per feature scheme. Each key must name the scheme that is
# requested from get_vectorizer, so that count_data and tf_idf_data below
# really hold the features their names announce.
vectorizers = {
  "count": get_vectorizer("count", min_df=1),
  "tfidf": get_vectorizer("tfidf", min_df=1),
}

# A single-argument print() emits the same text on Python 2 and Python 3.
print("Vectorizing Training Data...")
count_data = vectorizers["count"].fit_transform(train_data)
tf_idf_data = vectorizers["tfidf"].fit_transform(train_data)

classifiers = {
  "BernoulliNB": {
    "count":Classifier(classifier="bnb"),
    "tfidf":Classifier(classifier="bnb")
  },
  "MultinomialNB": {
    "count":Classifier(classifier="nb"),
    "tfidf":Classifier(classifier="nb")