def get_corpus(self):
    """Read and populate the corpus, refreshing the classifier if stale.

    Loads ``<pathname>.csv`` from inside ``<pathname>.csv.zip`` under
    CORPUS_PATH into ``self.corpus`` (hash -> (category, doc)), then
    rebuilds ``self.corpus_shadow`` and ``self.classifier`` whenever the
    corpus is newer than the shadow copy.

    :returns: the up-to-date ``self.corpus_shadow``.
    """
    if self.corpus is None:
        self.corpus = Dictionary()
        self.corpus.updated = time.time()
        try:
            directory = Paths.get_root_dir(*CORPUS_PATH)
            infile = '{name}.csv'.format(name=self.pathname)
            path = join(directory, '{name}.csv.zip'.format(name=self.pathname))
            # Read the CSV from inside the zip file. Pass the path directly
            # so ZipFile owns (and closes) the underlying file handle; the
            # previous ZipFile(open(path, 'rb')) form never closed the
            # caller-supplied handle.
            with ZipFile(path) as archive:
                buffer = StringIO(archive.read(infile))
                reader = csv.reader(buffer)
                for row in reader:
                    # 0: category, 1: doc, 2: hash
                    self.corpus[row[2]] = (row[0], row[1])
        except IOError:
            # Missing or unreadable archive: best-effort, keep corpus empty.
            pass
    if self.corpus_shadow is None or self.corpus_shadow.updated < self.corpus.updated:
        self.corpus_shadow = List(self.corpus.values())
        self.corpus_shadow.updated = time.time()
        self.classifier = MaxEntClassifier(
            self.corpus_shadow, feature_extractor=extractor_base)  # or NaiveBayesClassifier
    return self.corpus_shadow
def add_global_hook():
    """Train the shared classifiers once and expose them via web.ctx.globals.

    Returns a hook that attaches the shared storage to the request context
    before delegating to the wrapped handler.
    """
    bank = TweetBank(50)
    training, testing = bank.data_set()
    shared = web.storage({
        "classifier_dictionary": {
            "Naive Bayes": NaiveBayesClassifier(training),
            "Maxent": MaxEntClassifier(training),
        },
        "test_set": testing,
    })

    def _wrapper(handler):
        # Make the pre-trained state available to every request.
        web.ctx.globals = shared
        return handler()

    return _wrapper
def __init__(self, data):
    """Train the underlying maximum-entropy classifier on *data*."""
    # NaiveBayesClassifier was tried here previously; MaxEnt is the keeper.
    self._classifier = MaxEntClassifier(data)
def setUp(self):
    """Build a fresh MaxEnt classifier from the shared train_set fixture."""
    self.classifier = MaxEntClassifier(train_set)
print(len(words), len(tags))
# Split the first 1000 (word, tag) pairs 80/20 into train/test.
# Both branches of the original if/else built the identical tuple, so only
# the destination list actually depends on the index.
for i in range(1000):
    pair = (words[i], tags[i])
    (train if i < 800 else test).append(pair)
print(train)
print(test)
# Compare three classifier families on the same split.
naive = NaiveBayesClassifier(train)
dtc = DecisionTreeClassifier(train)
mec = MaxEntClassifier(train)
print("NaiveBayesClassifier Accuracy: {0}".format(naive.accuracy(test)))
print("DecisionTreeClassifier Accuracy: {0}".format(dtc.accuracy(test)))
print("MaxEntClassifier Accuracy: {0}".format(mec.accuracy(test)))
# Per-item evaluation of a fresh Naive Bayes model.
cl = NaiveBayesClassifier(train)
print("NaiveBayesClassifier Accuracy: {0}".format(cl.accuracy(test)))
for i in range(len(test)):
    # NOTE(review): test[i] is a (word, tag) pair; classifying the whole
    # tuple rather than test[i][0] looks suspicious — confirm intent.
    tag = cl.classify(test[i])
    pred_tags.append(tag)
    if tag == test_tags[i]:
        count += 1
print(len(pred_tags), len(test_tags))
print(count)
def search_department(job, train):
    """Classify *job* with a MaxEnt model trained on *train*.

    Prints and returns the most probable department label.
    """
    model = MaxEntClassifier(train)
    distribution = model.prob_classify(job)
    best = distribution.max()
    print(best)
    return best
# trains.append(train[i])
trains = train

# Map each menu choice to (banner name, display label, classifier class).
menu = {
    "1": ("NaiveBayesClassifier", "Naive Bayes", NaiveBayesClassifier),
    "2": ("DecisionTreeClassifier", "Decision Tree", DecisionTreeClassifier),
    "3": ("MaxEntClassifier", "Maximum Entropy", MaxEntClassifier),
    "4": ("NLTKClassifier", "NLTK", NLTKClassifier),
}
if choice in menu:
    banner, label, factory = menu[choice]
    print("\n" + "#" + banner)
    clf = factory(trains)
    print("Classifier: " + label + " -- Accuracy: ", clf.accuracy(test), "\n")
else:
    print("Bad input!")

# most repeated words (most important properties)
totalDictPosSorted = sorted(totalDictPos.items(), key=operator.itemgetter(1))
totalDictNegSorted = sorted(totalDictNeg.items(), key=operator.itemgetter(1))
from textblob.classifiers import MaxEntClassifier

# Train a MaxEnt classifier from the toy CSV and report held-out accuracy.
with open('data/train-toy.csv', 'r') as fp:
    cl = MaxEntClassifier(fp, format="csv")
with open('data/test-toy.csv', 'r') as gp:
    # Parenthesized single-argument print works under both Python 2 and 3;
    # the bare Python-2 print statement is a SyntaxError on Python 3.
    print(cl.accuracy(gp, format="csv"))
# Decision-tree experiments: raw, stop-word-filtered, and stemmed corpora.
print('Before pre-processing \n')
model = DecisionTreeClassifier(training_array)
classify_review(model)

print('\n After removing stop-words \n')
model = DecisionTreeClassifier(training_array_without_sw)
classify_review(model)

print('\n After stemming \n')
model = DecisionTreeClassifier(training_array_stemmed_without_sw)
classify_review(model)

# Naive-Bayes experiments on the same three corpora.
print('\n ************ NaiveBayesClassifier ********************\n')
print('Before pre-processing\n')
model = NaiveBayesClassifier(training_array)
classify_review(model)

print('\n After removing stop-words \n')
model = NaiveBayesClassifier(training_array_without_sw)
classify_review(model)

print('\n After stemming \n')
model = NaiveBayesClassifier(training_array_stemmed_without_sw)
classify_review(model)

# Maximum-entropy experiments on the same three corpora.
# (Original builds the first model before printing its label; kept as-is
# since MaxEnt training may itself print iteration output.)
print('\n ************ MaxEntClassifier ********************\n')
model = MaxEntClassifier(training_array)
print('Before pre-processing\n')
classify_review(model)

print('\n After removing stop-words \n')
model = MaxEntClassifier(training_array_without_sw)
classify_review(model)

print('\n After stemming \n')
model = MaxEntClassifier(training_array_stemmed_without_sw)
classify_review(model)