def test_save_load(self):
    """Check that a blog classifier still predicts after a save/load round trip.

    Trains a ``NaiveBayes`` on the training split produced by
    ``split_blogs_corpus(BlogFeatures)``, saves it under the name
    ``"model"``, loads that file into a brand-new ``NaiveBayes`` and
    asserts that the reloaded classifier's accuracy on the test split is
    greater than 0.55.
    """
    training_set, held_out = self.split_blogs_corpus(BlogFeatures)

    # Train and persist the reference classifier.
    trained = NaiveBayes()
    trained.train(training_set)
    trained.save("model")

    # Only the reloaded instance is evaluated, so the check exercises the
    # persisted state rather than the in-memory one.
    restored = NaiveBayes()
    restored.load("model")

    restored_accuracy = accuracy(restored, held_out)
    self.assertGreater(restored_accuracy, 0.55)
def test_save_load_blogs_bag(self):
    """Check that save/load preserves a bag-of-words classifier's state.

    Trains a ``NaiveBayes`` on the training split produced by
    ``split_blogs_corpus(BagOfWords)``, saves it to ``'trained_model.p'``,
    loads that file into a second ``NaiveBayes`` and asserts that the
    ``model``, ``priorCount`` and ``countPerFeature`` attributes of the two
    instances compare equal.
    """
    # The test split is returned as well but is not needed here.
    training_set, _held_out = self.split_blogs_corpus(BagOfWords)

    original = NaiveBayes()
    original.train(training_set)
    original.save('trained_model.p')

    reloaded = NaiveBayes()
    reloaded.load('trained_model.p')

    # Compare each piece of state separately so a failure points at the
    # attribute that did not survive the round trip.
    self.assertEqual(original.model, reloaded.model)
    self.assertEqual(original.priorCount, reloaded.priorCount)
    self.assertEqual(original.countPerFeature, reloaded.countPerFeature)
# NOTE(review): this line is a whitespace-collapsed fragment. Its original line
# breaks and indentation are not recoverable from here, so the code below is
# left exactly as found and only this comment block is added.
#
# Part 1 - tail of a function that begins above this view (presumably
# load_instance, which is called right after it):
#   * extends `tokens` with the word tokens that nltk.regexp_tokenize finds in `l`;
#   * passes `tokens` through util.del_dup to obtain `data`;
#   * builds an Instance labelled 'negative' or 'positive' depending on whether
#     filepath[-4:-1] is 'neg' or 'pos', and raises for any other path.
#     The slice only matches when the path ends with a trailing separator,
#     as in 'txt_sentoken/neg/';
#   * appends the Instance to ins_list, closes `f`, and returns ins_list.
#
# Part 2 - top-level evaluation script:
#   * loads the instances from 'txt_sentoken/neg/' and 'txt_sentoken/pos/';
#   * draws a random proportion p0 and uses [p0, 1 - p0] as `prop`;
#   * calls split_train_test(nb, instance_list, prop, ID, limits), saves nb to
#     "movie_review_classifier.json", calls NaiveBayes.load on that file, then
#     calls split_train_test a second time with the same arguments;
#   * writes repr(p0) and repr(accuracy) as one line to
#     'results/results<ID>.txt'.
#
# NOTE(review): `raise Exception, "Wrong path!"` is Python 2-only syntax;
#   `raise Exception("Wrong path!")` is accepted by both Python 2 and 3.
# NOTE(review): nb1 is assigned but never used in the visible code - the second
#   split_train_test call passes nb again. Confirm whether nb1 was intended.
# NOTE(review): the first `accuracy` value is overwritten by the second call
#   before anything reads it.
# NOTE(review): open mode 'a+w' is rejected by Python 3's open() (ValueError);
#   confirm the target interpreter, or use 'a' if appending is the intent.
# NOTE(review): the results file is closed manually; a `with` block would also
#   close it if write() raises.
tokens += nltk.regexp_tokenize(l,pattern="\w+") data = util.del_dup(tokens) if filepath[-4:-1] == 'neg': ins = Instance(filename,'negative',data,tokens) elif filepath[-4:-1] == 'pos': ins = Instance(filename,'positive',data,tokens) else: raise Exception, "Wrong path!" ins_list.append(ins) f.close() return ins_list instance_list = load_instance('txt_sentoken/neg/') instance_list += load_instance('txt_sentoken/pos/') #random split train-test p0 = random.random() p1 = 1-p0 prop = [p0,p1] #prop = [0.5,0.5] accuracy = split_train_test(nb,instance_list,prop,ID,limits) nb.save("movie_review_classifier.json") nb1 = NaiveBayes.load("movie_review_classifier.json") accuracy = split_train_test(nb,instance_list,prop,ID,limits) f = open('results/results'+repr(ID)+'.txt','a+w') f.write(repr(p0)+' '+repr(accuracy) + '\n') f.close()