def test_model(self, test_data, empty_solution, evaluate=False):
    """Write this model's predictions for ``test_data`` and optionally evaluate it.

    Testing step:
      * if ``self.prediction_file`` already exists, nothing is recomputed;
      * if ``self.model_file`` does not exist, a message is printed and the
        method returns without doing anything else (no evaluation either);
      * otherwise the serialized classifier is read from ``self.model_file``,
        run against ``test_data``, and one ``[userid, tweetid, predicted]``
        entry per row of ``read_sheet(file_name=empty_solution)`` is handed to
        ``write_the_solution_file`` together with ``self.prediction_file``.

    Evaluation step (only when ``evaluate`` is truthy): unless
    ``self.evaluation_file`` already exists, the evaluation summary
    (``Evaluation.to_summary()``) is stored through ``save_file``.  The
    evaluation computed during the testing step is reused when available.

    :param test_data: dataset passed to ``Evaluation`` and ``test_model``.
    :param empty_solution: file name given to ``read_sheet``; each row it
        yields must support ``row['userid']`` and ``row['tweetid']``.
    :param evaluate: when truthy, also write the evaluation summary.
    :return: ``None`` in every case; progress is reported with ``print``.
    :raises IndexError: if there are fewer predictions than solution rows.
    """

    def run_evaluation():
        # Deserialize the trained classifier and score it on the test data.
        classifier = Classifier(jobject=serialization.read(self.model_file))
        result = Evaluation(data=test_data)
        result.test_model(classifier=classifier, data=test_data)
        return result

    # NOTE: print(...) with a single string argument produces the same output
    # on Python 2 and Python 3, unlike the former print statements.
    evaluation = None
    if os.path.isfile(self.prediction_file):
        print('Model ' + self.name + ' already tested.')
    elif not os.path.isfile(self.model_file):
        print('Impossible testing this model. It should be trained first.')
        return
    else:
        print('Starting to test_model model ' + self.name + '.')
        evaluation = run_evaluation()
        predictions = evaluation.predictions()
        rows = read_sheet(file_name=empty_solution)
        # Index instead of pop(0): linear time, and a short prediction list
        # still fails loudly (IndexError) rather than silently truncating the
        # solution file the way zip() would.
        # NOTE(review): assumes predictions come back in the same order as
        # the solution rows -- confirm against the data preparation code.
        solutions = [
            [row['userid'], row['tweetid'], predictions[index].predicted()]
            for index, row in enumerate(rows)
        ]
        write_the_solution_file(solutions, self.prediction_file)
        print('Model ' + self.name + ' tested.')

    if not evaluate:
        return
    if os.path.isfile(self.evaluation_file):
        print('Model ' + self.name + ' already evaluated.')
        return
    if evaluation is None:
        # Predictions already existed, so the model was not run above.
        evaluation = run_evaluation()
    save_file(file_name=self.evaluation_file, content=evaluation.to_summary())
    print('Model ' + self.name + ' evaluated.')
# Announce the dataset, load it from ARFF and use the last attribute as class.
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
class_index = data.num_attributes() - 1
data.set_class_index(class_index)

# Scatter plot of the "petalwidth" attribute against "petallength".
petal_width_index = data.get_attribute_by_name("petalwidth").get_index()
petal_length_index = data.get_attribute_by_name("petallength").get_index()
pld.scatter_plot(data, petal_width_index, petal_length_index, wait=False)

# Run the AddClassification filter (J48, "-classification" and "-error"
# options) over the data and print the filtered dataset.
addcls_options = [
    "-W", "weka.classifiers.trees.J48",
    "-classification",
    "-error",
]
addcls = Filter(
    classname="weka.filters.supervised.attribute.AddClassification",
    options=addcls_options,
)
addcls.set_inputformat(data)
filtered = addcls.filter(data)
print(filtered)

# Build a J48 classifier and evaluate it on the very data it was built from.
cls = Classifier(classname="weka.classifiers.trees.J48")
cls.build_classifier(data)
evl = Evaluation(data)
evl.test_model(cls, data)

# Plot the classifier errors from the evaluation's predictions, then stop
# the JVM.
error_predictions = evl.predictions()
plc.plot_classifier_errors(error_predictions, wait=True)
jvm.stop()
jvm.start()

# Locate and load the credit-g dataset; the last attribute is the class.
fname = data_dir + os.sep + "credit-g.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
class_index = data.num_attributes() - 1
data.set_class_index(class_index)

# 10-fold cross-validation of NaiveBayes, seeded with Random(1).
classifier = "weka.classifiers.bayes.NaiveBayes"
print("\n--> " + classifier + "\n")
cls = Classifier(classname=classifier)
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
# Turn the predictions into instances and sort them on the
# "distribution-good" attribute before reporting.
preds = classifiers.predictions_to_instances(data, evl.predictions())
sort_index = preds.get_attribute_by_name("distribution-good").get_index()
preds.sort(sort_index)
print(evl.to_summary())
print(evl.to_matrix())
print(preds)

# Same procedure for J48, built with the options "-M 100".
classifier = "weka.classifiers.trees.J48"
print("\n--> " + classifier + "\n")
cls = Classifier(classname=classifier, options=["-M", "100"])
evl = Evaluation(data)
evl.crossvalidate_model(cls, data, 10, Random(1))
preds = classifiers.predictions_to_instances(data, evl.predictions())
sort_index = preds.get_attribute_by_name("distribution-good").get_index()
preds.sort(sort_index)
print(evl.to_summary())
print(evl.to_matrix())