def Boost_J48(data, rnm): data.class_is_last() fc1 = FilteredClassifier() fc1.classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.25", "-M", "2"]) fc1.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"]) fc2 = SingleClassifierEnhancer(classname="weka.classifiers.meta.AdaBoostM1", options=["-P", "100", "-S", "1", "-I", "10"]) fc2.classifier = fc1 pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV", options=["-p", "1"]) folds = 10 fc2.build_classifier(data) evaluation = Evaluation(data) evaluation.crossvalidate_model(fc2, data, folds, Random(1), pred_output) f0 = open(rnm + '_Boost_J48_Tree.txt', 'w') print >> f0, "Filename: ", rnm print >> f0, '\n\n' print >> f0, str(fc2) f0.close() f1 = open(rnm + '_Boost_J48_Prediction.txt', 'w') print >> f1, 'Filename:', rnm print >> f1, 'Prediction Summary:', (pred_output.buffer_content()) f1.close() f2 = open(rnm + '_Boost_j48_Evaluation.txt', 'w') print >> f2, 'Filename:', rnm print >> f2, 'Evaluation Summary:', (evaluation.summary()) print >> f2, '\n\n\n' print >> f2, (evaluation.class_details()) f2.close() plot_roc(evaluation, class_index=[0,1], title=rnm, key_loc='best', outfile=rnm + '_Boost_J48_ROC.png', wait=False) value_Boost_J48 = str(evaluation.percent_correct) return value_Boost_J48
def classification(data, train, test, num_clases): baseClassifiers_list = [ "weka.classifiers.bayes.NaiveBayes", "weka.classifiers.functions.MultilayerPerceptron", "weka.classifiers.functions.SMO", "weka.classifiers.lazy.IBk", "weka.classifiers.lazy.KStar", "weka.classifiers.meta.AdaBoostM1", "weka.classifiers.meta.Bagging", "weka.classifiers.meta.LogitBoost", "weka.classifiers.trees.J48", "weka.classifiers.trees.DecisionStump", "weka.classifiers.trees.LMT", "weka.classifiers.trees.RandomForest", "weka.classifiers.trees.REPTree", "weka.classifiers.rules.PART", "weka.classifiers.rules.JRip", "weka.classifiers.functions.Logistic", "weka.classifiers.meta.ClassificationViaRegression", "weka.classifiers.bayes.BayesNet" ] results_train = pd.DataFrame() results_test = pd.DataFrame() cost_matrix_list = [ "[]", "[0]", "[0.0 1.0; 1.0 0.0]", "[0.0 1.0 2.0; 1.0 0.0 1.0; 2.0 1.0 0.0]", "[0.0 1.0 2.0 3.0; 1.0 0.0 1.0 2.0; 2.0 1.0 0.0 1.0; 3.0 2.0 1.0 0.0]", "[0.0 1.0 2.0 3.0 4.0; 1.0 0.0 1.0 2.0 3.0; 2.0 1.0 0.0 1.0 2.0; 3.0 2.0 1.0 0.0 1.0; 4.0 3.0 2.0 1.0 0.0]", "[0.0 1.0 2.0 3.0 4.0 5.0; 1.0 0.0 1.0 2.0 3.0 4.0; 2.0 1.0 0.0 1.0 2.0 3.0; 3.0 2.0 1.0 0.0 1.0 2.0; 4.0 3.0 2.0 1.0 0.0 1.0; 5.0 4.0 3.0 2.0 1.0 0.0]", "[0.0 1.0 2.0 3.0 4.0 5.0 6.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0]", "[0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0]", "[0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 8.0 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0]", "[0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 8.0 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 9.0 8.0 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0]" ] real_train = [] # the real label of the dataset for i in range(train.num_instances): real_train.append( train.get_instance(i).values[(train.num_attributes - 1)]) results_train['real'] = real_train real_test = [] # the real label of the dataset for i in range(test.num_instances): real_test.append( test.get_instance(i).values[(test.num_attributes - 1)]) results_test['real'] = real_test num = 0 for clas in baseClassifiers_list: column = "p" + np.str(num) #classifier classifier = SingleClassifierEnhancer( classname="weka.classifiers.meta.CostSensitiveClassifier", options=[ "-cost-matrix", cost_matrix_list[num_clases], "-M", "-S", "1" ]) base = Classifier(classname=clas) classifier.classifier = base predicted_data_train = None predicted_data_test = None evaluation = Evaluation(data) classifier.build_classifier(train) #evaluation.test_model(classifier, train) # add predictions addcls = Filter( classname="weka.filters.supervised.attribute.AddClassification", options=["-classification"]) addcls.set_property("classifier", Classifier.make_copy(classifier)) addcls.inputformat(train) #addcls.filter(train) # trains the classifier pred_train = addcls.filter(train) pred_test = addcls.filter(test) if predicted_data_train is None: predicted_data_train = Instances.template_instances(pred_train, 0) for n in range(pred_train.num_instances): predicted_data_train.add_instance(pred_train.get_instance(n)) if predicted_data_test is None: predicted_data_test = Instances.template_instances(pred_test, 0) for n in range(pred_test.num_instances): predicted_data_test.add_instance(pred_test.get_instance(n)) preds_train = [ ] #labels predicted for the classifer trained in the iteration preds_test = [] for i in range(predicted_data_train.num_instances): preds_train.append( predicted_data_train.get_instance(i).values[( predicted_data_train.num_attributes - 1)]) for i in range(predicted_data_test.num_instances): preds_test.append( predicted_data_test.get_instance(i).values[( predicted_data_test.num_attributes - 1)]) results_train[column] = preds_train results_test[column] = preds_test num = num + 1 return results_train, results_test