def CV5x2(dataset, algo, num_datasets):
    """Cross-validate ``algo`` on an ARFF file and return the AUC of class 1.

    The file is loaded with Weka's ArffLoader, the last attribute is used
    as the class, and a 2-fold cross-validation seeded with ``Random(5)``
    is run. The summary, the confusion matrix and the area under the ROC
    curve for class index 1 are printed.

    :param dataset: path of the ARFF file to load
    :param algo: Weka classifier class name
    :param num_datasets: accepted but not used inside this function
    :return: area under the ROC curve for class index 1
    """
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    instances = arff_loader.load_file(dataset)
    instances.class_is_last()

    learner = Classifier(classname=algo)
    evaluation = Evaluation(instances)
    evaluation.crossvalidate_model(learner, instances, 2, Random(5))

    summary_title = "=== " + str(algo) + " on" + str(dataset) + " ==="
    print(evaluation.summary(summary_title, False))
    print(evaluation.matrix("=== on click prediction(confusion matrix) ==="))
    auc_text = str(evaluation.area_under_roc(1))
    print("For Algo" + str(algo) + "areaUnderROC/1: for CV5x2 " + auc_text)
    return evaluation.area_under_roc(1)
def crossTest(this, trainingFile, classifier, testFile):
    """Train on one ARFF file, test on another, and store the metrics.

    The classifier is built on ``trainingFile`` and evaluated on
    ``testFile``; both files use their last attribute as the class.
    Results are stored on ``this`` as two parallel lists:

    * ``this.header`` -- ``"Accuracy"`` followed by ``"<label> TP"``,
      ``"<label> FP"`` and ``"<label> AUC ROC"`` for every class label
    * ``this.values`` -- percent correct followed by, per class label,
      TP rate * 100, FP rate * 100 and the area under ROC

    :param this: object receiving the ``values`` and ``header`` attributes
    :param trainingFile: path of the ARFF file used for training
    :param classifier: Weka classifier class name
    :param testFile: path of the ARFF file used for testing
    """
    loader = Loader(classname="weka.core.converters.ArffLoader")

    train_data = loader.load_file(trainingFile)
    train_data.class_is_last()
    model = Classifier(classname=classifier)
    model.build_classifier(train_data)

    test_data = loader.load_file(testFile)
    test_data.class_is_last()
    classes = [str(code) for code in test_data.class_attribute.values]

    header = ["Accuracy"]
    for name in classes:
        header += [name + " TP", name + " FP", name + " AUC ROC"]

    evl = Evaluation(test_data)
    evl.test_model(model, test_data)

    values = [evl.percent_correct]
    # Walk the labels by position instead of re-searching the list with
    # classes.index(name) for each label.
    for index, _name in enumerate(classes):
        values += [
            evl.true_positive_rate(index) * 100,
            evl.false_positive_rate(index) * 100,
            evl.area_under_roc(index),
        ]

    this.values = values
    this.header = header
def runCV(this, arffFile, classifier, folds):
    """Cross-validate a classifier on an ARFF file and store the metrics.

    The last attribute of the file is used as the class and the
    cross-validation is seeded with ``Random(1)``. Results are stored on
    ``this`` as two parallel lists:

    * ``this.header`` -- ``"Accuracy"`` followed by ``"<label> TP"``,
      ``"<label> FP"`` and ``"<label> AUC ROC"`` for every class label
    * ``this.values`` -- percent correct followed by, per class label,
      TP rate * 100, FP rate * 100 and the area under ROC

    :param this: object receiving the ``values`` and ``header`` attributes
    :param arffFile: path of the ARFF file to load
    :param classifier: Weka classifier class name
    :param folds: number of cross-validation folds
    """
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(arffFile)
    data.class_is_last()
    classes = [str(code) for code in data.class_attribute.values]

    header = ["Accuracy"]
    for name in classes:
        header += [name + " TP", name + " FP", name + " AUC ROC"]

    cls = Classifier(classname=classifier)
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, folds, Random(1))

    values = [evl.percent_correct]
    # Walk the labels by position instead of re-searching the list with
    # classes.index(name) for each label.
    for index, _name in enumerate(classes):
        values += [
            evl.true_positive_rate(index) * 100,
            evl.false_positive_rate(index) * 100,
            evl.area_under_roc(index),
        ]

    this.values = values
    this.header = header
def HOV(dataset, algo, num_datasets):
    """Hold-out validate ``algo`` on an ARFF file and return the AUC of class 1.

    The data is split 70% / 30% with ``Random(10)``; the classifier is
    built on the first part and tested on the second. The summary, the
    confusion matrix and the area under the ROC curve for class index 1
    are printed.

    :param dataset: path of the ARFF file to load
    :param algo: Weka classifier class name
    :param num_datasets: accepted but not used inside this function
    :return: area under the ROC curve for class index 1
    """
    # Hold-out validation (HOV)
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    instances = arff_loader.load_file(dataset)
    instances.class_is_last()

    train_part, test_part = instances.train_test_split(70.0, Random(10))

    learner = Classifier(classname=algo)
    learner.build_classifier(train_part)

    evaluation = Evaluation(train_part)
    evaluation.test_model(learner, test_part)

    summary_title = "=== " + str(algo) + " on" + str(dataset) + " ==="
    print(evaluation.summary(summary_title, False))
    print(evaluation.matrix("=== on click prediction(confusion matrix) ==="))
    auc_text = str(evaluation.area_under_roc(1))
    print("For Algo" + str(algo) + "areaUnderROC/1: for HOV " + auc_text)
    return evaluation.area_under_roc(1)
def ClassifyWithDT(f3, test, tree, fileOut):
    """Build ``tree`` on ``f3``, evaluate it on ``test`` and print the metrics.

    Prints the accuracy (``(1 - error_rate) * 100``), the number of
    training instances, the error rate, and one line per class with
    precision, recall and area under ROC.

    :param f3: training instances; also used to initialise the Evaluation
    :param test: instances the built classifier is tested on
    :param tree: classifier object to build and test
    :param fileOut: accepted but not used inside this function
    :return: the Evaluation object holding the test results
    """
    evaluation = Evaluation(f3)
    tree.build_classifier(f3)
    evaluation.test_model(tree, test)

    accuracy_pct = (1 - evaluation.error_rate) * 100
    print("\n\nSelf-Training data========" + str(accuracy_pct)
          + " number of instances==" + str(f3.num_instances) + "\n")
    print("\n Error Rate==" + str(evaluation.error_rate) + "\n")
    print("\n precision recall areaUnderROC \n\n")

    # The class count is read from the first test instance.
    class_count = test.get_instance(0).num_classes
    for class_idx in range(class_count):
        row = (str(evaluation.precision(class_idx)) + " "
               + str(evaluation.recall(class_idx)) + " "
               + str(evaluation.area_under_roc(class_idx)) + "\n")
        print(row)

    return evaluation
def CV10(dataset, algo):
    """Run a 10-fold cross-validation of ``algo`` and print the AUC of class 1.

    The ARFF file is loaded, its last attribute is used as the class, and
    the cross-validation is seeded with ``Random(5)``. Nothing is returned;
    the result is only printed.

    This function does not start the JVM (the ``jvm.start`` call below is
    commented out), so the caller is expected to have done that already.

    :param dataset: path of the ARFF file to load (must be a string, it is
        concatenated into the log message)
    :param algo: Weka classifier class name (must be a string)
    """
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former ``print "..."`` statement is a SyntaxError on Python 3.
    print("inside 10cv")
    print("dataset ----" + dataset)
    print("algorithm ----" + algo)

    # Executing 10-fold cross-validation
    # jvm.start(packages=True)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()

    cls = Classifier(classname=algo)
    evl = Evaluation(data)
    # 10 folds, as the function name and messages state; this used to pass
    # 2, apparently copied from the 2-fold variant.
    evl.crossvalidate_model(cls, data, 10, Random(5))
    print("areaUnderROC/1: " + str(evl.area_under_roc(1)))
def weka_bayesnet(filearffpath='data/datatobayes.arff'):
    """Cross-validate Weka's BayesNet on an ARFF file and return the AUC.

    The last attribute is used as the class. A 10-fold cross-validation
    seeded with ``Random(42)`` is run with BayesNet (option ``-D``).

    :param filearffpath: path of the ARFF file to load; this argument was
        previously ignored in favour of a hard-coded copy of its default
    :return: area under the ROC curve for class index 0 (a single value
        over the whole cross-validation, no per-fold deviation)
    """
    from weka.classifiers import Classifier, Evaluation
    from weka.core.classes import Random

    # Preparing the data
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(filearffpath)

    # The Remove filter is configured but deliberately not applied: the
    # unfiltered data is what gets evaluated. Swap in remove.filter(data)
    # to drop the first attribute.
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "first"])
    remove.inputformat(data)
    filtered = data  # remove.filter(data)

    # Classifier test
    filtered.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.BayesNet",
                            options=['-D'])
    evaluation = Evaluation(filtered)
    evaluation.crossvalidate_model(classifier, filtered, 10, Random(42))
    return evaluation.area_under_roc(class_index=0)
def HOV(dataset, algo):
    """Hold-out validate ``algo`` on an ARFF file and return the AUC as text.

    The data is split 70% / 30% with ``Random(10)``; the classifier is
    built on the first part and tested on the second.

    This function does not start the JVM (the ``jvm.start`` call below is
    commented out), so the caller is expected to have done that already.

    :param dataset: path of the ARFF file to load (must be a string, it is
        concatenated into the log message)
    :param algo: Weka classifier class name (must be a string)
    :return: ``str`` of the area under the ROC curve for class index 1
    """
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former ``print "..."`` statement is a SyntaxError on Python 3.
    print("inside hov")
    print("dataset ----" + dataset)
    print("algorithm ----" + algo)

    # Executing hold-out validation
    # jvm.start(packages=True)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()

    train, test = data.train_test_split(70.0, Random(10))

    cls = Classifier(classname=algo)
    cls.build_classifier(train)

    evl = Evaluation(train)
    evl.test_model(cls, test)
    return str(evl.area_under_roc(1))
dataLastTrain.class_is_last() dataLastTest.class_is_last() from weka.classifiers import Evaluation from weka.core.classes import Random from weka.classifiers import Classifier if classifier == 0: for kernel in range(0,2): if kernel == 0: mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier", options=["-M","-W", "weka.classifiers.bayes.NaiveBayes"]) Class = 'NaiveBayes' mapper.build_classifier(dataTrain) evaluation = Evaluation(dataTrain) evaluation.test_model(mapper,dataTest) Scores.write(str(evaluation.area_under_roc(1)*100) + ',') recall_NB.append(evaluation.recall(1)*100) precision_NB.append(evaluation.precision(1)*100) mapper.build_classifier(dataLastTrain) evaluation = Evaluation(dataLastTrain) evaluation.test_model(mapper, dataLastTest) ScoresLast.write(str(evaluation.area_under_roc(1) * 100)+',') else: mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier", options=["-M","-W", "weka.classifiers.bayes.NaiveBayes", "--", "-K"]) Class = 'NaiveBayes'
eval = Evaluation(labledDataSet) eval.test_model(tree, test) fileOut.write("Labeled data======== " + str((1.0 - eval.error_rate )* 100) + " number of instances== " + str(labledDataSet.num_instances) + "\n") Newtrainpool = LabeledUnlabeldata(labledDataSet, UnlabledDataSet, tree, y ) # Newtrainpool = LabeledUnlabeldata(labledDataSet, UnlabledDataSet, tree, y , cal_method=Method) fileOut.write("\n\nLabeled data======== " + str((1.0 - eval.error_rate )* 100) + " number of instances== " + str(labledDataSet.num_instances) + "\n") fileOut.write(" Decision Tree \n") fileOut.write("\n precision recall areaUnderROC \n\n") for i in range(test.get_instance(0).num_classes) : fileOut.write(str(eval.precision(i)) +" "+str(eval.recall(i)) + " " + str(eval.area_under_roc(i))+"\n") ClassifyWithDT(Newtrainpool, test, tree, fileOut ) fileOut.write("\n") fileOut.write("########################################################\n") fileOut.write("\n") except Exception as e: raise e fileOut.write("\n") fileOut.write("\n") fileOut.write("########################################################\n")
def main():
    """
    Walks through the classifier examples: building, evaluating,
    cross-validating and plotting on the bundled sample datasets.
    """

    def load_dataset(filename):
        """Announce and load a sample ARFF file with the last attribute as class."""
        path = helper.get_data_dir() + os.sep + filename
        helper.print_info("Loading dataset: " + path)
        arff_loader = Loader("weka.core.converters.ArffLoader")
        dataset = arff_loader.load_file(path)
        dataset.class_is_last()
        return path, dataset

    # --- iris -------------------------------------------------------------
    iris_file, iris_data = load_dataset("iris.arff")

    # help text of a classifier
    helper.print_title("Creating help string")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    print(classifier.to_help())

    # classifier looked up from a partial classname
    helper.print_title("Creating classifier from partial classname")
    clsname = ".J48"
    classifier = Classifier(classname=clsname)
    print(clsname + " --> " + classifier.classname)

    # classifier created from a command-line string
    helper.print_title("Creating SMO from command-line string")
    cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"'
    classifier = from_commandline(cmdline, classname="weka.classifiers.Classifier")
    classifier.build_classifier(iris_data)
    print("input: " + cmdline)
    print("output: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # SMO wrapped as a KernelClassifier with an explicit kernel
    helper.print_title("Creating SMO as KernelClassifier")
    rbf_kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.001"])
    classifier = KernelClassifier(
        classname="weka.classifiers.functions.SMO", options=["-M"])
    classifier.kernel = rbf_kernel
    classifier.build_classifier(iris_data)
    print("classifier: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # train J48 and output the model
    helper.print_title("Training J48 classifier on iris")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # The "confidenceFactor" property is set directly here rather than via
    # 'options=["-C", "0.3"]' in the constructor. The value is passed
    # through double_to_float first because the property is a float,
    # not a double.
    confidence = typeconv.double_to_float(0.3)
    classifier.set_property("confidenceFactor", confidence)
    classifier.build_classifier(iris_data)
    print(classifier)
    print(classifier.graph)
    print(classifier.to_source("MyJ48"))
    plot_graph.plot_dot_graph(classifier.graph)

    # evaluate the model on the data it was trained on
    helper.print_title("Evaluating J48 classifier on iris")
    evaluation = Evaluation(iris_data)
    evl = evaluation.test_model(classifier, iris_data)
    print(evl)
    print(evaluation.summary())

    # evaluate on a random train/test split
    helper.print_title("Evaluating J48 classifier on iris (random split 66%)")
    classifier = Classifier(
        classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    evaluation = Evaluation(iris_data)
    evaluation.evaluate_train_test_split(classifier, iris_data, 66.0, Random(1))
    print(evaluation.summary())

    # incremental loading and incremental training
    helper.print_title("Build classifier incrementally on iris")
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    iris_inc.class_is_last()
    classifier = Classifier(
        classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    classifier.build_classifier(iris_inc)
    for inst in loader:
        classifier.update_classifier(inst)
    print(classifier)

    # --- meta-classifiers -------------------------------------------------
    helper.print_title("Meta classifiers")

    # FilteredClassifier through the generic enhancer wrapper
    print("generic FilteredClassifier instantiation")
    meta = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.FilteredClassifier")
    meta.classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.set_property("filter", flter.jobject)
    print(meta.to_commandline())

    # FilteredClassifier through its dedicated wrapper class
    print("direct FilteredClassifier instantiation")
    meta = FilteredClassifier()
    meta.classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.filter = flter
    print(meta.to_commandline())

    # Vote through the generic combiner wrapper
    print("generic Vote instantiation")
    meta = MultipleClassifiersCombiner(classname="weka.classifiers.meta.Vote")
    meta.classifiers = [
        Classifier(classname="weka.classifiers.functions.SMO"),
        Classifier(classname="weka.classifiers.trees.J48"),
    ]
    print(meta.to_commandline())

    # --- diabetes: cross-validate a nominal classifier ---------------------
    helper.print_title("Cross-validating NaiveBayes on diabetes")
    _, diabetes_data = load_dataset("diabetes.arff")
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    pred_output = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText",
        options=["-distribution"])
    evaluation = Evaluation(diabetes_data)
    evaluation.crossvalidate_model(
        classifier, diabetes_data, 10, Random(42), output=pred_output)
    print(evaluation.summary())
    print(evaluation.class_details())
    print(evaluation.matrix())

    # (label, Evaluation member, class index); a class index of None means
    # the member is read as a plain attribute, otherwise it is called with
    # that index. Members are resolved one at a time, right before printing.
    statistics = [
        ("areaUnderPRC/0", "area_under_prc", 0),
        ("weightedAreaUnderPRC", "weighted_area_under_prc", None),
        ("areaUnderROC/1", "area_under_roc", 1),
        ("weightedAreaUnderROC", "weighted_area_under_roc", None),
        ("avgCost", "avg_cost", None),
        ("totalCost", "total_cost", None),
        ("confusionMatrix", "confusion_matrix", None),
        ("correct", "correct", None),
        ("pctCorrect", "percent_correct", None),
        ("incorrect", "incorrect", None),
        ("pctIncorrect", "percent_incorrect", None),
        ("unclassified", "unclassified", None),
        ("pctUnclassified", "percent_unclassified", None),
        ("coverageOfTestCasesByPredictedRegions",
         "coverage_of_test_cases_by_predicted_regions", None),
        ("sizeOfPredictedRegions", "size_of_predicted_regions", None),
        ("falseNegativeRate", "false_negative_rate", 1),
        ("weightedFalseNegativeRate", "weighted_false_negative_rate", None),
        ("numFalseNegatives", "num_false_negatives", 1),
        ("trueNegativeRate", "true_negative_rate", 1),
        ("weightedTrueNegativeRate", "weighted_true_negative_rate", None),
        ("numTrueNegatives", "num_true_negatives", 1),
        ("falsePositiveRate", "false_positive_rate", 1),
        ("weightedFalsePositiveRate", "weighted_false_positive_rate", None),
        ("numFalsePositives", "num_false_positives", 1),
        ("truePositiveRate", "true_positive_rate", 1),
        ("weightedTruePositiveRate", "weighted_true_positive_rate", None),
        ("numTruePositives", "num_true_positives", 1),
        ("fMeasure", "f_measure", 1),
        ("weightedFMeasure", "weighted_f_measure", None),
        ("unweightedMacroFmeasure", "unweighted_macro_f_measure", None),
        ("unweightedMicroFmeasure", "unweighted_micro_f_measure", None),
        ("precision", "precision", 1),
        ("weightedPrecision", "weighted_precision", None),
        ("recall", "recall", 1),
        ("weightedRecall", "weighted_recall", None),
        ("kappa", "kappa", None),
        ("KBInformation", "kb_information", None),
        ("KBMeanInformation", "kb_mean_information", None),
        ("KBRelativeInformation", "kb_relative_information", None),
        ("SFEntropyGain", "sf_entropy_gain", None),
        ("SFMeanEntropyGain", "sf_mean_entropy_gain", None),
        ("SFMeanPriorEntropy", "sf_mean_prior_entropy", None),
        ("SFMeanSchemeEntropy", "sf_mean_scheme_entropy", None),
        ("matthewsCorrelationCoefficient",
         "matthews_correlation_coefficient", 1),
        ("weightedMatthewsCorrelation", "weighted_matthews_correlation", None),
        ("class priors", "class_priors", None),
        ("numInstances", "num_instances", None),
        ("meanAbsoluteError", "mean_absolute_error", None),
        ("meanPriorAbsoluteError", "mean_prior_absolute_error", None),
        ("relativeAbsoluteError", "relative_absolute_error", None),
        ("rootMeanSquaredError", "root_mean_squared_error", None),
        ("rootMeanPriorSquaredError", "root_mean_prior_squared_error", None),
        ("rootRelativeSquaredError", "root_relative_squared_error", None),
    ]
    for label, member, class_idx in statistics:
        value = getattr(evaluation, member)
        if class_idx is not None:
            value = value(class_idx)
        print(label + ": " + str(value))
    print("prediction output:\n" + str(pred_output))

    plot_cls.plot_roc(
        evaluation,
        title="ROC diabetes",
        class_index=range(0, diabetes_data.class_attribute.num_values),
        wait=False)
    plot_cls.plot_prc(
        evaluation,
        title="PRC diabetes",
        class_index=range(0, diabetes_data.class_attribute.num_values),
        wait=False)

    # --- bolts: numeric class ----------------------------------------------
    _, bolts_data = load_dataset("bolts.arff")

    # train and output the model
    helper.print_title("Training LinearRegression on bolts")
    classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    classifier.build_classifier(bolts_data)
    print(classifier)

    # cross-validate a numeric classifier
    helper.print_title("Cross-validating LinearRegression on bolts")
    classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    evaluation = Evaluation(bolts_data)
    evaluation.crossvalidate_model(classifier, bolts_data, 10, Random(42))
    print(evaluation.summary())
    print("correlationCoefficient: " + str(evaluation.correlation_coefficient))
    print("errorRate: " + str(evaluation.error_rate))
    helper.print_title("Header - bolts")
    print(str(evaluation.header))
    helper.print_title("Predictions on bolts")
    for number, pred in enumerate(evaluation.predictions, 1):
        print(str(number) + ": " + str(pred) + " -> error=" + str(pred.error))
    plot_cls.plot_classifier_errors(evaluation.predictions, wait=False)

    # learning curve
    cls = [
        Classifier(classname="weka.classifiers.trees.J48"),
        Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable"),
    ]
    plot_cls.plot_learning_curve(
        cls, diabetes_data, increments=0.05, label_template="[#] !",
        metric="percent_correct", wait=True)

    # --- labor: reaching into the classifier's Java API ----------------------
    _, labor_data = load_dataset("labor.arff")
    helper.print_title("Using JRip's Java API to access rules")
    jrip = Classifier(classname="weka.classifiers.rules.JRip")
    jrip.build_classifier(labor_data)
    ruleset = jrip.jwrapper.getRuleset()
    for position in range(ruleset.size()):
        rule = ruleset.get(position)
        print(str(rule.toString(labor_data.class_attribute.jobject)))
if classifier == 0: for kernel in range(0, 1): if kernel == 0: mapper = Classifier( classname= "weka.classifiers.misc.InputMappedClassifier", options=[ '-M', "-W", "weka.classifiers.bayes.NaiveBayes" ]) Class = 'NaiveBayes' mapper.build_classifier(dataTrain) evaluation = Evaluation(dataTrain) evaluation.test_model(mapper, dataTest) roc_aux_NB.append( evaluation.area_under_roc(1) * 100) recall_aux_NB.append( evaluation.recall(1) * 100) precision_aux_NB.append( evaluation.precision(1) * 100) elif classifier == 1: for degree in range(3, 4): mapper = Classifier( classname= "weka.classifiers.misc.InputMappedClassifier", options=[ '-M', "-W", "weka.classifiers.functions.SMO", "--", "-K", "weka.classifiers.functions.supportVector.PolyKernel -E "
import weka.core.jvm as jvm
import weka.core.converters as conv
from weka.classifiers import Evaluation, Classifier
from weka.core.classes import Random
import weka.plot.classifiers as plcls  # NB: matplotlib is required
import os
from weka.core.converters import Loader

# Script: 2-fold cross-validation (seed 5) of NaiveBayes on the click
# prediction dataset; prints the summary, the confusion matrix and the AUC
# of class index 1, and shows the ROC curves of class indices 0 and 1.

data_dir = "/home/suruchi/Desktop/BTECH Pro/new/click_prediction/"

jvm.start(packages=True)
try:
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(os.path.join(data_dir, "click_prediction.arff"))
    data.class_is_last()

    cls = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 2, Random(5))

    print(evl.summary("=== NaiveBayes on click prediction (stats) ===", False))
    print(evl.matrix("=== NaiveBayes on click prediction(confusion matrix) ==="))
    # plcls.plot_classifier_errors(evl.predictions, absolute=False, wait=True)
    plcls.plot_roc(evl, class_index=[0, 1], wait=True)
    print("areaUnderROC/1: " + str(evl.area_under_roc(1)))
finally:
    # Always shut the JVM down, including when loading or evaluation fails.
    jvm.stop()
for kernel in range(0, 1): if kernel == 0: mapper = Classifier( classname= "weka.classifiers.misc.InputMappedClassifier", options=[ "-M", "-W", "weka.classifiers.bayes.NaiveBayes" ]) Class = 'NaiveBayes' mapper.build_classifier(dataTrainSlow) evaluation = Evaluation(dataTrainSlow) evaluation.test_model(mapper, dataTestSlow) NB_AUC[seed - 1, fold - 1, 0] = (evaluation.area_under_roc(1) * 100) NB_Recall[seed - 1, fold - 1, 0] = (evaluation.recall(yIndexSlow) * 100) NB_Precision[seed - 1, fold - 1, 0] = ( evaluation.precision(yIndexSlow) * 100) if window == 365: mapper = Classifier( classname= "weka.classifiers.misc.InputMappedClassifier", options=[ "-M", "-W", "weka.classifiers.bayes.NaiveBayes", '--', '-K'
y, cal_method=Method) print("\n\nLabeled data======== " + str((1.0 - eval.error_rate) * 100) + " number of instances== " + str(labledDataSet.num_instances) + "\n") print(" Decision Tree \n") print( "\n precision recall areaUnderROC \n\n") for i in range(test.get_instance(0).num_classes): print( str(eval.precision(i)) + " " + str(eval.recall(i)) + " " + str(eval.area_under_roc(i)) + "\n") ClassifyWithDT(Newtrainpool, test, tree, fileOut) print("\n") print("########################################################\n") print("\n") except Exception as e: raise e print("\n") print("\n") print("########################################################\n") print("########################################################\n") print("########################################################\n")
RF = Classifier( classname="weka.classifiers.misc.InputMappedClassifier", options=[ "-M", "-W", "weka.classifiers.trees.RandomForest", "--", "-I", '20' ]) Class = 'NaiveBayes' RF.build_classifier(dataTrain) evaluationRF = Evaluation(dataTrain) evaluationRF.test_model(RF, dataTest) if dataset == 'First': Scores.write( str(window) + ',' + str(np.round(evaluationNB.area_under_roc(1) * 100, 2)) + ',' + str(np.round(evaluationRF.area_under_roc(1) * 100, 2)) + '\n') else: ScoresLast.write( str(window) + ',' + str(np.round(evaluationNB.area_under_roc(1) * 100, 2)) + ',' + str(np.round(evaluationRF.area_under_roc(1) * 100, 2)) + '\n') if ntp == 2 and dataset == 'First': Perf.write( '\multirow{8}{*}{' + str(window) + 'd}' + ' & ' +
# #print(cls.options) # cls.build_classifier(dataTrain) from weka.classifiers import Evaluation #print("Evaluating NB classifier") evaluation = Evaluation(dataTrain) evl = evaluation.test_model(mapper, dataTest) print('Window' + str(Window[window]) + '_S' + str(seed) + '_Fold' + str(fold) + ': Performance') #print(evaluation.summary()) #print(evaluation.class_details()) #print(evaluation.matrix()) #print(evaluation.summary()) #print(evaluation.class_details()) #print(evaluation.matrix()) roc.append(evaluation.area_under_roc(1)) sens.append(evaluation.true_positive_rate(1)) spec.append(evaluation.true_negative_rate(1)) if fold == 10 and seed == 5: print('Window' + str(Window[window]) + '_S' + str(seed) + '_Fold' + str(fold) + ': Performance') print('AUC: ' + str(np.mean(roc))) print('Sens: ' + str(np.mean(sens))) print('Spec:' + str(np.mean(spec))) Perf.write('Window' + str(Window[window]) + ': Performance\n\n') Perf.write('AUC: ' + str(np.mean(roc)) + '\n') Perf.write('Sens: ' + str(np.mean(sens)) + '\n') Perf.write('Spec:' + str(np.mean(spec)) + '\n') except:
RF = Classifier(classname="weka.classifiers.misc.InputMappedClassifier", options=["-M", "-W", "weka.classifiers.bayes.NaiveBayes"]) # Class = 'NaiveBayes' # NB.build_classifier(dataTrain) # evaluationNB = Evaluation(dataTrain) # evaluationNB.test_model(NB, dataTest) # RF = Classifier(classname="weka.classifiers.misc.InputMappedClassifier", # options=["-M", "-W", "weka.classifiers.trees.RandomForest", "--", "-I", # '20']) Class = 'NaiveBayes' RF.build_classifier(dataTrain) evaluationRF = Evaluation(dataTrain) evaluationRF.test_model(RF, dataTest) print(evaluationRF.area_under_roc(1)) if ntp == 2 and dataset == 'Slow': Perf.write( '\multirow{6}{*}{' + str(window) + 'd}' + ' & ' + '\multirow{3}{*}{' + str(ntp) + '}' + ' & ' + dataset + ' & ' + str(np.round(evaluationRF.area_under_roc(1) * 100, 2)) + ' & ' + str(np.round(evaluationRF.precision(yIndex) * 100, 2)) + ' & ' + str(np.round(evaluationRF.recall(yIndex) * 100, 2)) + '\\\\\n') # Precision.write( # '\multirow{8}{*}{' + str(window) + 'd}' + ' & ' + '\multirow{2}{*}{' + str( # ntp) + '}' + ' & ' + dataset + ' & ' + str( # np.round(evaluationNB.precision(yIndex) * 100, 2)) + ' & ' + str( # np.round(evaluationRF.precision(yIndex) * 100, 2)) + '\\\\\n') # # Recall.write('\multirow{8}{*}{' + str(window) + 'd}' + ' & ' + '\multirow{2}{*}{' + str( # ntp) + '}' + ' & ' + dataset + ' & ' + str(np.round(evaluationNB.recall(yIndex) * 100, 2)) + ' & ' + str( # np.round(evaluationRF.recall(yIndex) * 100, 2)) + '\\\\\n')
from weka.classifiers import Classifier if classifier == 0: SMOTE = Filter(classname="weka.filters.supervised.instance.SMOTE", options=['-P', str(smote)]) SMOTE.inputformat(dataTrain) dataTrain = SMOTE.filter(dataTrain) SMOTE.inputformat(dataLastTrain) dataLastTrain = SMOTE.filter(dataLastTrain) for kernel in range(0,1): if kernel == 0: mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier", options=["-M","-W", "weka.classifiers.bayes.NaiveBayes"]) Class = 'NaiveBayes' mapper.build_classifier(dataTrain) evaluation = Evaluation(dataTrain) evaluation.test_model(mapper,dataTest) roc_NB.append(evaluation.area_under_roc(1)*100) recall_NB.append(evaluation.recall(yIndex)*100) precision_NB.append(evaluation.precision(yIndex)*100) mapper.build_classifier(dataLastTrain) evaluation = Evaluation(dataLastTrain) evaluation.test_model(mapper, dataLastTest) roc_NB_Last.append(evaluation.area_under_roc(1) * 100) recall_NB_Last.append(evaluation.recall(yIndex) * 100) precision_NB_Last.append(evaluation.precision(yIndex) * 100) elif classifier == 1: for degree in [2]: mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier", options=["-M","-W", "weka.classifiers.functions.SMO", "--", "-K","weka.classifiers.functions.supportVector.PolyKernel -E " + str(degree)]) Class = 'SVM'
def main():
    """
    Run the classifier example walkthrough.

    Loads the iris, diabetes, bolts and labor ARFF files from
    ``helper.get_data_dir()`` and, in order: prints classifier help, builds
    classifiers from a partial class name and from a command-line string,
    configures a kernel classifier, trains/evaluates J48, trains
    NaiveBayesUpdateable incrementally, assembles meta-classifiers,
    cross-validates nominal and numeric classifiers (printing the evaluation
    statistics and plotting the results) and finally reads JRip's rules
    through the underlying Java object.

    All results are printed or plotted; nothing is returned.
    NOTE(review): presumably the JVM must already be running when this is
    called -- confirm against the caller.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # classifier help
    helper.print_title("Creating help string")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    print(classifier.to_help())

    # partial classname
    helper.print_title("Creating classifier from partial classname")
    clsname = ".J48"
    classifier = Classifier(classname=clsname)
    print(clsname + " --> " + classifier.classname)

    # classifier from commandline
    helper.print_title("Creating SMO from command-line string")
    cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"'
    classifier = from_commandline(cmdline, classname="weka.classifiers.Classifier")
    classifier.build_classifier(iris_data)
    print("input: " + cmdline)
    print("output: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # kernel classifier
    helper.print_title("Creating SMO as KernelClassifier")
    kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.001"])
    classifier = KernelClassifier(classname="weka.classifiers.functions.SMO", options=["-M"])
    classifier.kernel = kernel
    classifier.build_classifier(iris_data)
    print("classifier: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # build a classifier and output model
    helper.print_title("Training J48 classifier on iris")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
    # property of the J48 classifier itself. However, being of type float rather than double, we need
    # to convert it to the correct type first using the double_to_float function:
    classifier.set_property("confidenceFactor", types.double_to_float(0.3))
    classifier.build_classifier(iris_data)
    print(classifier)
    print(classifier.graph)
    print(classifier.to_source("MyJ48"))
    plot_graph.plot_dot_graph(classifier.graph)

    # evaluate model on test set (here: the training data itself)
    helper.print_title("Evaluating J48 classifier on iris")
    evaluation = Evaluation(iris_data)
    evl = evaluation.test_model(classifier, iris_data)
    print(evl)
    print(evaluation.summary())

    # evaluate model on train/test split
    helper.print_title("Evaluating J48 classifier on iris (random split 66%)")
    classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    evaluation = Evaluation(iris_data)
    evaluation.evaluate_train_test_split(classifier, iris_data, 66.0, Random(1))
    print(evaluation.summary())

    # load a dataset incrementally and build classifier incrementally
    helper.print_title("Build classifier incrementally on iris")
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    iris_inc.class_is_last()
    classifier = Classifier(
        classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    classifier.build_classifier(iris_inc)
    # the loader itself is iterated to feed the instances one at a time
    for inst in loader:
        classifier.update_classifier(inst)
    print(classifier)

    # construct meta-classifiers
    helper.print_title("Meta classifiers")
    # generic FilteredClassifier instantiation
    print("generic FilteredClassifier instantiation")
    meta = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.FilteredClassifier")
    meta.classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    # the generic wrapper has no "filter" attribute, so the Java object is set as a property
    meta.set_property("filter", flter.jobject)
    print(meta.to_commandline())
    # direct FilteredClassifier instantiation
    print("direct FilteredClassifier instantiation")
    meta = FilteredClassifier()
    meta.classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.filter = flter
    print(meta.to_commandline())
    # generic Vote
    print("generic Vote instantiation")
    meta = MultipleClassifiersCombiner(classname="weka.classifiers.meta.Vote")
    classifiers = [
        Classifier(classname="weka.classifiers.functions.SMO"),
        Classifier(classname="weka.classifiers.trees.J48")
    ]
    meta.classifiers = classifiers
    print(meta.to_commandline())

    # cross-validate nominal classifier
    helper.print_title("Cross-validating NaiveBayes on diabetes")
    diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + diabetes_file)
    loader = Loader("weka.core.converters.ArffLoader")
    diabetes_data = loader.load_file(diabetes_file)
    diabetes_data.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    pred_output = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText",
        options=["-distribution"])
    evaluation = Evaluation(diabetes_data)
    evaluation.crossvalidate_model(classifier, diabetes_data, 10, Random(42), output=pred_output)
    print(evaluation.summary())
    print(evaluation.class_details())
    print(evaluation.matrix())
    print("areaUnderPRC/0: " + str(evaluation.area_under_prc(0)))
    print("weightedAreaUnderPRC: " + str(evaluation.weighted_area_under_prc))
    print("areaUnderROC/1: " + str(evaluation.area_under_roc(1)))
    print("weightedAreaUnderROC: " + str(evaluation.weighted_area_under_roc))
    print("avgCost: " + str(evaluation.avg_cost))
    print("totalCost: " + str(evaluation.total_cost))
    print("confusionMatrix: " + str(evaluation.confusion_matrix))
    print("correct: " + str(evaluation.correct))
    print("pctCorrect: " + str(evaluation.percent_correct))
    print("incorrect: " + str(evaluation.incorrect))
    print("pctIncorrect: " + str(evaluation.percent_incorrect))
    print("unclassified: " + str(evaluation.unclassified))
    print("pctUnclassified: " + str(evaluation.percent_unclassified))
    print("coverageOfTestCasesByPredictedRegions: " + str(evaluation.coverage_of_test_cases_by_predicted_regions))
    print("sizeOfPredictedRegions: " + str(evaluation.size_of_predicted_regions))
    print("falseNegativeRate: " + str(evaluation.false_negative_rate(1)))
    print("weightedFalseNegativeRate: " + str(evaluation.weighted_false_negative_rate))
    print("numFalseNegatives: " + str(evaluation.num_false_negatives(1)))
    print("trueNegativeRate: " + str(evaluation.true_negative_rate(1)))
    print("weightedTrueNegativeRate: " + str(evaluation.weighted_true_negative_rate))
    print("numTrueNegatives: " + str(evaluation.num_true_negatives(1)))
    print("falsePositiveRate: " + str(evaluation.false_positive_rate(1)))
    print("weightedFalsePositiveRate: " + str(evaluation.weighted_false_positive_rate))
    print("numFalsePositives: " + str(evaluation.num_false_positives(1)))
    print("truePositiveRate: " + str(evaluation.true_positive_rate(1)))
    print("weightedTruePositiveRate: " + str(evaluation.weighted_true_positive_rate))
    print("numTruePositives: " + str(evaluation.num_true_positives(1)))
    print("fMeasure: " + str(evaluation.f_measure(1)))
    print("weightedFMeasure: " + str(evaluation.weighted_f_measure))
    print("unweightedMacroFmeasure: " + str(evaluation.unweighted_macro_f_measure))
    print("unweightedMicroFmeasure: " + str(evaluation.unweighted_micro_f_measure))
    print("precision: " + str(evaluation.precision(1)))
    print("weightedPrecision: " + str(evaluation.weighted_precision))
    print("recall: " + str(evaluation.recall(1)))
    print("weightedRecall: " + str(evaluation.weighted_recall))
    print("kappa: " + str(evaluation.kappa))
    print("KBInformation: " + str(evaluation.kb_information))
    print("KBMeanInformation: " + str(evaluation.kb_mean_information))
    print("KBRelativeInformation: " + str(evaluation.kb_relative_information))
    print("SFEntropyGain: " + str(evaluation.sf_entropy_gain))
    print("SFMeanEntropyGain: " + str(evaluation.sf_mean_entropy_gain))
    print("SFMeanPriorEntropy: " + str(evaluation.sf_mean_prior_entropy))
    print("SFMeanSchemeEntropy: " + str(evaluation.sf_mean_scheme_entropy))
    print("matthewsCorrelationCoefficient: " + str(evaluation.matthews_correlation_coefficient(1)))
    print("weightedMatthewsCorrelation: " + str(evaluation.weighted_matthews_correlation))
    print("class priors: " + str(evaluation.class_priors))
    print("numInstances: " + str(evaluation.num_instances))
    print("meanAbsoluteError: " + str(evaluation.mean_absolute_error))
    print("meanPriorAbsoluteError: " + str(evaluation.mean_prior_absolute_error))
    print("relativeAbsoluteError: " + str(evaluation.relative_absolute_error))
    print("rootMeanSquaredError: " + str(evaluation.root_mean_squared_error))
    print("rootMeanPriorSquaredError: " + str(evaluation.root_mean_prior_squared_error))
    print("rootRelativeSquaredError: " + str(evaluation.root_relative_squared_error))
    print("prediction output:\n" + str(pred_output))
    # one curve per class label of the diabetes class attribute
    plot_cls.plot_roc(evaluation,
                      title="ROC diabetes",
                      class_index=range(
                          0, diabetes_data.class_attribute.num_values),
                      wait=False)
    plot_cls.plot_prc(evaluation,
                      title="PRC diabetes",
                      class_index=range(
                          0, diabetes_data.class_attribute.num_values),
                      wait=False)

    # train 2nd classifier on diabetes dataset
    classifier2 = Classifier(classname="weka.classifiers.trees.RandomForest")
    evaluation2 = Evaluation(diabetes_data)
    evaluation2.crossvalidate_model(classifier2, diabetes_data, 10, Random(42))
    plot_cls.plot_rocs({
        "NB": evaluation,
        "RF": evaluation2
    }, title="ROC diabetes", class_index=0, wait=False)
    plot_cls.plot_prcs({
        "NB": evaluation,
        "RF": evaluation2
    }, title="PRC diabetes", class_index=0, wait=False)

    # load a numeric dataset
    bolts_file = helper.get_data_dir() + os.sep + "bolts.arff"
    helper.print_info("Loading dataset: " + bolts_file)
    loader = Loader("weka.core.converters.ArffLoader")
    bolts_data = loader.load_file(bolts_file)
    bolts_data.class_is_last()

    # build a classifier and output model
    helper.print_title("Training LinearRegression on bolts")
    classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    classifier.build_classifier(bolts_data)
    print(classifier)

    # cross-validate numeric classifier
    helper.print_title("Cross-validating LinearRegression on bolts")
    classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    evaluation = Evaluation(bolts_data)
    evaluation.crossvalidate_model(classifier, bolts_data, 10, Random(42))
    print(evaluation.summary())
    print("correlationCoefficient: " + str(evaluation.correlation_coefficient))
    print("errorRate: " + str(evaluation.error_rate))
    helper.print_title("Header - bolts")
    print(str(evaluation.header))
    helper.print_title("Predictions on bolts")
    for index, pred in enumerate(evaluation.predictions):
        print(
            str(index + 1) + ": " + str(pred) + " -> error=" + str(pred.error))
    plot_cls.plot_classifier_errors(evaluation.predictions, wait=False)

    # train 2nd classifier and show errors in same plot
    classifier2 = Classifier(classname="weka.classifiers.functions.SMOreg")
    evaluation2 = Evaluation(bolts_data)
    evaluation2.crossvalidate_model(classifier2, bolts_data, 10, Random(42))
    plot_cls.plot_classifier_errors(
        {
            "LR": evaluation.predictions,
            "SMOreg": evaluation2.predictions
        },
        wait=False)

    # learning curve
    # this is the only plot call made with wait=True; all earlier ones pass wait=False
    cls = [
        Classifier(classname="weka.classifiers.trees.J48"),
        Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    ]
    plot_cls.plot_learning_curve(cls,
                                 diabetes_data,
                                 increments=0.05,
                                 label_template="[#] !",
                                 metric="percent_correct",
                                 wait=True)

    # access classifier's Java API
    labor_file = helper.get_data_dir() + os.sep + "labor.arff"
    helper.print_info("Loading dataset: " + labor_file)
    loader = Loader("weka.core.converters.ArffLoader")
    labor_data = loader.load_file(labor_file)
    labor_data.class_is_last()

    helper.print_title("Using JRip's Java API to access rules")
    jrip = Classifier(classname="weka.classifiers.rules.JRip")
    jrip.build_classifier(labor_data)
    rset = jrip.jwrapper.getRuleset()
    # range() exists on both Python 2 and 3; xrange() is a NameError on Python 3
    for i in range(rset.size()):
        r = rset.get(i)
        print(str(r.toString(labor_data.class_attribute.jobject)))
# CLASSIFIERS classifiers = [ ("Bayesian Network", Classifier(classname="weka.classifiers.bayes.BayesNet")), # ("Decision Tree", Classifier(classname="weka.classifiers.trees.J48")), # ("Logistic Regression", Classifier(classname="weka.classifiers.functions.Logistic")), # ("Multilayer Perceptron", Classifier(classname="weka.classifiers.functions.MultilayerPerceptron")), # ("Naive Bayes", Classifier(classname="weka.classifiers.bayes.NaiveBayes")), # ("Nearest Neighbour", Classifier(classname="weka.classifiers.lazy.IBk"))), ] # EVALUATION for name, cls in classifiers: print(name) evaluation = Evaluation(data) evaluation.crossvalidate_model(cls, data, 10, Random(42)) print(evaluation.summary()) print(evaluation.class_details()) print("areaUnderROC/1: " + str(evaluation.area_under_roc(1))) print("numFalseNegatives: " + str(evaluation.num_false_negatives(1))) print("numTrueNegatives: " + str(evaluation.num_true_negatives(1))) print("numFalsePositives: " + str(evaluation.num_false_positives(1))) print("numTruePositives: " + str(evaluation.num_true_positives(1))) jvm.stop() except: print("runtime error") jvm.stop()
for kernel in range(0, 2): if kernel == 0: mapper = Classifier( classname= "weka.classifiers.misc.InputMappedClassifier", options=[ "-W", "weka.classifiers.bayes.NaiveBayes" ]) Class = 'NaiveBayes' mapper.build_classifier(dataTrain) evaluation = Evaluation(dataTrain) evaluation.test_model( mapper, dataTest) aux1.append( evaluation.area_under_roc(1) * 100) if fold == 10: title.append('NB_' + str(begin) + 'to' + str(ntp)) roc.append( str(round( np.mean(aux1), 2))) else: mapper = Classifier( classname= "weka.classifiers.misc.InputMappedClassifier", options=[ "-W", "weka.classifiers.bayes.NaiveBayes",
# Fit NaiveBayes (wrapped in InputMappedClassifier) on dataTrain and score it
# on dataTest; the resulting metrics feed the Perf and Scores rows below.
NB = Classifier(classname="weka.classifiers.misc.InputMappedClassifier",
                options=["-M", "-W", "weka.classifiers.bayes.NaiveBayes"])
Class = 'NaiveBayes'
NB.build_classifier(dataTrain)
evaluationNB = Evaluation(dataTrain)
evaluationNB.test_model(NB, dataTest)

# RandomForest variant, built with the options "-I 20".
# NOTE(review): evaluationRF is created but never passed to test_model, and
# only the commented-out writers below read it -- confirm whether the RF
# evaluation is still wanted.
RF = Classifier(classname="weka.classifiers.misc.InputMappedClassifier",
                options=["-M", "-W", "weka.classifiers.trees.RandomForest",
                         "--", "-I", '20'])
Class = 'NaiveBayes'
RF.build_classifier(dataTrain)
evaluationRF = Evaluation(dataTrain)

# '&'-separated row: window, dataset, then AUC (class index 1), precision and
# recall (yIndex), each as a percentage rounded to two decimals.
Perf.write('&'.join([
    str(window),
    dataset,
    str(np.round(evaluationNB.area_under_roc(1) * 100, 2)),
    str(np.round(evaluationNB.precision(yIndex) * 100, 2)),
    str(np.round(evaluationNB.recall(yIndex) * 100, 2)),
]) + '\n')

# Comma-separated row carrying only the AUC percentage.
Scores.write(','.join([
    str(window),
    dataset,
    str(np.round(evaluationNB.area_under_roc(1) * 100, 2)),
]) + '\n')

#Precision.write(str(window)+ '&' + dataset + '&' + str(np.round(evaluationNB.precision(1) * 100,2))+ '&' + str(np.round(evaluationRF.precision(1) * 100,2))+'\n')
#Recall.write(str(window)+ '&' + dataset + '&' + str(np.round(evaluationNB.recall(1) * 100,2))+ '&' + str(np.round(evaluationRF.recall(1) * 100,2))+'\n')
jvm.stop()