예제 #1
0
def CV5x2(dataset, algo, num_datasets):
    """Cross-validate a Weka classifier on an ARFF file and return its AUC.

    Loads ``dataset`` with the ARFF loader, marks the last attribute as the
    class, runs a 2-fold cross-validation seeded with ``Random(5)`` and
    prints the summary, the confusion matrix and the area under the ROC
    curve for class index 1.

    NOTE: only a single 2-fold cross-validation is executed here; any
    repetition has to be driven by the caller.

    Args:
        dataset: Path of the ARFF file to load.
        algo: Weka classifier class name passed to ``Classifier``.
        num_datasets: Not used by this function; kept so existing callers
            keep working.

    Returns:
        The value of ``evl.area_under_roc(1)``.
    """
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()

    cls = Classifier(classname=algo)

    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 2, Random(5))

    print(evl.summary("=== " + str(algo) + " on" + str(dataset) + " ===", False))
    print(evl.matrix("=== on click prediction(confusion matrix) ==="))

    # Query the AUC once and reuse it for both the report line and the result.
    auc = evl.area_under_roc(1)
    print("For Algo" + str(algo) + "areaUnderROC/1: for CV5x2 " + str(auc))

    return auc
예제 #2
0
    def crossTest(this, trainingFile, classifier, testFile):
        """Train on one ARFF file, test on another and store the metrics.

        The classifier is built on ``trainingFile`` and evaluated on
        ``testFile``; in both data sets the last attribute is used as the
        class. The results are stored on the instance rather than returned:

        * ``this.header`` -- ``"Accuracy"`` followed by ``"<label> TP"``,
          ``"<label> FP"`` and ``"<label> AUC ROC"`` for every class label of
          the test set.
        * ``this.values`` -- the matching numbers: percent correct, then per
          class the true/false positive rates (multiplied by 100) and the
          area under the ROC curve.

        Args:
            trainingFile: Path of the ARFF file used for training.
            classifier: Weka classifier class name passed to ``Classifier``.
            testFile: Path of the ARFF file used for testing.
        """
        loader = Loader(classname="weka.core.converters.ArffLoader")
        data1 = loader.load_file(trainingFile)
        data1.class_is_last()

        cls = Classifier(classname=classifier)
        cls.build_classifier(data1)

        data2 = loader.load_file(testFile)
        data2.class_is_last()

        classes = [str(code) for code in data2.class_attribute.values]
        header = ["Accuracy"]
        for name in classes:
            header += [name + " TP", name + " FP", name + " AUC ROC"]

        evl = Evaluation(data2)
        evl.test_model(cls, data2)

        values = [evl.percent_correct]
        # Walk the class indices directly instead of looking each label up
        # again with list.index(), which is quadratic and would map duplicate
        # labels to the first occurrence.
        for index, _ in enumerate(classes):
            values += [
                evl.true_positive_rate(index) * 100,
                evl.false_positive_rate(index) * 100,
                evl.area_under_roc(index)
            ]

        this.values = values
        this.header = header
예제 #3
0
    def runCV(this, arffFile, classifier, folds):
        """Cross-validate a classifier on an ARFF file and store the metrics.

        The data set is loaded from ``arffFile`` with the last attribute as
        the class, and the classifier is cross-validated with ``folds`` folds
        using ``Random(1)``. The results are stored on the instance rather
        than returned:

        * ``this.header`` -- ``"Accuracy"`` followed by ``"<label> TP"``,
          ``"<label> FP"`` and ``"<label> AUC ROC"`` for every class label.
        * ``this.values`` -- the matching numbers: percent correct, then per
          class the true/false positive rates (multiplied by 100) and the
          area under the ROC curve.

        Args:
            arffFile: Path of the ARFF file to load.
            classifier: Weka classifier class name passed to ``Classifier``.
            folds: Number of cross-validation folds.
        """
        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(arffFile)
        data.class_is_last()

        classes = [str(code) for code in data.class_attribute.values]
        header = ["Accuracy"]
        for name in classes:
            header += [name + " TP", name + " FP", name + " AUC ROC"]

        cls = Classifier(classname=classifier)

        evl = Evaluation(data)
        evl.crossvalidate_model(cls, data, folds, Random(1))

        values = [evl.percent_correct]
        # Walk the class indices directly instead of looking each label up
        # again with list.index(), which is quadratic and would map duplicate
        # labels to the first occurrence.
        for index, _ in enumerate(classes):
            values += [
                evl.true_positive_rate(index) * 100,
                evl.false_positive_rate(index) * 100,
                evl.area_under_roc(index)
            ]

        this.values = values
        this.header = header
예제 #4
0
def HOV(dataset, algo, num_datasets):
    """Run a hold-out validation of a Weka classifier and return its AUC.

    Loads ``dataset`` with the ARFF loader, marks the last attribute as the
    class and splits it with ``train_test_split(70.0, Random(10))``. The
    classifier is built on the training part and tested on the remainder;
    the summary, the confusion matrix and the area under the ROC curve for
    class index 1 are printed.

    Args:
        dataset: Path of the ARFF file to load.
        algo: Weka classifier class name passed to ``Classifier``.
        num_datasets: Not used by this function; kept so existing callers
            keep working.

    Returns:
        The value of ``evl.area_under_roc(1)``.
    """
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()

    train, test = data.train_test_split(70.0, Random(10))

    cls = Classifier(classname=algo)
    cls.build_classifier(train)

    # The evaluation is initialised from the training split and then fed the
    # held-out instances.
    evl = Evaluation(train)
    evl.test_model(cls, test)

    print(evl.summary("=== " + str(algo) + " on" + str(dataset) + " ===", False))
    print(evl.matrix("=== on click prediction(confusion matrix) ==="))

    # Query the AUC once and reuse it for both the report line and the result.
    auc = evl.area_under_roc(1)
    print("For Algo" + str(algo) + "areaUnderROC/1: for HOV " + str(auc))

    return auc
def ClassifyWithDT(f3, test, tree, fileOut):
    """Rebuild ``tree`` on ``f3``, test it on ``test`` and print the results.

    Prints the accuracy (``(1 - error_rate) * 100``), the number of
    instances in ``f3``, the error rate, and one line per class with
    precision, recall and area under the ROC curve.

    Args:
        f3: Training instances; also used to initialise the evaluation.
        test: Instances the rebuilt classifier is tested on.
        tree: Classifier object; it is (re)built in place on ``f3``.
        fileOut: Not used by this function.

    Returns:
        The ``Evaluation`` object holding the test results.
    """
    evaluation = Evaluation(f3)
    tree.build_classifier(f3)
    evaluation.test_model(tree, test)

    accuracy_pct = (1 - evaluation.error_rate) * 100
    print("\n\nSelf-Training   data========" + str(accuracy_pct) +
          " number of instances==" + str(f3.num_instances) + "\n")
    print("\n Error Rate==" + str(evaluation.error_rate) + "\n")

    print("\n     precision   recall     areaUnderROC            \n\n")
    # The class count is read from the first test instance.
    class_count = test.get_instance(0).num_classes
    for class_index in range(class_count):
        row = "  ".join([
            str(evaluation.precision(class_index)),
            str(evaluation.recall(class_index)),
            str(evaluation.area_under_roc(class_index)),
        ])
        print(row + "\n")

    return evaluation
예제 #6
0
def CV10(dataset, algo):
    """Cross-validate a Weka classifier on an ARFF file and print its AUC.

    Loads ``dataset`` with the ARFF loader, marks the last attribute as the
    class, cross-validates ``algo`` and prints the area under the ROC curve
    for class index 1. Nothing is returned.

    NOTE(review): despite the function name, the cross-validation below is
    run with 2 folds and ``Random(5)`` -- confirm whether 10 folds were
    intended.

    Assumes the JVM has already been started by the caller (the
    ``jvm.start`` call that used to live here was commented out).

    Args:
        dataset: Path of the ARFF file to load (a string; it is concatenated
            into the log output).
        algo: Weka classifier class name (a string) passed to ``Classifier``.
    """
    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the former print statement.
    print("inside 10cv")
    print("dataset ----" + dataset)
    print("algorithm ----" + algo)

    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()

    cls = Classifier(classname=algo)

    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 2, Random(5))

    print("areaUnderROC/1: " + str(evl.area_under_roc(1)))
예제 #7
0
def weka_bayesnet(filearffpath='data/datatobayes.arff'):
    """Cross-validate Weka's BayesNet on an ARFF file and return the AUC.

    The data set is loaded from ``filearffpath`` with the last attribute as
    the class. A ``weka.classifiers.bayes.BayesNet`` classifier created with
    the ``-D`` option is evaluated with 10-fold cross-validation seeded with
    ``Random(42)``. All attributes are used as loaded; no attribute filter
    is applied.

    Args:
        filearffpath: Path of the ARFF file to load. Defaults to
            ``'data/datatobayes.arff'``.

    Returns:
        The area under the ROC curve for class index 0 (a single value for
        the whole cross-validation, without a per-fold deviation).
    """
    # Imported locally, as in the original snippet.
    from weka.classifiers import Classifier, Evaluation
    from weka.core.classes import Random

    # Preparing the data: honour the path argument instead of a hard-coded
    # file name.
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(filearffpath)
    data.class_is_last()

    # Classifier test
    classifier = Classifier(classname="weka.classifiers.bayes.BayesNet",
                            options=['-D'])
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(classifier, data, 10, Random(42))
    return evaluation.area_under_roc(class_index=0)
예제 #8
0
def HOV(dataset, algo):
    """Run a hold-out validation of a Weka classifier and return its AUC.

    Loads ``dataset`` with the ARFF loader, marks the last attribute as the
    class and splits it with ``train_test_split(70.0, Random(10))``. The
    classifier is built on the training part and tested on the remainder.

    Assumes the JVM has already been started by the caller (the
    ``jvm.start`` call that used to live here was commented out).

    Args:
        dataset: Path of the ARFF file to load (a string; it is concatenated
            into the log output).
        algo: Weka classifier class name (a string) passed to ``Classifier``.

    Returns:
        The area under the ROC curve for class index 1, converted to ``str``.
    """
    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the former print statement.
    print("inside hov")
    print("dataset ----" + dataset)
    print("algorithm ----" + algo)

    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(dataset)
    data.class_is_last()

    train, test = data.train_test_split(70.0, Random(10))

    cls = Classifier(classname=algo)
    cls.build_classifier(train)

    # The evaluation is initialised from the training split and then fed the
    # held-out instances.
    evl = Evaluation(train)
    evl.test_model(cls, test)

    return str(evl.area_under_roc(1))
                    dataLastTrain.class_is_last()
                    dataLastTest.class_is_last()

                    from weka.classifiers import Evaluation
                    from weka.core.classes import Random
                    from weka.classifiers import Classifier
                    if classifier == 0:
                        for kernel in range(0,2):
                            if kernel == 0:
                                mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier", options=["-M","-W", "weka.classifiers.bayes.NaiveBayes"])
                                Class = 'NaiveBayes'
                                mapper.build_classifier(dataTrain)
                                evaluation = Evaluation(dataTrain)
                                evaluation.test_model(mapper,dataTest)
                                Scores.write(str(evaluation.area_under_roc(1)*100) + ',')
                                recall_NB.append(evaluation.recall(1)*100)
                                precision_NB.append(evaluation.precision(1)*100)



                                mapper.build_classifier(dataLastTrain)
                                evaluation = Evaluation(dataLastTrain)
                                evaluation.test_model(mapper, dataLastTest)

                                ScoresLast.write(str(evaluation.area_under_roc(1) * 100)+',')

                            else:
                                mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier",
                                                    options=["-M","-W", "weka.classifiers.bayes.NaiveBayes", "--", "-K"])
                                Class = 'NaiveBayes'
예제 #10
0
            
            eval = Evaluation(labledDataSet)
            eval.test_model(tree, test)
            
            fileOut.write("Labeled data======== " + str((1.0 - eval.error_rate )* 100) + " number of instances== " + str(labledDataSet.num_instances) + "\n")
            
            Newtrainpool = LabeledUnlabeldata(labledDataSet, UnlabledDataSet, tree, y )
            # Newtrainpool = LabeledUnlabeldata(labledDataSet, UnlabledDataSet, tree, y , cal_method=Method)

            fileOut.write("\n\nLabeled data======== " + str((1.0 - eval.error_rate )* 100) + " number of instances== " + str(labledDataSet.num_instances) + "\n")

            fileOut.write("           Decision Tree                       \n")
            fileOut.write("\n      precision   recall     areaUnderROC            \n\n")

            for i in range(test.get_instance(0).num_classes) :
                fileOut.write(str(eval.precision(i)) +"  "+str(eval.recall(i)) + "  "  +  str(eval.area_under_roc(i))+"\n")



            ClassifyWithDT(Newtrainpool, test, tree, fileOut )

            fileOut.write("\n")
            fileOut.write("########################################################\n")
            fileOut.write("\n")
            
        except Exception as e:
            raise e

    fileOut.write("\n")
    fileOut.write("\n")
    fileOut.write("########################################################\n")
def main():
    """
    Runs a sequence of classifier examples and prints their results.

    In order, the function:

    * loads iris.arff from the helper data directory,
    * prints the J48 help text and resolves a partial class name,
    * builds an SMO classifier from a command-line string and another one as
      a KernelClassifier with an RBF kernel,
    * trains J48 on iris, prints the model, its graph and generated source
      and plots the graph,
    * evaluates J48 on the iris data and on a 66% random split,
    * builds NaiveBayesUpdateable incrementally on iris,
    * instantiates FilteredClassifier (two ways) and Vote meta classifiers,
    * cross-validates NaiveBayes on diabetes.arff, prints the evaluation
      statistics and plots the ROC and PRC curves,
    * trains and cross-validates LinearRegression on bolts.arff and plots the
      classifier errors,
    * plots a learning curve on the diabetes data,
    * trains JRip on labor.arff and prints its rules via the Java API.

    Takes no arguments and returns nothing; all results are printed or
    plotted.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # classifier help
    helper.print_title("Creating help string")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    print(classifier.to_help())

    # partial classname
    helper.print_title("Creating classifier from partial classname")
    clsname = ".J48"
    classifier = Classifier(classname=clsname)
    # prints the partial name next to the classname reported by the classifier
    print(clsname + " --> " + classifier.classname)

    # classifier from commandline
    helper.print_title("Creating SMO from command-line string")
    cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"'
    classifier = from_commandline(cmdline, classname="weka.classifiers.Classifier")
    classifier.build_classifier(iris_data)
    print("input: " + cmdline)
    print("output: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # kernel classifier
    helper.print_title("Creating SMO as KernelClassifier")
    kernel = Kernel(classname="weka.classifiers.functions.supportVector.RBFKernel", options=["-G", "0.001"])
    classifier = KernelClassifier(classname="weka.classifiers.functions.SMO", options=["-M"])
    # the kernel object is attached through the wrapper's "kernel" attribute
    classifier.kernel = kernel
    classifier.build_classifier(iris_data)
    print("classifier: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # build a classifier and output model
    helper.print_title("Training J48 classifier on iris")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
    # property of the J48 classifier itself. However, being of type float rather than double, we need
    # to convert it to the correct type first using the double_to_float function:
    classifier.set_property("confidenceFactor", typeconv.double_to_float(0.3))
    classifier.build_classifier(iris_data)
    print(classifier)
    print(classifier.graph)
    print(classifier.to_source("MyJ48"))
    plot_graph.plot_dot_graph(classifier.graph)

    # evaluate model on test set
    # NOTE: the "test set" here is iris_data, the same data the J48 model
    # above was built on
    helper.print_title("Evaluating J48 classifier on iris")
    evaluation = Evaluation(iris_data)
    evl = evaluation.test_model(classifier, iris_data)
    print(evl)
    print(evaluation.summary())

    # evaluate model on train/test split
    helper.print_title("Evaluating J48 classifier on iris (random split 66%)")
    classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    evaluation = Evaluation(iris_data)
    evaluation.evaluate_train_test_split(classifier, iris_data, 66.0, Random(1))
    print(evaluation.summary())

    # load a dataset incrementally and build classifier incrementally
    helper.print_title("Build classifier incrementally on iris")
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    iris_inc.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    # build on the incrementally loaded dataset object first, then feed the
    # instances one at a time by iterating over the loader
    classifier.build_classifier(iris_inc)
    for inst in loader:
        classifier.update_classifier(inst)
    print(classifier)

    # construct meta-classifiers
    helper.print_title("Meta classifiers")
    # generic FilteredClassifier instantiation
    print("generic FilteredClassifier instantiation")
    meta = SingleClassifierEnhancer(classname="weka.classifiers.meta.FilteredClassifier")
    meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    # the generic wrapper sets the filter via set_property, passing the
    # filter's jobject
    meta.set_property("filter", flter.jobject)
    print(meta.to_commandline())
    # direct FilteredClassifier instantiation
    print("direct FilteredClassifier instantiation")
    meta = FilteredClassifier()
    meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    # the dedicated wrapper takes the Filter object directly
    meta.filter = flter
    print(meta.to_commandline())
    # generic Vote
    print("generic Vote instantiation")
    meta = MultipleClassifiersCombiner(classname="weka.classifiers.meta.Vote")
    classifiers = [
        Classifier(classname="weka.classifiers.functions.SMO"),
        Classifier(classname="weka.classifiers.trees.J48")
    ]
    meta.classifiers = classifiers
    print(meta.to_commandline())

    # cross-validate nominal classifier
    helper.print_title("Cross-validating NaiveBayes on diabetes")
    diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + diabetes_file)
    loader = Loader("weka.core.converters.ArffLoader")
    diabetes_data = loader.load_file(diabetes_file)
    diabetes_data.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    # pred_output is handed to the cross-validation below and printed after
    # the statistics
    pred_output = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText", options=["-distribution"])
    evaluation = Evaluation(diabetes_data)
    evaluation.crossvalidate_model(classifier, diabetes_data, 10, Random(42), output=pred_output)
    print(evaluation.summary())
    print(evaluation.class_details())
    print(evaluation.matrix())
    # dump the individual statistics; calls taking an argument are given a
    # class index (0 or 1), the others are read as attributes
    print("areaUnderPRC/0: " + str(evaluation.area_under_prc(0)))
    print("weightedAreaUnderPRC: " + str(evaluation.weighted_area_under_prc))
    print("areaUnderROC/1: " + str(evaluation.area_under_roc(1)))
    print("weightedAreaUnderROC: " + str(evaluation.weighted_area_under_roc))
    print("avgCost: " + str(evaluation.avg_cost))
    print("totalCost: " + str(evaluation.total_cost))
    print("confusionMatrix: " + str(evaluation.confusion_matrix))
    print("correct: " + str(evaluation.correct))
    print("pctCorrect: " + str(evaluation.percent_correct))
    print("incorrect: " + str(evaluation.incorrect))
    print("pctIncorrect: " + str(evaluation.percent_incorrect))
    print("unclassified: " + str(evaluation.unclassified))
    print("pctUnclassified: " + str(evaluation.percent_unclassified))
    print("coverageOfTestCasesByPredictedRegions: " + str(evaluation.coverage_of_test_cases_by_predicted_regions))
    print("sizeOfPredictedRegions: " + str(evaluation.size_of_predicted_regions))
    print("falseNegativeRate: " + str(evaluation.false_negative_rate(1)))
    print("weightedFalseNegativeRate: " + str(evaluation.weighted_false_negative_rate))
    print("numFalseNegatives: " + str(evaluation.num_false_negatives(1)))
    print("trueNegativeRate: " + str(evaluation.true_negative_rate(1)))
    print("weightedTrueNegativeRate: " + str(evaluation.weighted_true_negative_rate))
    print("numTrueNegatives: " + str(evaluation.num_true_negatives(1)))
    print("falsePositiveRate: " + str(evaluation.false_positive_rate(1)))
    print("weightedFalsePositiveRate: " + str(evaluation.weighted_false_positive_rate))
    print("numFalsePositives: " + str(evaluation.num_false_positives(1)))
    print("truePositiveRate: " + str(evaluation.true_positive_rate(1)))
    print("weightedTruePositiveRate: " + str(evaluation.weighted_true_positive_rate))
    print("numTruePositives: " + str(evaluation.num_true_positives(1)))
    print("fMeasure: " + str(evaluation.f_measure(1)))
    print("weightedFMeasure: " + str(evaluation.weighted_f_measure))
    print("unweightedMacroFmeasure: " + str(evaluation.unweighted_macro_f_measure))
    print("unweightedMicroFmeasure: " + str(evaluation.unweighted_micro_f_measure))
    print("precision: " + str(evaluation.precision(1)))
    print("weightedPrecision: " + str(evaluation.weighted_precision))
    print("recall: " + str(evaluation.recall(1)))
    print("weightedRecall: " + str(evaluation.weighted_recall))
    print("kappa: " + str(evaluation.kappa))
    print("KBInformation: " + str(evaluation.kb_information))
    print("KBMeanInformation: " + str(evaluation.kb_mean_information))
    print("KBRelativeInformation: " + str(evaluation.kb_relative_information))
    print("SFEntropyGain: " + str(evaluation.sf_entropy_gain))
    print("SFMeanEntropyGain: " + str(evaluation.sf_mean_entropy_gain))
    print("SFMeanPriorEntropy: " + str(evaluation.sf_mean_prior_entropy))
    print("SFMeanSchemeEntropy: " + str(evaluation.sf_mean_scheme_entropy))
    print("matthewsCorrelationCoefficient: " + str(evaluation.matthews_correlation_coefficient(1)))
    print("weightedMatthewsCorrelation: " + str(evaluation.weighted_matthews_correlation))
    print("class priors: " + str(evaluation.class_priors))
    print("numInstances: " + str(evaluation.num_instances))
    print("meanAbsoluteError: " + str(evaluation.mean_absolute_error))
    print("meanPriorAbsoluteError: " + str(evaluation.mean_prior_absolute_error))
    print("relativeAbsoluteError: " + str(evaluation.relative_absolute_error))
    print("rootMeanSquaredError: " + str(evaluation.root_mean_squared_error))
    print("rootMeanPriorSquaredError: " + str(evaluation.root_mean_prior_squared_error))
    print("rootRelativeSquaredError: " + str(evaluation.root_relative_squared_error))
    print("prediction output:\n" + str(pred_output))
    # curves are requested for every class index of the diabetes class
    # attribute; wait=False presumably keeps these calls from blocking --
    # TODO confirm against the plotting module
    plot_cls.plot_roc(
        evaluation, title="ROC diabetes",
        class_index=range(0, diabetes_data.class_attribute.num_values), wait=False)
    plot_cls.plot_prc(
        evaluation, title="PRC diabetes",
        class_index=range(0, diabetes_data.class_attribute.num_values), wait=False)

    # load a numeric dataset
    bolts_file = helper.get_data_dir() + os.sep + "bolts.arff"
    helper.print_info("Loading dataset: " + bolts_file)
    loader = Loader("weka.core.converters.ArffLoader")
    bolts_data = loader.load_file(bolts_file)
    bolts_data.class_is_last()

    # build a classifier and output model
    helper.print_title("Training LinearRegression on bolts")
    classifier = Classifier(classname="weka.classifiers.functions.LinearRegression", options=["-S", "1", "-C"])
    classifier.build_classifier(bolts_data)
    print(classifier)

    # cross-validate numeric classifier
    # (a fresh classifier object with the same options is created for the
    # cross-validation)
    helper.print_title("Cross-validating LinearRegression on bolts")
    classifier = Classifier(classname="weka.classifiers.functions.LinearRegression", options=["-S", "1", "-C"])
    evaluation = Evaluation(bolts_data)
    evaluation.crossvalidate_model(classifier, bolts_data, 10, Random(42))
    print(evaluation.summary())
    print("correlationCoefficient: " + str(evaluation.correlation_coefficient))
    print("errorRate: " + str(evaluation.error_rate))
    helper.print_title("Header - bolts")
    print(str(evaluation.header))
    helper.print_title("Predictions on bolts")
    # predictions are numbered starting at 1 in the output
    for index, pred in enumerate(evaluation.predictions):
        print(str(index+1) + ": " + str(pred) + " -> error=" + str(pred.error))
    plot_cls.plot_classifier_errors(evaluation.predictions, wait=False)

    # learning curve
    # computed on the diabetes data loaded above; this is the only plot call
    # made with wait=True
    cls = [
        Classifier(classname="weka.classifiers.trees.J48"),
        Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")]
    plot_cls.plot_learning_curve(
        cls, diabetes_data, increments=0.05, label_template="[#] !", metric="percent_correct", wait=True)

    # access classifier's Java API
    labor_file = helper.get_data_dir() + os.sep + "labor.arff"
    helper.print_info("Loading dataset: " + labor_file)
    loader = Loader("weka.core.converters.ArffLoader")
    labor_data = loader.load_file(labor_file)
    labor_data.class_is_last()

    helper.print_title("Using JRip's Java API to access rules")
    jrip = Classifier(classname="weka.classifiers.rules.JRip")
    jrip.build_classifier(labor_data)
    # getRuleset(), size(), get() and toString() are called on the wrapped
    # Java objects rather than on Python wrapper methods
    rset = jrip.jwrapper.getRuleset()
    for i in range(rset.size()):
        r = rset.get(i)
        print(str(r.toString(labor_data.class_attribute.jobject)))
예제 #12
0
                            if classifier == 0:
                                for kernel in range(0, 1):
                                    if kernel == 0:
                                        mapper = Classifier(
                                            classname=
                                            "weka.classifiers.misc.InputMappedClassifier",
                                            options=[
                                                '-M', "-W",
                                                "weka.classifiers.bayes.NaiveBayes"
                                            ])
                                        Class = 'NaiveBayes'
                                        mapper.build_classifier(dataTrain)
                                        evaluation = Evaluation(dataTrain)
                                        evaluation.test_model(mapper, dataTest)
                                        roc_aux_NB.append(
                                            evaluation.area_under_roc(1) * 100)
                                        recall_aux_NB.append(
                                            evaluation.recall(1) * 100)
                                        precision_aux_NB.append(
                                            evaluation.precision(1) * 100)

                            elif classifier == 1:
                                for degree in range(3, 4):
                                    mapper = Classifier(
                                        classname=
                                        "weka.classifiers.misc.InputMappedClassifier",
                                        options=[
                                            '-M', "-W",
                                            "weka.classifiers.functions.SMO",
                                            "--", "-K",
                                            "weka.classifiers.functions.supportVector.PolyKernel -E "
예제 #13
0
import weka.core.jvm as jvm
import weka.core.converters as conv
from weka.classifiers import Evaluation, Classifier
from weka.core.classes import Random
import weka.plot.classifiers as plcls  # NB: matplotlib is required
import os

# Directory holding click_prediction.arff (includes the trailing separator,
# so the file name is appended by plain concatenation below).
data_dir = "/home/suruchi/Desktop/BTECH Pro/new/click_prediction/"

from weka.core.converters import Loader

# Script: cross-validate NaiveBayes on the click-prediction data set
# (2 folds, Random(5)), print the summary and confusion matrix, plot the ROC
# curves for class indices 0 and 1 and print the AUC for class index 1.
jvm.start(packages=True)
try:
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_dir + "click_prediction.arff")
    data.class_is_last()

    cls = Classifier(classname="weka.classifiers.bayes.NaiveBayes")

    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 2, Random(5))

    print(evl.summary("=== NaiveBayes on click prediction (stats) ===", False))
    print(evl.matrix("=== NaiveBayes on click prediction(confusion matrix) ==="))
    plcls.plot_roc(evl, class_index=[0, 1], wait=True)
    print("areaUnderROC/1: " + str(evl.area_under_roc(1)))
finally:
    # Always shut the JVM down, even when loading, evaluation or plotting
    # raises.
    jvm.stop()
예제 #14
0
                        for kernel in range(0, 1):
                            if kernel == 0:
                                mapper = Classifier(
                                    classname=
                                    "weka.classifiers.misc.InputMappedClassifier",
                                    options=[
                                        "-M", "-W",
                                        "weka.classifiers.bayes.NaiveBayes"
                                    ])
                                Class = 'NaiveBayes'
                                mapper.build_classifier(dataTrainSlow)
                                evaluation = Evaluation(dataTrainSlow)
                                evaluation.test_model(mapper, dataTestSlow)

                                NB_AUC[seed - 1, fold - 1,
                                       0] = (evaluation.area_under_roc(1) *
                                             100)
                                NB_Recall[seed - 1, fold - 1,
                                          0] = (evaluation.recall(yIndexSlow) *
                                                100)
                                NB_Precision[seed - 1, fold - 1, 0] = (
                                    evaluation.precision(yIndexSlow) * 100)

                                if window == 365:
                                    mapper = Classifier(
                                        classname=
                                        "weka.classifiers.misc.InputMappedClassifier",
                                        options=[
                                            "-M", "-W",
                                            "weka.classifiers.bayes.NaiveBayes",
                                            '--', '-K'
                                              y,
                                              cal_method=Method)

            print("\n\nLabeled data======== " +
                  str((1.0 - eval.error_rate) * 100) +
                  " number of instances== " +
                  str(labledDataSet.num_instances) + "\n")

            print("           Decision Tree                       \n")
            print(
                "\n      precision   recall     areaUnderROC            \n\n")

            for i in range(test.get_instance(0).num_classes):
                print(
                    str(eval.precision(i)) + "  " + str(eval.recall(i)) +
                    "  " + str(eval.area_under_roc(i)) + "\n")

            ClassifyWithDT(Newtrainpool, test, tree, fileOut)

            print("\n")
            print("########################################################\n")
            print("\n")

        except Exception as e:
            raise e

    print("\n")
    print("\n")
    print("########################################################\n")
    print("########################################################\n")
    print("########################################################\n")
예제 #16
0
            RF = Classifier(
                classname="weka.classifiers.misc.InputMappedClassifier",
                options=[
                    "-M", "-W", "weka.classifiers.trees.RandomForest", "--",
                    "-I", '20'
                ])
            Class = 'NaiveBayes'
            RF.build_classifier(dataTrain)
            evaluationRF = Evaluation(dataTrain)
            evaluationRF.test_model(RF, dataTest)

            if dataset == 'First':
                Scores.write(
                    str(window) + ',' +
                    str(np.round(evaluationNB.area_under_roc(1) * 100, 2)) +
                    ',' +
                    str(np.round(evaluationRF.area_under_roc(1) * 100, 2)) +
                    '\n')
            else:
                ScoresLast.write(
                    str(window) + ',' +
                    str(np.round(evaluationNB.area_under_roc(1) * 100, 2)) +
                    ',' +
                    str(np.round(evaluationRF.area_under_roc(1) * 100, 2)) +
                    '\n')

            if ntp == 2 and dataset == 'First':

                Perf.write(
                    '\multirow{8}{*}{' + str(window) + 'd}' + ' & ' +
                # #print(cls.options)
                # cls.build_classifier(dataTrain)

                from weka.classifiers import Evaluation
                #print("Evaluating NB classifier")
                evaluation = Evaluation(dataTrain)
                evl = evaluation.test_model(mapper, dataTest)
                print('Window' + str(Window[window]) + '_S' + str(seed) +
                      '_Fold' + str(fold) + ': Performance')
                #print(evaluation.summary())
                #print(evaluation.class_details())
                #print(evaluation.matrix())
                #print(evaluation.summary())
                #print(evaluation.class_details())
                #print(evaluation.matrix())
                roc.append(evaluation.area_under_roc(1))
                sens.append(evaluation.true_positive_rate(1))
                spec.append(evaluation.true_negative_rate(1))
                if fold == 10 and seed == 5:
                    print('Window' + str(Window[window]) + '_S' + str(seed) +
                          '_Fold' + str(fold) + ': Performance')
                    print('AUC: ' + str(np.mean(roc)))
                    print('Sens: ' + str(np.mean(sens)))
                    print('Spec:' + str(np.mean(spec)))
                    Perf.write('Window' + str(Window[window]) +
                               ': Performance\n\n')
                    Perf.write('AUC: ' + str(np.mean(roc)) + '\n')
                    Perf.write('Sens: ' + str(np.mean(sens)) + '\n')
                    Perf.write('Spec:' + str(np.mean(spec)) + '\n')

            except:
예제 #18
0
            RF = Classifier(classname="weka.classifiers.misc.InputMappedClassifier",
                            options=["-M", "-W", "weka.classifiers.bayes.NaiveBayes"])
            # Class = 'NaiveBayes'
            # NB.build_classifier(dataTrain)
            # evaluationNB = Evaluation(dataTrain)
            # evaluationNB.test_model(NB, dataTest)

            # RF = Classifier(classname="weka.classifiers.misc.InputMappedClassifier",
            #                 options=["-M", "-W", "weka.classifiers.trees.RandomForest", "--", "-I",
            #                          '20'])
            Class = 'NaiveBayes'
            RF.build_classifier(dataTrain)
            evaluationRF = Evaluation(dataTrain)
            evaluationRF.test_model(RF, dataTest)
            print(evaluationRF.area_under_roc(1))

            if ntp == 2 and dataset == 'Slow':

                Perf.write(
                    '\multirow{6}{*}{' + str(window) + 'd}' + ' & ' + '\multirow{3}{*}{' + str(ntp) + '}' + ' & ' + dataset + ' & ' + str(np.round(evaluationRF.area_under_roc(1) * 100, 2)) + ' & ' + str(np.round(evaluationRF.precision(yIndex) * 100, 2)) + ' & ' + str(np.round(evaluationRF.recall(yIndex) * 100, 2)) + '\\\\\n')

                # Precision.write(
                #     '\multirow{8}{*}{' + str(window) + 'd}' + ' & ' + '\multirow{2}{*}{' + str(
                #         ntp) + '}' + ' & ' + dataset + ' & ' + str(
                #         np.round(evaluationNB.precision(yIndex) * 100, 2)) + ' & ' + str(
                #         np.round(evaluationRF.precision(yIndex) * 100, 2)) + '\\\\\n')
                #
                # Recall.write('\multirow{8}{*}{' + str(window) + 'd}' + ' & ' + '\multirow{2}{*}{' + str(
                #     ntp) + '}' + ' & ' + dataset + ' & ' + str(np.round(evaluationNB.recall(yIndex) * 100, 2)) + ' & ' + str(
                #     np.round(evaluationRF.recall(yIndex) * 100, 2)) + '\\\\\n')
예제 #19
0
                    from weka.classifiers import Classifier
                    if classifier == 0:
                        SMOTE = Filter(classname="weka.filters.supervised.instance.SMOTE", options=['-P', str(smote)])
                        SMOTE.inputformat(dataTrain)
                        dataTrain = SMOTE.filter(dataTrain)

                        SMOTE.inputformat(dataLastTrain)
                        dataLastTrain = SMOTE.filter(dataLastTrain)
                        for kernel in range(0,1):
                            if kernel == 0:
                                mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier", options=["-M","-W", "weka.classifiers.bayes.NaiveBayes"])
                                Class = 'NaiveBayes'
                                mapper.build_classifier(dataTrain)
                                evaluation = Evaluation(dataTrain)
                                evaluation.test_model(mapper,dataTest)
                                roc_NB.append(evaluation.area_under_roc(1)*100)
                                recall_NB.append(evaluation.recall(yIndex)*100)
                                precision_NB.append(evaluation.precision(yIndex)*100)

                                mapper.build_classifier(dataLastTrain)
                                evaluation = Evaluation(dataLastTrain)
                                evaluation.test_model(mapper, dataLastTest)

                                roc_NB_Last.append(evaluation.area_under_roc(1) * 100)
                                recall_NB_Last.append(evaluation.recall(yIndex) * 100)
                                precision_NB_Last.append(evaluation.precision(yIndex) * 100)
                    elif classifier == 1:
                        for degree in [2]:
                            mapper = Classifier(classname="weka.classifiers.misc.InputMappedClassifier", options=["-M","-W", "weka.classifiers.functions.SMO", "--", "-K","weka.classifiers.functions.supportVector.PolyKernel -E " + str(degree)])
                            Class = 'SVM'
def main():
    """
    Runs a sequence of classifier examples and prints the results.

    The example datasets (iris, diabetes, bolts, labor) are loaded from the
    directory returned by ``helper.get_data_dir()``. The walkthrough covers:
    classifier help and instantiation (full classname, partial classname,
    command-line string, kernel classifier), training and evaluating J48,
    incremental training of NaiveBayesUpdateable, meta-classifier setup,
    cross-validation of a nominal (NaiveBayes) and a numeric
    (LinearRegression) classifier with their statistics, several plots,
    and access to JRip's rules through the underlying Java object.

    All plots are requested with ``wait=False`` except the learning curve,
    which is requested with ``wait=True``.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # classifier help
    helper.print_title("Creating help string")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    print(classifier.to_help())

    # partial classname
    helper.print_title("Creating classifier from partial classname")
    clsname = ".J48"
    classifier = Classifier(classname=clsname)
    print(clsname + " --> " + classifier.classname)

    # classifier from commandline
    helper.print_title("Creating SMO from command-line string")
    cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"'
    classifier = from_commandline(cmdline,
                                  classname="weka.classifiers.Classifier")
    classifier.build_classifier(iris_data)
    print("input: " + cmdline)
    print("output: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # kernel classifier
    helper.print_title("Creating SMO as KernelClassifier")
    kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.001"])
    classifier = KernelClassifier(classname="weka.classifiers.functions.SMO",
                                  options=["-M"])
    classifier.kernel = kernel
    classifier.build_classifier(iris_data)
    print("classifier: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # build a classifier and output model
    helper.print_title("Training J48 classifier on iris")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
    # property of the J48 classifier itself. However, being of type float rather than double, we need
    # to convert it to the correct type first using the double_to_float function:
    classifier.set_property("confidenceFactor", types.double_to_float(0.3))
    classifier.build_classifier(iris_data)
    print(classifier)
    print(classifier.graph)
    print(classifier.to_source("MyJ48"))
    plot_graph.plot_dot_graph(classifier.graph)

    # evaluate model on test set (here the training data itself is reused)
    helper.print_title("Evaluating J48 classifier on iris")
    evaluation = Evaluation(iris_data)
    evl = evaluation.test_model(classifier, iris_data)
    print(evl)
    print(evaluation.summary())

    # evaluate model on train/test split
    helper.print_title("Evaluating J48 classifier on iris (random split 66%)")
    classifier = Classifier(classname="weka.classifiers.trees.J48",
                            options=["-C", "0.3"])
    evaluation = Evaluation(iris_data)
    evaluation.evaluate_train_test_split(classifier, iris_data, 66.0,
                                         Random(1))
    print(evaluation.summary())

    # load a dataset incrementally and build classifier incrementally
    helper.print_title("Build classifier incrementally on iris")
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    iris_inc.class_is_last()
    classifier = Classifier(
        classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    classifier.build_classifier(iris_inc)
    # the loader itself is iterated to feed instances one at a time
    for inst in loader:
        classifier.update_classifier(inst)
    print(classifier)

    # construct meta-classifiers
    helper.print_title("Meta classifiers")
    # generic FilteredClassifier instantiation
    print("generic FilteredClassifier instantiation")
    meta = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.FilteredClassifier")
    meta.classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    # the generic wrapper has no "filter" attribute, so the Java object is set as a property
    meta.set_property("filter", flter.jobject)
    print(meta.to_commandline())
    # direct FilteredClassifier instantiation
    print("direct FilteredClassifier instantiation")
    meta = FilteredClassifier()
    meta.classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.filter = flter
    print(meta.to_commandline())
    # generic Vote
    print("generic Vote instantiation")
    meta = MultipleClassifiersCombiner(classname="weka.classifiers.meta.Vote")
    classifiers = [
        Classifier(classname="weka.classifiers.functions.SMO"),
        Classifier(classname="weka.classifiers.trees.J48")
    ]
    meta.classifiers = classifiers
    print(meta.to_commandline())

    # cross-validate nominal classifier
    helper.print_title("Cross-validating NaiveBayes on diabetes")
    diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + diabetes_file)
    loader = Loader("weka.core.converters.ArffLoader")
    diabetes_data = loader.load_file(diabetes_file)
    diabetes_data.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    pred_output = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText",
        options=["-distribution"])
    evaluation = Evaluation(diabetes_data)
    evaluation.crossvalidate_model(classifier,
                                   diabetes_data,
                                   10,
                                   Random(42),
                                   output=pred_output)
    print(evaluation.summary())
    print(evaluation.class_details())
    print(evaluation.matrix())
    print("areaUnderPRC/0: " + str(evaluation.area_under_prc(0)))
    print("weightedAreaUnderPRC: " + str(evaluation.weighted_area_under_prc))
    print("areaUnderROC/1: " + str(evaluation.area_under_roc(1)))
    print("weightedAreaUnderROC: " + str(evaluation.weighted_area_under_roc))
    print("avgCost: " + str(evaluation.avg_cost))
    print("totalCost: " + str(evaluation.total_cost))
    print("confusionMatrix: " + str(evaluation.confusion_matrix))
    print("correct: " + str(evaluation.correct))
    print("pctCorrect: " + str(evaluation.percent_correct))
    print("incorrect: " + str(evaluation.incorrect))
    print("pctIncorrect: " + str(evaluation.percent_incorrect))
    print("unclassified: " + str(evaluation.unclassified))
    print("pctUnclassified: " + str(evaluation.percent_unclassified))
    print("coverageOfTestCasesByPredictedRegions: " +
          str(evaluation.coverage_of_test_cases_by_predicted_regions))
    print("sizeOfPredictedRegions: " +
          str(evaluation.size_of_predicted_regions))
    print("falseNegativeRate: " + str(evaluation.false_negative_rate(1)))
    print("weightedFalseNegativeRate: " +
          str(evaluation.weighted_false_negative_rate))
    print("numFalseNegatives: " + str(evaluation.num_false_negatives(1)))
    print("trueNegativeRate: " + str(evaluation.true_negative_rate(1)))
    print("weightedTrueNegativeRate: " +
          str(evaluation.weighted_true_negative_rate))
    print("numTrueNegatives: " + str(evaluation.num_true_negatives(1)))
    print("falsePositiveRate: " + str(evaluation.false_positive_rate(1)))
    print("weightedFalsePositiveRate: " +
          str(evaluation.weighted_false_positive_rate))
    print("numFalsePositives: " + str(evaluation.num_false_positives(1)))
    print("truePositiveRate: " + str(evaluation.true_positive_rate(1)))
    print("weightedTruePositiveRate: " +
          str(evaluation.weighted_true_positive_rate))
    print("numTruePositives: " + str(evaluation.num_true_positives(1)))
    print("fMeasure: " + str(evaluation.f_measure(1)))
    print("weightedFMeasure: " + str(evaluation.weighted_f_measure))
    print("unweightedMacroFmeasure: " +
          str(evaluation.unweighted_macro_f_measure))
    print("unweightedMicroFmeasure: " +
          str(evaluation.unweighted_micro_f_measure))
    print("precision: " + str(evaluation.precision(1)))
    print("weightedPrecision: " + str(evaluation.weighted_precision))
    print("recall: " + str(evaluation.recall(1)))
    print("weightedRecall: " + str(evaluation.weighted_recall))
    print("kappa: " + str(evaluation.kappa))
    print("KBInformation: " + str(evaluation.kb_information))
    print("KBMeanInformation: " + str(evaluation.kb_mean_information))
    print("KBRelativeInformation: " + str(evaluation.kb_relative_information))
    print("SFEntropyGain: " + str(evaluation.sf_entropy_gain))
    print("SFMeanEntropyGain: " + str(evaluation.sf_mean_entropy_gain))
    print("SFMeanPriorEntropy: " + str(evaluation.sf_mean_prior_entropy))
    print("SFMeanSchemeEntropy: " + str(evaluation.sf_mean_scheme_entropy))
    print("matthewsCorrelationCoefficient: " +
          str(evaluation.matthews_correlation_coefficient(1)))
    print("weightedMatthewsCorrelation: " +
          str(evaluation.weighted_matthews_correlation))
    print("class priors: " + str(evaluation.class_priors))
    print("numInstances: " + str(evaluation.num_instances))
    print("meanAbsoluteError: " + str(evaluation.mean_absolute_error))
    print("meanPriorAbsoluteError: " +
          str(evaluation.mean_prior_absolute_error))
    print("relativeAbsoluteError: " + str(evaluation.relative_absolute_error))
    print("rootMeanSquaredError: " + str(evaluation.root_mean_squared_error))
    print("rootMeanPriorSquaredError: " +
          str(evaluation.root_mean_prior_squared_error))
    print("rootRelativeSquaredError: " +
          str(evaluation.root_relative_squared_error))
    print("prediction output:\n" + str(pred_output))
    # one curve per class label of the diabetes class attribute
    plot_cls.plot_roc(evaluation,
                      title="ROC diabetes",
                      class_index=range(
                          0, diabetes_data.class_attribute.num_values),
                      wait=False)
    plot_cls.plot_prc(evaluation,
                      title="PRC diabetes",
                      class_index=range(
                          0, diabetes_data.class_attribute.num_values),
                      wait=False)

    # train 2nd classifier on diabetes dataset
    classifier2 = Classifier(classname="weka.classifiers.trees.RandomForest")
    evaluation2 = Evaluation(diabetes_data)
    evaluation2.crossvalidate_model(classifier2, diabetes_data, 10, Random(42))
    plot_cls.plot_rocs({
        "NB": evaluation,
        "RF": evaluation2
    },
                       title="ROC diabetes",
                       class_index=0,
                       wait=False)
    plot_cls.plot_prcs({
        "NB": evaluation,
        "RF": evaluation2
    },
                       title="PRC diabetes",
                       class_index=0,
                       wait=False)

    # load a numeric dataset
    bolts_file = helper.get_data_dir() + os.sep + "bolts.arff"
    helper.print_info("Loading dataset: " + bolts_file)
    loader = Loader("weka.core.converters.ArffLoader")
    bolts_data = loader.load_file(bolts_file)
    bolts_data.class_is_last()

    # build a classifier and output model
    helper.print_title("Training LinearRegression on bolts")
    classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    classifier.build_classifier(bolts_data)
    print(classifier)

    # cross-validate numeric classifier
    helper.print_title("Cross-validating LinearRegression on bolts")
    classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    evaluation = Evaluation(bolts_data)
    evaluation.crossvalidate_model(classifier, bolts_data, 10, Random(42))
    print(evaluation.summary())
    print("correlationCoefficient: " + str(evaluation.correlation_coefficient))
    print("errorRate: " + str(evaluation.error_rate))
    helper.print_title("Header - bolts")
    print(str(evaluation.header))
    helper.print_title("Predictions on bolts")
    for index, pred in enumerate(evaluation.predictions):
        print(
            str(index + 1) + ": " + str(pred) + " -> error=" + str(pred.error))
    plot_cls.plot_classifier_errors(evaluation.predictions, wait=False)

    # train 2nd classifier and show errors in same plot
    classifier2 = Classifier(classname="weka.classifiers.functions.SMOreg")
    evaluation2 = Evaluation(bolts_data)
    evaluation2.crossvalidate_model(classifier2, bolts_data, 10, Random(42))
    plot_cls.plot_classifier_errors(
        {
            "LR": evaluation.predictions,
            "SMOreg": evaluation2.predictions
        },
        wait=False)

    # learning curve (the only plot requested with wait=True)
    cls = [
        Classifier(classname="weka.classifiers.trees.J48"),
        Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    ]
    plot_cls.plot_learning_curve(cls,
                                 diabetes_data,
                                 increments=0.05,
                                 label_template="[#] !",
                                 metric="percent_correct",
                                 wait=True)

    # access classifier's Java API
    labor_file = helper.get_data_dir() + os.sep + "labor.arff"
    helper.print_info("Loading dataset: " + labor_file)
    loader = Loader("weka.core.converters.ArffLoader")
    labor_data = loader.load_file(labor_file)
    labor_data.class_is_last()

    helper.print_title("Using JRip's Java API to access rules")
    jrip = Classifier(classname="weka.classifiers.rules.JRip")
    jrip.build_classifier(labor_data)
    rset = jrip.jwrapper.getRuleset()
    # range() rather than xrange(): xrange does not exist on Python 3 and
    # would raise NameError here, after all the preceding work has run
    for i in range(rset.size()):
        r = rset.get(i)
        print(str(r.toString(labor_data.class_attribute.jobject)))
예제 #21
0
    # CLASSIFIERS
    classifiers = [
        ("Bayesian Network",
         Classifier(classname="weka.classifiers.bayes.BayesNet")),
        #	("Decision Tree", Classifier(classname="weka.classifiers.trees.J48")),
        #	("Logistic Regression", Classifier(classname="weka.classifiers.functions.Logistic")),
        #	("Multilayer Perceptron", Classifier(classname="weka.classifiers.functions.MultilayerPerceptron")),
        #	("Naive Bayes", Classifier(classname="weka.classifiers.bayes.NaiveBayes")),
        #	("Nearest Neighbour", Classifier(classname="weka.classifiers.lazy.IBk"))),
    ]

    # EVALUATION
    for name, cls in classifiers:
        print(name)
        evaluation = Evaluation(data)
        evaluation.crossvalidate_model(cls, data, 10, Random(42))
        print(evaluation.summary())
        print(evaluation.class_details())
        print("areaUnderROC/1: " + str(evaluation.area_under_roc(1)))
        print("numFalseNegatives: " + str(evaluation.num_false_negatives(1)))
        print("numTrueNegatives: " + str(evaluation.num_true_negatives(1)))
        print("numFalsePositives: " + str(evaluation.num_false_positives(1)))
        print("numTruePositives: " + str(evaluation.num_true_positives(1)))

    jvm.stop()

except:
    print("runtime error")
    jvm.stop()
 for kernel in range(0, 2):
     if kernel == 0:
         mapper = Classifier(
             classname=
             "weka.classifiers.misc.InputMappedClassifier",
             options=[
                 "-W",
                 "weka.classifiers.bayes.NaiveBayes"
             ])
         Class = 'NaiveBayes'
         mapper.build_classifier(dataTrain)
         evaluation = Evaluation(dataTrain)
         evaluation.test_model(
             mapper, dataTest)
         aux1.append(
             evaluation.area_under_roc(1) *
             100)
         if fold == 10:
             title.append('NB_' +
                          str(begin) +
                          'to' + str(ntp))
             roc.append(
                 str(round(
                     np.mean(aux1), 2)))
     else:
         mapper = Classifier(
             classname=
             "weka.classifiers.misc.InputMappedClassifier",
             options=[
                 "-W",
                 "weka.classifiers.bayes.NaiveBayes",
예제 #23
0
        NB = Classifier(
            classname="weka.classifiers.misc.InputMappedClassifier",
            options=["-M", "-W", "weka.classifiers.bayes.NaiveBayes"])
        Class = 'NaiveBayes'
        NB.build_classifier(dataTrain)
        evaluationNB = Evaluation(dataTrain)
        evaluationNB.test_model(NB, dataTest)

        RF = Classifier(
            classname="weka.classifiers.misc.InputMappedClassifier",
            options=[
                "-M", "-W", "weka.classifiers.trees.RandomForest", "--", "-I",
                '20'
            ])
        Class = 'NaiveBayes'
        RF.build_classifier(dataTrain)
        evaluationRF = Evaluation(dataTrain)

        Perf.write(
            str(window) + '&' + dataset + '&' +
            str(np.round(evaluationNB.area_under_roc(1) * 100, 2)) + '&' +
            str(np.round(evaluationNB.precision(yIndex) * 100, 2)) + '&' +
            str(np.round(evaluationNB.recall(yIndex) * 100, 2)) + '\n')
        Scores.write(
            str(window) + ',' + dataset + ',' +
            str(np.round(evaluationNB.area_under_roc(1) * 100, 2)) + '\n')
        #Precision.write(str(window)+ '&' + dataset + '&' + str(np.round(evaluationNB.precision(1) * 100,2))+ '&' + str(np.round(evaluationRF.precision(1) * 100,2))+'\n')

        #Recall.write(str(window)+ '&' + dataset + '&' + str(np.round(evaluationNB.recall(1) * 100,2))+ '&' + str(np.round(evaluationRF.recall(1) * 100,2))+'\n')
jvm.stop()