Exemplo n.º 1
0
    def getDecisionTree(self, inputPath):
        """Train a J48 decision tree on a dataset and return its graph.

        The dataset is obtained from ``self.load_Arff(inputPath)``, its last
        attribute is used as the class attribute, and a
        ``weka.classifiers.trees.J48`` classifier is built with the options
        ``-C 0.3``.

        :param inputPath: value handed unchanged to ``self.load_Arff``
                          (presumably the path of an ARFF file)
        :return: the result of ``classifier.graph()`` for the built tree
        """
        data = self.load_Arff(inputPath)

        # The class attribute is the last one; it has to be set once, before
        # the classifier is built.
        data.set_class_index(data.num_attributes() - 1)

        classifier = Classifier(classname="weka.classifiers.trees.J48",
                                options=["-C", "0.3"])
        classifier.build_classifier(data)

        # Only the graph is returned. The previous per-instance
        # distribution_for_instance() loop and the str(classifier) call
        # computed values that were never used, so they were dropped.
        return classifier.graph()
 def getDecisionTree(self, inputPath):
     """Train a J48 decision tree on a dataset and return its graph.

     The dataset is obtained from ``self.load_Arff(inputPath)``, its last
     attribute is used as the class attribute, and a
     ``weka.classifiers.trees.J48`` classifier is built with the options
     ``-C 0.3``.

     :param inputPath: value handed unchanged to ``self.load_Arff``
                       (presumably the path of an ARFF file)
     :return: the result of ``classifier.graph()`` for the built tree
     """
     data = self.load_Arff(inputPath)

     # The class attribute is the last one; it has to be set once, before
     # the classifier is built.
     data.set_class_index(data.num_attributes() - 1)

     classifier = Classifier(classname="weka.classifiers.trees.J48",
                             options=["-C", "0.3"])
     classifier.build_classifier(data)

     # Only the graph is returned. The previous per-instance
     # distribution_for_instance() loop and the str(classifier) call
     # computed values that were never used, so they were dropped.
     return classifier.graph()
Exemplo n.º 3
0
# Step 1a: run the dataset through a StringToWordVector filter.
# NOTE(review): `data` is defined outside this excerpt.
# NOTE(review): the filter class name is passed positionally here but as
# `classname=` further down -- confirm the first positional parameter of
# Filter is the class name.
print("Filtering data...")
fltr = Filter("weka.filters.unsupervised.attribute.StringToWordVector")
fltr.set_inputformat(data)
filtered = fltr.filter(data)
# The first attribute of the filtered data is used as the class attribute.
filtered.set_class_index(0)

# Step 1b: build a J48 classifier on the filtered data, evaluate it on that
# same filtered data, then print the summary, the model and plot its graph.
print("Building/evaluating classifier...")
cls = Classifier(classname="weka.classifiers.trees.J48")
cls.build_classifier(filtered)
evl = Evaluation(filtered)
evl.test_model(cls, filtered)
print(evl.to_summary())
print(str(cls))
plg.plot_dot_graph(cls.graph())

# Step 2: combine the same filter and classifier in a FilteredClassifier.
# A separate test file is loaded from `data_dir` (defined outside this
# excerpt); its last attribute is used as the class attribute.
fname = data_dir + os.sep + "simpletext-test.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
test = loader.load_file(fname)
test.set_class_index(test.num_attributes() - 1)
print("Building/evaluating filtered classifier...")
# `cls` is rebound here; the J48 model from step 1b is no longer referenced.
cls = FilteredClassifier()
cls.set_classifier(Classifier(classname="weka.classifiers.trees.J48"))
cls.set_filter(Filter(classname="weka.filters.unsupervised.attribute.StringToWordVector"))
# Built on the unfiltered `data`, not on `filtered`.
cls.build_classifier(data)
# Plain-text prediction output, given the test set as its header.
pout = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText")
pout.set_header(test)
# NOTE(review): the excerpt ends here; the evaluation that presumably uses
# `cls`, `test` and `pout` is not visible.
evl = Evaluation(data)
Exemplo n.º 4
0
Arquivo: c.py Projeto: tanayz/Kaggle
# Load a CSV dataset, train a J48 tree on it, print the model and its graph,
# then report 10-fold cross-validation statistics.
# NOTE(review): the variables are named after iris, but the file that is
# actually loaded is HairEyeColor.csv.
iris_file = "HairEyeColor.csv"

# Everything runs inside try/finally so that the JVM is stopped even when
# loading, training or evaluation raises.
try:
    print("Loading dataset: " + iris_file)
    loader = Loader(classname="weka.core.converters.CSVLoader")
    iris_data = loader.load_file(iris_file)
    # num_attributes is a method: call it, otherwise the bound method object
    # is printed instead of the attribute count.
    print(iris_data.num_attributes())
    # The last attribute is used as the class attribute.
    iris_data.set_class_index(iris_data.num_attributes() - 1)

    # Build a classifier and output the model.
    print("Training J48 classifier on iris")
    # "weka.test.Regression" is not a classifier, so J48 (the classifier the
    # message above announces) is used. Instead of passing "-C" in the
    # constructor, the "confidenceFactor" property of J48 can be set; being
    # of type float rather than double, the value must first be converted
    # with types.double_to_float.
    classifier = Classifier(classname="weka.classifiers.trees.J48",
                            options=["-C", "0.5"])
    classifier.build_classifier(iris_data)
    print(classifier)
    print(classifier.graph())

    evaluation = Evaluation(iris_data)  # initialize with priors
    # 10-fold cross-validation, Random seeded with 42.
    evaluation.crossvalidate_model(classifier, iris_data, 10, Random(42))
    print(evaluation.to_summary())

    print("pctCorrect: " + str(evaluation.percent_correct()))
    print("incorrect: " + str(evaluation.incorrect()))
finally:
    jvm.stop()