Exemplo n.º 1
0
# Evaluate `cls` on `filtered`, the same dataset used to initialise the
# Evaluation object. Both `cls` and `filtered` are bound above this excerpt;
# presumably `cls` was trained on `filtered` -- confirm against the full script.
evl = Evaluation(filtered)
evl.test_model(cls, filtered)
# Print the evaluation summary followed by the classifier's string form.
print(evl.to_summary())
print(str(cls))
# Plot the graph returned by cls.graph() (`plg` is bound outside this excerpt).
plg.plot_dot_graph(cls.graph())

# 2. filtered classifier
# Load the separate test file and mark its last attribute as the class.
fname = os.sep.join([data_dir, "simpletext-test.arff"])
loading_msg = "\nLoading dataset: " + fname + "\n"
print(loading_msg)
loader = Loader(classname="weka.core.converters.ArffLoader")
test = loader.load_file(fname)
class_idx = test.num_attributes() - 1
test.set_class_index(class_idx)

# Wrap J48 and the StringToWordVector filter in one FilteredClassifier and
# train it on `data` (bound above this excerpt); the filter is handed to the
# classifier instead of being applied to `data` beforehand.
print("Building/evaluating filtered classifier...")
cls = FilteredClassifier()
tree = Classifier(classname="weka.classifiers.trees.J48")
cls.set_classifier(tree)
word_vectors = Filter(
    classname="weka.filters.unsupervised.attribute.StringToWordVector")
cls.set_filter(word_vectors)
cls.build_classifier(data)

# Test on `test`, passing a PlainText prediction output whose header is the
# test set; the Evaluation object itself is initialised from `data`.
pout = PredictionOutput(
    classname="weka.classifiers.evaluation.output.prediction.PlainText")
pout.set_header(test)
evl = Evaluation(data)
evl.test_model(cls, test, pout)

# Show the prediction output first, then the model.
print(str(pout))
print(str(cls))

# load ReutersCorn-train
# Rebinds `data` to the ReutersCorn training set read through ArffLoader.
fname = os.sep.join([data_dir, "ReutersCorn-train.arff"])
loading_msg = "\nLoading dataset: " + fname + "\n"
print(loading_msg)
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
# The last attribute (index num_attributes() - 1) becomes the class.
class_idx = data.num_attributes() - 1
data.set_class_index(class_idx)
Exemplo n.º 2
0
# Use the last attribute (index num_attributes() - 1) as the class attribute.
# `data` is loaded above this excerpt.
data.set_class_index(data.num_attributes() - 1)

# 1. cheating with default filter
# "Cheating": the supervised Discretize filter is run over all of `data`
# first, so the cross-validation below only ever sees pre-filtered instances.
fltr = Filter(
    classname="weka.filters.supervised.attribute.Discretize",
    options=[])
fltr.set_inputformat(data)
filtered = fltr.filter(data)

# 10-fold cross-validation of a plain J48 on the filtered data, seeded with 1.
cls = Classifier(classname="weka.classifiers.trees.J48")
evl = Evaluation(filtered)
rng = Random(1)
evl.crossvalidate_model(cls, filtered, 10, rng)

# Build on the full filtered set so the printed model can be passed to
# get_nodes() (defined elsewhere in this file).
cls.build_classifier(filtered)
accuracy = evl.percent_correct()
nodes = get_nodes(str(cls))
print("cheating (default): accuracy=%0.1f nodes=%s" % (accuracy, nodes))

# 2. using FilteredClassifier with default filter
# Here the Discretize filter is set on a FilteredClassifier, and the
# unfiltered `data` is what gets cross-validated.
cls = FilteredClassifier()
tree = Classifier(classname="weka.classifiers.trees.J48")
cls.set_classifier(tree)
discretize = Filter(
    classname="weka.filters.supervised.attribute.Discretize",
    options=[])
cls.set_filter(discretize)

# 10-fold cross-validation on the raw data, seeded with 1.
evl = Evaluation(data)
rng = Random(1)
evl.crossvalidate_model(cls, data, 10, rng)

# Build on all of `data` so the printed model can be passed to get_nodes()
# (defined elsewhere in this file).
cls.build_classifier(data)
accuracy = evl.percent_correct()
nodes = get_nodes(str(cls))
print("FilteredClassifier (default): accuracy=%0.1f nodes=%s" % (accuracy, nodes))

# 3. using FilteredClassifier (make binary)
# Same setup as the default-filter FilteredClassifier run, except that the
# Discretize filter receives the "-D" option.
cls = FilteredClassifier()
tree = Classifier(classname="weka.classifiers.trees.J48")
cls.set_classifier(tree)
discretize = Filter(
    classname="weka.filters.supervised.attribute.Discretize",
    options=["-D"])
cls.set_filter(discretize)

# 10-fold cross-validation on the raw data, seeded with 1.
evl = Evaluation(data)
rng = Random(1)
evl.crossvalidate_model(cls, data, 10, rng)

# Build on all of `data` so the printed model can be passed to get_nodes()
# (defined elsewhere in this file).
cls.build_classifier(data)
accuracy = evl.percent_correct()
nodes = get_nodes(str(cls))
print("FilteredClassifier (make binary): accuracy=%0.1f nodes=%s" % (accuracy, nodes))

# 1. cheating with make binary