def expe_printer(res_file, comparison_metric):
    """Print a paired corrected t-test comparison for stored experiment results.

    Loads ``res_file``, looks up the column named ``comparison_metric`` and
    prints the tester's header followed by its full multi-resultset
    comparison for that column.

    Args:
        res_file: Path of the results file to load (presumably a file
            written by an experiment run; the loader is chosen from the
            file name).
        comparison_metric: Name of the attribute to compare on,
            e.g. ``"Percent_correct"``.

    Raises:
        ValueError: If no loader is available for ``res_file`` or the loaded
            data has no attribute named ``comparison_metric``.
    """
    loader = weka.core.converters.loader_for_file(res_file)
    if loader is None:
        # loader_for_file yields None when no converter matches the file.
        raise ValueError("No loader available for results file: %s" % res_file)
    data = loader.load_file(res_file)

    attribute = data.attribute_by_name(comparison_metric)
    if attribute is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(
            "Results file %s has no attribute named %r"
            % (res_file, comparison_metric))
    comparison_col = attribute.index

    matrix = ResultMatrix(classname="weka.experiment.ResultMatrixPlainText")
    tester = Tester(classname="weka.experiment.PairedCorrectedTTester")
    tester.resultmatrix = matrix
    tester.instances = data

    print(tester.header(comparison_col))
    # NOTE(review): the leading 0 is presumably the index of the baseline
    # result set -- confirm against the Tester documentation.
    print(tester.multi_resultset_full(0, comparison_col))
def main():
    """
    Just runs some example code.

    Runs two experiments and prints a paired corrected t-test comparison
    for each:

    1. Cross-validation + classification (10 runs, 10 folds) on iris and
       anneal with ZeroR and J48, compared on ``Percent_correct``.
    2. Random split + regression (10 runs, percentage=66.6) on bolts and
       bodyfat with ZeroR and LinearRegression, compared on
       ``Correlation_coefficient``; this result matrix is also plotted.

    Result files are written to the system temporary directory.
    """
    data_dir = helper.get_data_dir()
    print(data_dir)

    # cross-validation + classification
    helper.print_title("Experiment: Cross-validation + classification")
    datasets = [
        os.path.join(data_dir, "iris.arff"),
        os.path.join(data_dir, "anneal.arff"),
    ]
    classifiers = [
        Classifier("weka.classifiers.rules.ZeroR"),
        Classifier("weka.classifiers.trees.J48"),
    ]
    outfile = os.path.join(tempfile.gettempdir(), "results-cv.arff")
    exp = SimpleCrossValidationExperiment(
        classification=True,
        runs=10,
        folds=10,
        datasets=datasets,
        classifiers=classifiers,
        result=outfile)
    exp.setup()
    exp.run()

    # evaluate
    _print_comparison(outfile, "Percent_correct")

    # random split + regression
    helper.print_title("Experiment: Random split + regression")
    datasets = [
        os.path.join(data_dir, "bolts.arff"),
        os.path.join(data_dir, "bodyfat.arff"),
    ]
    classifiers = [
        Classifier("weka.classifiers.rules.ZeroR"),
        Classifier("weka.classifiers.functions.LinearRegression"),
    ]
    outfile = os.path.join(tempfile.gettempdir(), "results-rs.arff")
    exp = SimpleRandomSplitExperiment(
        classification=False,
        runs=10,
        percentage=66.6,
        preserve_order=False,
        datasets=datasets,
        classifiers=classifiers,
        result=outfile)
    exp.setup()
    exp.run()

    # evaluate
    matrix = _print_comparison(outfile, "Correlation_coefficient")

    # plot
    plot_exp.plot_experiment(
        matrix,
        title="Random split",
        measure="Correlation coefficient",
        wait=True)


def _print_comparison(results_file, metric):
    """Load stored experiment results and print the tester's comparison.

    Prints the tester header and the full multi-resultset comparison for the
    column named ``metric``, then returns the result matrix that was attached
    to the tester so the caller can plot it.
    """
    loader = converters.loader_for_file(results_file)
    data = loader.load_file(results_file)
    matrix = ResultMatrix("weka.experiment.ResultMatrixPlainText")
    tester = Tester("weka.experiment.PairedCorrectedTTester")
    tester.resultmatrix = matrix
    comparison_col = data.attribute_by_name(metric).index
    tester.instances = data
    print(tester.header(comparison_col))
    # NOTE(review): the leading 0 is presumably the index of the baseline
    # result set -- confirm against the Tester documentation.
    print(tester.multi_resultset_full(0, comparison_col))
    return matrix
classifiers = [Classifier(classname="weka.classifiers.trees.J48")] outfile = tempfile.gettempdir() + os.sep + "results-cv.arff" exp = SimpleCrossValidationExperiment(datasets=datasets, classifiers=classifiers, folds=10, runs=10, result=outfile) exp.setup() exp.run() # evaluate experiment loader = converters.loader_for_file(outfile) data = loader.load_file(outfile) matrix = ResultMatrix(classname="weka.experiment.ResultMatrixPlainText", options=[ "-print-row-names", "-print-col-names", "-enum-col-names", "-show-stddev" ]) tester = Tester(classname="weka.experiment.PairedCorrectedTTester", options=["-V", "-S", "0.05"]) comparison_col = data.attribute_by_name("Percent_correct").index tester.instances = data tester.resultmatrix = matrix print(tester.header(comparison_col)) print(tester.multi_resultset_full(0, comparison_col)) # setup random-split experiment and run it outfile = tempfile.gettempdir() + os.sep + "results-rs.csv" exp = SimpleRandomSplitExperiment(datasets=datasets, classifiers=classifiers, percentage=90,
Classifier(classname="weka.classifiers.rules.OneR"), Classifier(classname="weka.classifiers.trees.J48"), Classifier(classname="weka.classifiers.bayes.NaiveBayes"), Classifier(classname="weka.classifiers.lazy.IBk"), Classifier(classname="weka.classifiers.functions.SMO"), Classifier(classname="weka.classifiers.meta.AdaBoostM1"), # handles only 2-class problems: Classifier(classname="weka.classifiers.functions.VotedPerceptron") ] outfile = tempfile.gettempdir( ) + os.sep + "results-cv.arff" # store results for later analysis exp = SimpleCrossValidationExperiment(classification=True, runs=10, folds=10, datasets=datasets, classifiers=classifiers, result=outfile) exp.setup() exp.run() # evaluate previous run loader = converters.loader_for_file(outfile) data = loader.load_file(outfile) matrix = ResultMatrix(classname="weka.experiment.ResultMatrixPlainText") tester = Tester(classname="weka.experiment.PairedCorrectedTTester") tester.resultmatrix = matrix comparison_col = data.attribute_by_name("Percent_correct").index tester.instances = data print(tester.header(comparison_col)) print(tester.multi_resultset_full(0, comparison_col)) jvm.stop()