def test_join_options(self):
    """
    Checks the string that classes.join_options produces for an empty list
    and for a simple flag/value pair.
    """
    empty_result = classes.join_options([])
    self.assertEqual("", str(empty_result))

    pair_result = classes.join_options(["-t", "/some/where/test.arff"])
    self.assertEqual("-t /some/where/test.arff", str(pair_result))
def main():
    """
    Runs a data generator from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.

    The positional "datagenerator" argument is the classname to instantiate;
    everything after it is handed to the generator as its options.
    Exceptions raised while creating or running the generator are printed
    instead of propagated, and jvm.stop() is called in either case.
    """
    parser = argparse.ArgumentParser(
        description='Executes a data generator from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("datagenerator", help="data generator classname, e.g., "
                                              + "weka.datagenerators.classifiers.classification.LED24")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional data generator options")
    parsed = parser.parse_args()

    # the classpath uses the platform's path separator, like CLASSPATH itself
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        generator = DataGenerator(classname=parsed.datagenerator)
        if len(parsed.option) > 0:
            generator.options = parsed.option
        DataGenerator.make_data(generator, parsed.option)
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3
        print(e)
    finally:
        # shut the JVM down even when the generator failed
        jvm.stop()
def main():
    """
    Runs an associator from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.

    Loads the training file given with -t, builds the associator named by the
    positional "associator" argument on it and prints the associator.
    Exceptions raised during that work are printed instead of propagated, and
    jvm.stop() is called in either case.
    """
    parser = argparse.ArgumentParser(
        description='Executes an associator from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-t", metavar="train", dest="train", required=True, help="training set file")
    parser.add_argument("associator", help="associator classname, e.g., weka.associations.Apriori")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional associator options")
    parsed = parser.parse_args()

    # the classpath uses the platform's path separator, like CLASSPATH itself
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        associator = Associator(classname=parsed.associator)
        if len(parsed.option) > 0:
            associator.options = parsed.option
        loader = converters.loader_for_file(parsed.train)
        data = loader.load_file(parsed.train)
        associator.build_associations(data)
        print(str(associator))
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3
        print(e)
    finally:
        # shut the JVM down even when building the associator failed
        jvm.stop()
def main():
    """
    Command-line entry point for associators. Calls JVM start/stop automatically.
    Use -h to see all options.

    Builds the requested associator on the training file (-t) and prints it;
    errors are printed rather than raised.
    """
    arg_parser = argparse.ArgumentParser(
        description='Executes an associator from the command-line. Calls JVM start/stop automatically.')
    # (names/flags, keyword arguments) in the order they are registered
    arg_specs = (
        (("-j",), dict(metavar="classpath", dest="classpath", help="additional classpath, jars/directories")),
        (("-X",), dict(metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")),
        (("-t",), dict(metavar="train", dest="train", required=True, help="training set file")),
        (("associator",), dict(help="associator classname, e.g., weka.associations.Apriori")),
        (("option",), dict(nargs=argparse.REMAINDER, help="additional associator options")),
    )
    for names, settings in arg_specs:
        arg_parser.add_argument(*names, **settings)
    ns = arg_parser.parse_args()

    classpath = [] if ns.classpath is None else ns.classpath.split(os.pathsep)

    jvm.start(classpath, max_heap_size=ns.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        assoc = Associator(classname=ns.associator)
        if ns.option:
            assoc.options = ns.option
        train_loader = converters.loader_for_file(ns.train)
        train_data = train_loader.load_file(ns.train)
        assoc.build_associations(train_data)
        print(str(assoc))
    except Exception as err:
        print(err)
    finally:
        jvm.stop()
def main():
    """
    Command-line entry point for clusterers. Calls JVM start/stop automatically.
    Use -h to see all options.

    The recognised flags are collected into a parameter list that is passed,
    together with the configured clusterer, to
    ClusterEvaluation.evaluate_clusterer; its result is printed. Errors are
    printed rather than raised.
    """
    arg_parser = argparse.ArgumentParser(
        description='Performs clustering from the command-line. Calls JVM start/stop automatically.')
    # (names/flags, keyword arguments) in the order they are registered
    arg_specs = (
        (("-j",), dict(metavar="classpath", dest="classpath", help="additional classpath, jars/directories")),
        (("-X",), dict(metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")),
        (("-t",), dict(metavar="train", dest="train", required=True, help="training set file")),
        (("-T",), dict(metavar="test", dest="test", help="test set file")),
        (("-d",), dict(metavar="outmodel", dest="outmodel", help="model output file name")),
        (("-l",), dict(metavar="inmodel", dest="inmodel", help="model input file name")),
        (("-p",), dict(metavar="attributes", dest="attributes", help="attribute range")),
        (("-x",), dict(metavar="num folds", dest="numfolds", help="number of folds")),
        (("-s",), dict(metavar="seed", dest="seed", help="seed value for randomization")),
        (("-c",), dict(metavar="class index", dest="classindex", help="1-based class attribute index")),
        (("-g",), dict(metavar="graph", dest="graph", help="graph output file (if supported)")),
        (("clusterer",), dict(help="clusterer classname, e.g., weka.clusterers.SimpleKMeans")),
        (("option",), dict(nargs=argparse.REMAINDER, help="additional clusterer options")),
    )
    for names, settings in arg_specs:
        arg_parser.add_argument(*names, **settings)
    ns = arg_parser.parse_args()

    classpath = [] if ns.classpath is None else ns.classpath.split(os.pathsep)

    # forward only the flags that were actually supplied, in a fixed order
    forwarded = (
        ("-t", ns.train),
        ("-T", ns.test),
        ("-d", ns.outmodel),
        ("-l", ns.inmodel),
        ("-p", ns.attributes),
        ("-x", ns.numfolds),
        ("-s", ns.seed),
        ("-c", ns.classindex),
        ("-g", ns.graph),
    )
    params = []
    for flag, value in forwarded:
        if value is not None:
            params.extend([flag, value])

    jvm.start(classpath, max_heap_size=ns.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        cluster_algo = Clusterer(classname=ns.clusterer)
        if ns.option:
            cluster_algo.options = ns.option
        print(ClusterEvaluation.evaluate_clusterer(cluster_algo, params))
    except Exception as err:
        print(err)
    finally:
        jvm.stop()
def main(args=None):
    """
    Runs attribute selection from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.

    The recognised flags are collected into a parameter list that is passed,
    together with the configured evaluator, to
    AttributeSelection.attribute_selection; its result is printed. If an
    exception occurs, the formatted traceback is printed instead of the
    exception being propagated.

    :param args: the command-line arguments to use, uses sys.argv if None
    :type args: list
    """
    parser = argparse.ArgumentParser(
        description='Performs attribute selection from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i", metavar="input", dest="input", required=True, help="input file")
    parser.add_argument("-c", metavar="class index", dest="classindex", help="1-based class attribute index")
    parser.add_argument("-s", metavar="search", dest="search", help="search method, classname and options")
    parser.add_argument("-x", metavar="num folds", dest="numfolds", help="number of folds")
    parser.add_argument("-n", metavar="seed", dest="seed", help="the seed value for randomization")
    parser.add_argument("evaluator", help="evaluator classname, e.g., weka.attributeSelection.CfsSubsetEval")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional evaluator options")
    parsed = parser.parse_args(args=args)

    # the classpath uses the platform's path separator, like CLASSPATH itself
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)

    # forward only the flags that were actually supplied
    params = []
    if parsed.input is not None:
        params.extend(["-i", parsed.input])
    if parsed.classindex is not None:
        params.extend(["-c", parsed.classindex])
    if parsed.search is not None:
        params.extend(["-s", parsed.search])
    if parsed.numfolds is not None:
        params.extend(["-x", parsed.numfolds])
    if parsed.seed is not None:
        params.extend(["-n", parsed.seed])

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    # log the arguments that were actually parsed, not unconditionally sys.argv
    cmdline = sys.argv[1:] if args is None else args
    logger.debug("Commandline: " + join_options(cmdline))

    try:
        evaluation = ASEvaluation(classname=parsed.evaluator)
        if len(parsed.option) > 0:
            evaluation.options = parsed.option
        print(AttributeSelection.attribute_selection(evaluation, params))
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()
def main():
    """
    Command-line entry point for attribute selection. Calls JVM start/stop automatically.
    Use -h to see all options.

    The recognised flags are collected into a parameter list that is passed,
    together with the configured evaluator, to
    AttributeSelection.attribute_selection; its result is printed. Errors are
    printed rather than raised.
    """
    arg_parser = argparse.ArgumentParser(
        description='Performs attribute selection from the command-line. Calls JVM start/stop automatically.')
    # (names/flags, keyword arguments) in the order they are registered
    arg_specs = (
        (("-j",), dict(metavar="classpath", dest="classpath", help="additional classpath, jars/directories")),
        (("-X",), dict(metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")),
        (("-i",), dict(metavar="input", dest="input", required=True, help="input file")),
        (("-c",), dict(metavar="class index", dest="classindex", help="1-based class attribute index")),
        (("-s",), dict(metavar="search", dest="search", help="search method, classname and options")),
        (("-x",), dict(metavar="num folds", dest="numfolds", help="number of folds")),
        (("-n",), dict(metavar="seed", dest="seed", help="the seed value for randomization")),
        (("evaluator",), dict(help="evaluator classname, e.g., weka.attributeSelection.CfsSubsetEval")),
        (("option",), dict(nargs=argparse.REMAINDER, help="additional evaluator options")),
    )
    for names, settings in arg_specs:
        arg_parser.add_argument(*names, **settings)
    ns = arg_parser.parse_args()

    classpath = [] if ns.classpath is None else ns.classpath.split(os.pathsep)

    # forward only the flags that were actually supplied, in a fixed order
    forwarded = (
        ("-i", ns.input),
        ("-c", ns.classindex),
        ("-s", ns.search),
        ("-x", ns.numfolds),
        ("-n", ns.seed),
    )
    params = []
    for flag, value in forwarded:
        if value is not None:
            params.extend([flag, value])

    jvm.start(classpath, max_heap_size=ns.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        evaluator = ASEvaluation(classname=ns.evaluator)
        if ns.option:
            evaluator.options = ns.option
        print(AttributeSelection.attribute_selection(evaluator, params))
    except Exception as err:
        print(err)
    finally:
        jvm.stop()
def test_join_options(self):
    """
    Verifies classes.join_options for an empty option list and for a single
    flag followed by its value.
    """
    joined_empty = str(classes.join_options([]))
    self.assertEqual("", joined_empty)

    options = ["-t", "/some/where/test.arff"]
    joined_pair = str(classes.join_options(options))
    self.assertEqual("-t /some/where/test.arff", joined_pair)
def plot_learning_curve(classifiers, train, test=None, increments=100, metric="percent_correct",
                        title="Learning curve", label_template="[#] @ $", key_loc="lower right",
                        outfile=None, wait=True):
    """
    Plots a learning curve: each classifier is trained on a growing prefix of the
    training set and evaluated every "increments" instances; the chosen metric is
    plotted against the number of training instances.

    Returns without plotting (after logging an error) if matplotlib is unavailable,
    the training set has no class attribute, or the test set headers are reported
    as incompatible.

    :param classifiers: list of Classifier template objects
    :type classifiers: list of Classifier
    :param train: dataset to use for the building the classifier, used for evaluating it test set None
    :type train: Instances
    :param test: optional dataset to use for the testing the built classifiers
    :type test: Instances
    :param increments: the increments (>= 1: # of instances, <1: percentage of dataset)
    :type increments: float
    :param metric: the name of the numeric metric to plot (Evaluation.<metric>)
    :type metric: str
    :param title: the title for the plot
    :type title: str
    :param label_template: the template for the label in the plot
                           (#: 1-based index, @: full classname, !: simple classname, $: options)
    :type label_template: str
    :param key_loc: the location string for the key
    :type key_loc: str
    :param outfile: the output file, ignored if None
    :type outfile: str
    :param wait: whether to wait for the user to close the plot
    :type wait: bool
    """
    if not plot.matplotlib_available:
        logger.error("Matplotlib is not installed, plotting unavailable!")
        return
    if not train.has_class():
        logger.error("Training set has no class attribute set!")
        return
    if test is not None:
        # equal_headers returns None when compatible, otherwise a message
        msg = train.equal_headers(test)
        if msg is not None:
            logger.error("Training and test set are not compatible: " + msg)
            return

    if increments >= 1:
        inc = increments
    else:
        # a small percentage of a small dataset can round to 0, which would
        # make "i % inc" below raise ZeroDivisionError; use at least 1
        inc = max(1, round(train.num_instances * increments))

    steps = []
    cls = []
    evls = {}
    for classifier in classifiers:
        # work on copies so the caller's templates stay untouched
        cl = Classifier.make_copy(classifier)
        cls.append(cl)
        evls[cl] = []
    if test is None:
        tst = train
    else:
        tst = test

    # range instead of xrange: available on Python 2 and 3
    for i in range(train.num_instances):
        at_step = (i > 0) and (i % inc == 0)
        if at_step:
            steps.append(i + 1)
        for cl in cls:
            # train
            if cl.is_updateable:
                # incremental classifiers: build on the first instance, then
                # feed every further instance one at a time
                if i == 0:
                    tr = Instances.copy_instances(train, 0, 1)
                    cl.build_classifier(tr)
                else:
                    cl.update_classifier(train.get_instance(i))
            else:
                # batch classifiers: rebuild on the first i+1 instances
                if at_step:
                    tr = Instances.copy_instances(train, 0, i + 1)
                    cl.build_classifier(tr)
            # evaluate
            if at_step:
                evl = Evaluation(tst)
                evl.test_model(cl, tst)
                evls[cl].append(getattr(evl, metric))

    fig, ax = plt.subplots()
    ax.set_xlabel("# of instances")
    ax.set_ylabel(metric)
    ax.set_title(title)
    fig.canvas.set_window_title(title)
    ax.grid(True)
    for i, cl in enumerate(cls, 1):
        evl = evls[cl]
        plot_label = label_template.\
            replace("#", str(i)).\
            replace("@", cl.classname).\
            replace("!", cl.classname[cl.classname.rfind(".") + 1:]).\
            replace("$", join_options(cl.config))
        ax.plot(steps, evl, label=plot_label)
    plt.draw()
    plt.legend(loc=key_loc, shadow=True)
    if outfile is not None:
        plt.savefig(outfile)
    if wait:
        plt.show()
def plot_learning_curve(classifiers, train, test=None, increments=100, metric="percent_correct",
                        title="Learning curve", label_template="[#] @ $", key_loc="lower right",
                        outfile=None, wait=True):
    """
    Plots a learning curve: each classifier is trained on a growing prefix of the
    training set and evaluated on every test set each "increments" instances; the
    chosen metric is plotted against the number of training instances, one line per
    classifier/test set combination.

    Returns without plotting (after logging an error) if matplotlib is unavailable,
    the training set has no class attribute, "test" has an unsupported type, or a
    test set's headers are reported as incompatible.

    :param classifiers: list of Classifier template objects
    :type classifiers: list of Classifier
    :param train: dataset to use for the building the classifier, used for evaluating it test set None
    :type train: Instances
    :param test: optional dataset (or list of datasets) to use for the testing the built classifiers
    :type test: list or Instances
    :param increments: the increments (>= 1: # of instances, <1: percentage of dataset)
    :type increments: float
    :param metric: the name of the numeric metric to plot (Evaluation.<metric>)
    :type metric: str
    :param title: the title for the plot
    :type title: str
    :param label_template: the template for the label in the plot
                           (#: 1-based index of classifier, @: full classname,
                           !: simple classname, $: options, *: 1-based index of test set)
    :type label_template: str
    :param key_loc: the location string for the key
    :type key_loc: str
    :param outfile: the output file, ignored if None
    :type outfile: str
    :param wait: whether to wait for the user to close the plot
    :type wait: bool
    """
    if not plot.matplotlib_available:
        logger.error("Matplotlib is not installed, plotting unavailable!")
        return
    if not train.has_class():
        logger.error("Training set has no class attribute set!")
        return

    if increments >= 1:
        inc = increments
    else:
        # a small percentage of a small dataset can round to 0, which would
        # make "i % inc" below raise ZeroDivisionError; use at least 1
        inc = max(1, round(train.num_instances * increments))

    # normalise "test" into a list of datasets
    if test is None:
        tst = [train]
    elif isinstance(test, list):
        tst = test
    elif isinstance(test, Instances):
        tst = [test]
    else:
        # str() is required: concatenating a type object to a str raises TypeError
        logger.error("Expected list or Instances object, instead: " + str(type(test)))
        return
    for t in tst:
        # equal_headers returns None when compatible, otherwise a message
        msg = train.equal_headers(t)
        if msg is not None:
            logger.error("Training and test set are not compatible: " + msg)
            return

    steps = []
    cls = []
    evls = {}
    for classifier in classifiers:
        # work on copies so the caller's templates stay untouched
        cl = Classifier.make_copy(classifier)
        cls.append(cl)
        evls[cl] = {}
        for t in tst:
            evls[cl][t] = []

    # range instead of xrange: available on Python 2 and 3
    for i in range(train.num_instances):
        at_step = (i > 0) and (i % inc == 0)
        if at_step:
            steps.append(i + 1)
        for cl in cls:
            # train
            if cl.is_updateable:
                # incremental classifiers: build on the first instance, then
                # feed every further instance one at a time
                if i == 0:
                    tr = Instances.copy_instances(train, 0, 1)
                    cl.build_classifier(tr)
                else:
                    cl.update_classifier(train.get_instance(i))
            else:
                # batch classifiers: rebuild on the first i+1 instances
                if at_step:
                    tr = Instances.copy_instances(train, 0, i + 1)
                    cl.build_classifier(tr)
            # evaluate
            if at_step:
                for t in tst:
                    evl = Evaluation(t)
                    evl.test_model(cl, t)
                    evls[cl][t].append(getattr(evl, metric))

    fig, ax = plt.subplots()
    ax.set_xlabel("# of instances")
    ax.set_ylabel(metric)
    ax.set_title(title)
    fig.canvas.set_window_title(title)
    ax.grid(True)
    for i, cl in enumerate(cls, 1):
        evlpertest = evls[cl]
        for n, t in enumerate(tst, 1):
            evl = evlpertest[t]
            plot_label = label_template.\
                replace("#", str(i)).\
                replace("*", str(n)).\
                replace("@", cl.classname).\
                replace("!", cl.classname[cl.classname.rfind(".") + 1:]).\
                replace("$", join_options(cl.config))
            ax.plot(steps, evl, label=plot_label)
    plt.draw()
    plt.legend(loc=key_loc, shadow=True)
    if outfile is not None:
        plt.savefig(outfile)
    if wait:
        plt.show()
def main():
    """
    Runs a filter from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.

    Loads input file 1 with an ArffLoader, sets its class index, initialises the
    filter with it, filters it and saves the result to output file 1 with an
    ArffSaver. If a second input file is given, it is pushed through the same
    filter and saved to output file 2. Errors during that work are printed rather
    than raised.

    :raises Exception: if a second output file is given without a second input file
    """
    parser = argparse.ArgumentParser(
        description='Executes a filter from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i", metavar="input1", dest="input1", required=True, help="input file 1")
    parser.add_argument("-o", metavar="output1", dest="output1", required=True, help="output file 1")
    parser.add_argument("-r", metavar="input2", dest="input2", help="input file 2")
    parser.add_argument("-s", metavar="output2", dest="output2", help="output file 2")
    parser.add_argument("-c", metavar="classindex", default="-1", dest="classindex",
                        help="1-based class attribute index")
    parser.add_argument("filter", help="filter classname, e.g., weka.filters.AllFilter")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional filter options")
    parsed = parser.parse_args()
    if parsed.input2 is None and parsed.output2 is not None:
        raise Exception("No second input file provided ('-r ...')!")

    # the classpath uses the platform's path separator, like CLASSPATH itself
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        flter = Filter(parsed.filter)
        if len(parsed.option) > 0:
            flter.options = parsed.option
        loader = Loader(classname="weka.core.converters.ArffLoader")
        in1 = loader.load_file(parsed.input1)
        # translate the symbolic indices; any other value is used as given
        # NOTE(review): help text says "1-based" but the value is passed to
        # class_index unchanged - confirm which convention is intended
        cls = parsed.classindex
        if str(parsed.classindex) == "first":
            cls = "0"
        if str(parsed.classindex) == "last":
            cls = str(in1.num_attributes - 1)
        in1.class_index = int(cls)
        flter.inputformat(in1)
        out1 = flter.filter(in1)
        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(out1, parsed.output1)
        if parsed.input2 is not None:
            # second file goes through the filter already initialised on file 1
            in2 = loader.load_file(parsed.input2)
            in2.class_index = int(cls)
            out2 = flter.filter(in2)
            saver.save_file(out2, parsed.output2)
    except Exception as e:
        print(e)
    finally:
        jvm.stop()
def main():
    """
    Runs a filter from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.

    Loads input file 1 with an ArffLoader, sets its class index, initialises the
    filter with it, filters it and saves the result to output file 1 with an
    ArffSaver. If a second input file is given, it is pushed through the same
    filter and saved to output file 2. Errors during that work are printed rather
    than raised, and jvm.stop() is called in either case.

    :raises Exception: if a second output file is given without a second input file
    """
    parser = argparse.ArgumentParser(
        description='Executes a filter from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i", metavar="input1", dest="input1", required=True, help="input file 1")
    parser.add_argument("-o", metavar="output1", dest="output1", required=True, help="output file 1")
    parser.add_argument("-r", metavar="input2", dest="input2", help="input file 2")
    parser.add_argument("-s", metavar="output2", dest="output2", help="output file 2")
    parser.add_argument("-c", metavar="classindex", default="-1", dest="classindex",
                        help="1-based class attribute index")
    parser.add_argument("filter", help="filter classname, e.g., weka.filters.AllFilter")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional filter options")
    parsed = parser.parse_args()
    if parsed.input2 is None and parsed.output2 is not None:
        raise Exception("No second input file provided ('-r ...')!")

    # the classpath uses the platform's path separator, like CLASSPATH itself
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        flter = Filter(parsed.filter)
        if len(parsed.option) > 0:
            flter.options = parsed.option
        loader = Loader(classname="weka.core.converters.ArffLoader")
        in1 = loader.load_file(parsed.input1)
        # translate the symbolic indices; any other value is used as given
        # NOTE(review): help text says "1-based" but the value is passed to
        # class_index unchanged - confirm which convention is intended
        cls = parsed.classindex
        if str(parsed.classindex) == "first":
            cls = "0"
        if str(parsed.classindex) == "last":
            cls = str(in1.num_attributes - 1)
        in1.class_index = int(cls)
        flter.inputformat(in1)
        out1 = flter.filter(in1)
        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(out1, parsed.output1)
        if parsed.input2 is not None:
            # second file goes through the filter already initialised on file 1
            in2 = loader.load_file(parsed.input2)
            in2.class_index = int(cls)
            out2 = flter.filter(in2)
            saver.save_file(out2, parsed.output2)
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3
        print(e)
    finally:
        # shut the JVM down even when filtering failed
        jvm.stop()