Пример #1
0
 def test_join_options(self):
     """
     Tests the join_options method.
     """
     self.assertEqual("", str(classes.join_options([])))
     self.assertEqual(
         "-t /some/where/test.arff",
         str(classes.join_options(["-t", "/some/where/test.arff"])))
def main():
    """
    Runs a datagenerator from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.
    """
    parser = argparse.ArgumentParser(
        description='Executes a data generator from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("datagenerator", help="data generator classname, e.g., "
                                              + "weka.datagenerators.classifiers.classification.LED24")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional data generator options")
    parsed = parser.parse_args()
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        generator = DataGenerator(classname=parsed.datagenerator)
        if len(parsed.option) > 0:
            generator.options = parsed.option
        DataGenerator.make_data(generator, parsed.option)
    except Exception, e:
        print(e)
def main():
    """
    Runs a associator from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.
    """

    parser = argparse.ArgumentParser(
        description='Executes an associator from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-t", metavar="train", dest="train", required=True, help="training set file")
    parser.add_argument("associator", help="associator classname, e.g., weka.associations.Apriori")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional associator options")
    parsed = parser.parse_args()
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        associator = Associator(classname=parsed.associator)
        if len(parsed.option) > 0:
            associator.options = parsed.option
        loader = converters.loader_for_file(parsed.train)
        data = loader.load_file(parsed.train)
        associator.build_associations(data)
        print(str(associator))
    except Exception, e:
        print(e)
Пример #4
0
def main():
    """
    Runs a associator from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.
    """

    parser = argparse.ArgumentParser(
        description='Executes an associator from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-t", metavar="train", dest="train", required=True, help="training set file")
    parser.add_argument("associator", help="associator classname, e.g., weka.associations.Apriori")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional associator options")
    parsed = parser.parse_args()
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        associator = Associator(classname=parsed.associator)
        if len(parsed.option) > 0:
            associator.options = parsed.option
        loader = converters.loader_for_file(parsed.train)
        data = loader.load_file(parsed.train)
        associator.build_associations(data)
        print(str(associator))
    except Exception as e:
        print(e)
    finally:
        jvm.stop()
Пример #5
0
def main():
    """
    Runs a clusterer from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.
    """
    parser = argparse.ArgumentParser(
        description='Performs clustering from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-t", metavar="train", dest="train", required=True, help="training set file")
    parser.add_argument("-T", metavar="test", dest="test", help="test set file")
    parser.add_argument("-d", metavar="outmodel", dest="outmodel", help="model output file name")
    parser.add_argument("-l", metavar="inmodel", dest="inmodel", help="model input file name")
    parser.add_argument("-p", metavar="attributes", dest="attributes", help="attribute range")
    parser.add_argument("-x", metavar="num folds", dest="numfolds", help="number of folds")
    parser.add_argument("-s", metavar="seed", dest="seed", help="seed value for randomization")
    parser.add_argument("-c", metavar="class index", dest="classindex", help="1-based class attribute index")
    parser.add_argument("-g", metavar="graph", dest="graph", help="graph output file (if supported)")
    parser.add_argument("clusterer", help="clusterer classname, e.g., weka.clusterers.SimpleKMeans")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional clusterer options")
    parsed = parser.parse_args()
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)
    params = []
    if parsed.train is not None:
        params.extend(["-t", parsed.train])
    if parsed.test is not None:
        params.extend(["-T", parsed.test])
    if parsed.outmodel is not None:
        params.extend(["-d", parsed.outmodel])
    if parsed.inmodel is not None:
        params.extend(["-l", parsed.inmodel])
    if parsed.attributes is not None:
        params.extend(["-p", parsed.attributes])
    if parsed.numfolds is not None:
        params.extend(["-x", parsed.numfolds])
    if parsed.seed is not None:
        params.extend(["-s", parsed.seed])
    if parsed.classindex is not None:
        params.extend(["-c", parsed.classindex])
    if parsed.graph is not None:
        params.extend(["-g", parsed.graph])

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        clusterer = Clusterer(classname=parsed.clusterer)
        if len(parsed.option) > 0:
            clusterer.options = parsed.option
        print(ClusterEvaluation.evaluate_clusterer(clusterer, params))
    except Exception as e:
        print(e)
    finally:
        jvm.stop()
Пример #6
0
def main(args=None):
    """
    Runs attribute selection from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.

    :param args: the command-line arguments to use, uses sys.argv if None
    :type args: list
    """

    parser = argparse.ArgumentParser(
        description='Performs attribute selection from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i", metavar="input", dest="input", required=True, help="input file")
    parser.add_argument("-c", metavar="class index", dest="classindex", help="1-based class attribute index")
    parser.add_argument("-s", metavar="search", dest="search", help="search method, classname and options")
    parser.add_argument("-x", metavar="num folds", dest="numfolds", help="number of folds")
    parser.add_argument("-n", metavar="seed", dest="seed", help="the seed value for randomization")
    parser.add_argument("evaluator", help="evaluator classname, e.g., weka.attributeSelection.CfsSubsetEval")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional evaluator options")
    parsed = parser.parse_args(args=args)
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)
    params = []
    if parsed.input is not None:
        params.extend(["-i", parsed.input])
    if parsed.classindex is not None:
        params.extend(["-c", parsed.classindex])
    if parsed.search is not None:
        params.extend(["-s", parsed.search])
    if parsed.numfolds is not None:
        params.extend(["-x", parsed.numfolds])
    if parsed.seed is not None:
        params.extend(["-n", parsed.seed])

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        evaluation = ASEvaluation(classname=parsed.evaluator)
        if len(parsed.option) > 0:
            evaluation.options = parsed.option
        print(AttributeSelection.attribute_selection(evaluation, params))
    except Exception as e:
        print(traceback.format_exc())
    finally:
        jvm.stop()
def main():
    """
    Runs attribute selection from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.
    """
    parser = argparse.ArgumentParser(
        description='Performs attribute selection from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i", metavar="input", dest="input", required=True, help="input file")
    parser.add_argument("-c", metavar="class index", dest="classindex", help="1-based class attribute index")
    parser.add_argument("-s", metavar="search", dest="search", help="search method, classname and options")
    parser.add_argument("-x", metavar="num folds", dest="numfolds", help="number of folds")
    parser.add_argument("-n", metavar="seed", dest="seed", help="the seed value for randomization")
    parser.add_argument("evaluator", help="evaluator classname, e.g., weka.attributeSelection.CfsSubsetEval")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional evaluator options")
    parsed = parser.parse_args()
    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)
    params = []
    if parsed.input is not None:
        params.extend(["-i", parsed.input])
    if parsed.classindex is not None:
        params.extend(["-c", parsed.classindex])
    if parsed.search is not None:
        params.extend(["-s", parsed.search])
    if parsed.numfolds is not None:
        params.extend(["-x", parsed.numfolds])
    if parsed.seed is not None:
        params.extend(["-n", parsed.seed])

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        evaluation = ASEvaluation(classname=parsed.evaluator)
        if len(parsed.option) > 0:
            evaluation.options = parsed.option
        print(AttributeSelection.attribute_selection(evaluation, params))
    except Exception as e:
        print(e)
    finally:
        jvm.stop()
Пример #8
0
 def test_join_options(self):
     """
     Tests the join_options method.
     """
     self.assertEqual("", str(classes.join_options([])))
     self.assertEqual("-t /some/where/test.arff", str(classes.join_options(["-t", "/some/where/test.arff"])))
def plot_learning_curve(classifiers, train, test=None, increments=100, metric="percent_correct",
                        title="Learning curve", label_template="[#] @ $", key_loc="lower right",
                        outfile=None, wait=True):
    """
    Plots
    :param classifiers: list of Classifier template objects
    :type classifiers: list of Classifier
    :param train: dataset to use for the building the classifier, used for evaluating it test set None
    :type train: Instances
    :param test: optional dataset to use for the testing the built classifiers
    :type test: Instances
    :param increments: the increments (>= 1: # of instances, <1: percentage of dataset)
    :type increments: float
    :param metric: the name of the numeric metric to plot (Evaluation.<metric>)
    :type metric: str
    :param title: the title for the plot
    :type title: str
    :param label_template: the template for the label in the plot
                           (#: 1-based index, @: full classname, !: simple classname, $: options)
    :type label_template: str
    :param key_loc: the location string for the key
    :type key_loc: str
    :param outfile: the output file, ignored if None
    :type outfile: str
    :param wait: whether to wait for the user to close the plot
    :type wait: bool
    """

    if not plot.matplotlib_available:
        logger.error("Matplotlib is not installed, plotting unavailable!")
        return
    if not train.has_class():
        logger.error("Training set has no class attribute set!")
        return
    if (test is not None) and (train.equal_headers(test) is not None):
        logger.error("Training and test set are not compatible: " + train.equal_headers(test))
        return

    if increments >= 1:
        inc = increments
    else:
        inc = round(train.num_instances * increments)

    steps = []
    cls = []
    evls = {}
    for classifier in classifiers:
        cl = Classifier.make_copy(classifier)
        cls.append(cl)
        evls[cl] = []
    if test is None:
        tst = train
    else:
        tst = test

    for i in xrange(train.num_instances):
        if (i > 0) and (i % inc == 0):
            steps.append(i+1)
        for cl in cls:
            # train
            if cl.is_updateable:
                if i == 0:
                    tr = Instances.copy_instances(train, 0, 1)
                    cl.build_classifier(tr)
                else:
                    cl.update_classifier(train.get_instance(i))
            else:
                if (i > 0) and (i % inc == 0):
                    tr = Instances.copy_instances(train, 0, i + 1)
                    cl.build_classifier(tr)
            # evaluate
            if (i > 0) and (i % inc == 0):
                evl = Evaluation(tst)
                evl.test_model(cl, tst)
                evls[cl].append(getattr(evl, metric))

    fig, ax = plt.subplots()
    ax.set_xlabel("# of instances")
    ax.set_ylabel(metric)
    ax.set_title(title)
    fig.canvas.set_window_title(title)
    ax.grid(True)
    i = 0
    for cl in cls:
        evl = evls[cl]
        i += 1
        plot_label = label_template.\
            replace("#", str(i)).\
            replace("@", cl.classname).\
            replace("!", cl.classname[cl.classname.rfind(".") + 1:]).\
            replace("$", join_options(cl.config))
        ax.plot(steps, evl, label=plot_label)
    plt.draw()
    plt.legend(loc=key_loc, shadow=True)
    if outfile is not None:
        plt.savefig(outfile)
    if wait:
        plt.show()
Пример #10
0
def plot_learning_curve(classifiers,
                        train,
                        test=None,
                        increments=100,
                        metric="percent_correct",
                        title="Learning curve",
                        label_template="[#] @ $",
                        key_loc="lower right",
                        outfile=None,
                        wait=True):
    """
    Plots a learning curve.

    :param classifiers: list of Classifier template objects
    :type classifiers: list of Classifier
    :param train: dataset to use for the building the classifier, used for evaluating it test set None
    :type train: Instances
    :param test: optional dataset (or list of datasets) to use for the testing the built classifiers
    :type test: list or Instances
    :param increments: the increments (>= 1: # of instances, <1: percentage of dataset)
    :type increments: float
    :param metric: the name of the numeric metric to plot (Evaluation.<metric>)
    :type metric: str
    :param title: the title for the plot
    :type title: str
    :param label_template: the template for the label in the plot
                           (#: 1-based index of classifier, @: full classname, !: simple classname,
                           $: options, *: 1-based index of test set)
    :type label_template: str
    :param key_loc: the location string for the key
    :type key_loc: str
    :param outfile: the output file, ignored if None
    :type outfile: str
    :param wait: whether to wait for the user to close the plot
    :type wait: bool
    """

    if not plot.matplotlib_available:
        logger.error("Matplotlib is not installed, plotting unavailable!")
        return
    if not train.has_class():
        logger.error("Training set has no class attribute set!")
        return

    if increments >= 1:
        inc = increments
    else:
        inc = round(train.num_instances * increments)

    if test is None:
        tst = [train]
    elif isinstance(test, list):
        tst = test
    elif isinstance(test, Instances):
        tst = [test]
    else:
        logger.error("Expected list or Instances object, instead: " +
                     type(test))
        return
    for t in tst:
        if train.equal_headers(t) is not None:
            logger.error("Training and test set are not compatible: " +
                         train.equal_headers(t))
            return

    steps = []
    cls = []
    evls = {}
    for classifier in classifiers:
        cl = Classifier.make_copy(classifier)
        cls.append(cl)
        evls[cl] = {}
        for t in tst:
            evls[cl][t] = []

    for i in xrange(train.num_instances):
        if (i > 0) and (i % inc == 0):
            steps.append(i + 1)
        for cl in cls:
            # train
            if cl.is_updateable:
                if i == 0:
                    tr = Instances.copy_instances(train, 0, 1)
                    cl.build_classifier(tr)
                else:
                    cl.update_classifier(train.get_instance(i))
            else:
                if (i > 0) and (i % inc == 0):
                    tr = Instances.copy_instances(train, 0, i + 1)
                    cl.build_classifier(tr)
            # evaluate
            if (i > 0) and (i % inc == 0):
                for t in tst:
                    evl = Evaluation(t)
                    evl.test_model(cl, t)
                    evls[cl][t].append(getattr(evl, metric))

    fig, ax = plt.subplots()
    ax.set_xlabel("# of instances")
    ax.set_ylabel(metric)
    ax.set_title(title)
    fig.canvas.set_window_title(title)
    ax.grid(True)
    i = 0
    for cl in cls:
        evlpertest = evls[cl]
        i += 1
        n = 0
        for t in tst:
            evl = evlpertest[t]
            n += 1
            plot_label = label_template.\
                replace("#", str(i)).\
                replace("*", str(n)).\
                replace("@", cl.classname).\
                replace("!", cl.classname[cl.classname.rfind(".") + 1:]).\
                replace("$", join_options(cl.config))
            ax.plot(steps, evl, label=plot_label)
    plt.draw()
    plt.legend(loc=key_loc, shadow=True)
    if outfile is not None:
        plt.savefig(outfile)
    if wait:
        plt.show()
Пример #11
0
def main():
    """
    Runs a filter from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.
    """
    parser = argparse.ArgumentParser(
        description=
        'Executes a filter from the command-line. Calls JVM start/stop automatically.'
    )
    parser.add_argument("-j",
                        metavar="classpath",
                        dest="classpath",
                        help="additional classpath, jars/directories")
    parser.add_argument("-X",
                        metavar="heap",
                        dest="heap",
                        help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i",
                        metavar="input1",
                        dest="input1",
                        required=True,
                        help="input file 1")
    parser.add_argument("-o",
                        metavar="output1",
                        dest="output1",
                        required=True,
                        help="output file 1")
    parser.add_argument("-r",
                        metavar="input2",
                        dest="input2",
                        help="input file 2")
    parser.add_argument("-s",
                        metavar="output2",
                        dest="output2",
                        help="output file 2")
    parser.add_argument("-c",
                        metavar="classindex",
                        default="-1",
                        dest="classindex",
                        help="1-based class attribute index")
    parser.add_argument("filter",
                        help="filter classname, e.g., weka.filters.AllFilter")
    parser.add_argument("option",
                        nargs=argparse.REMAINDER,
                        help="additional filter options")
    parsed = parser.parse_args()
    if parsed.input2 is None and parsed.output2 is not None:
        raise Exception("No second input file provided ('-r ...')!")

    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)
    params = []
    if parsed.input1 is not None:
        params.extend(["-i", parsed.input1])
    if parsed.output1 is not None:
        params.extend(["-o", parsed.output1])
    if parsed.input2 is not None:
        params.extend(["-r", parsed.input2])
    if parsed.output2 is not None:
        params.extend(["-s", parsed.output2])
    if parsed.classindex is not None:
        params.extend(["-c", parsed.classindex])

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        flter = Filter(parsed.filter)
        if len(parsed.option) > 0:
            flter.options = parsed.option
        loader = Loader(classname="weka.core.converters.ArffLoader")
        in1 = loader.load_file(parsed.input1)
        cls = parsed.classindex
        if str(parsed.classindex) == "first":
            cls = "0"
        if str(parsed.classindex) == "last":
            cls = str(in1.num_attributes - 1)
        in1.class_index = int(cls)
        flter.inputformat(in1)
        out1 = flter.filter(in1)
        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(out1, parsed.output1)
        if parsed.input2 is not None:
            in2 = loader.load_file(parsed.input2)
            in2.class_index = int(cls)
            out2 = flter.filter(in2)
            saver.save_file(out2, parsed.output2)
    except Exception as e:
        print(e)
    finally:
        jvm.stop()
Пример #12
0
def main():
    """
    Runs a filter from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.
    """
    parser = argparse.ArgumentParser(
        description='Executes a filter from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i", metavar="input1", dest="input1", required=True, help="input file 1")
    parser.add_argument("-o", metavar="output1", dest="output1", required=True, help="output file 1")
    parser.add_argument("-r", metavar="input2", dest="input2", help="input file 2")
    parser.add_argument("-s", metavar="output2", dest="output2", help="output file 2")
    parser.add_argument("-c", metavar="classindex", default="-1", dest="classindex",
                        help="1-based class attribute index")
    parser.add_argument("filter", help="filter classname, e.g., weka.filters.AllFilter")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional filter options")
    parsed = parser.parse_args()
    if parsed.input2 is None and parsed.output2 is not None:
        raise Exception("No second input file provided ('-r ...')!")

    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)
    params = []
    if parsed.input1 is not None:
        params.extend(["-i", parsed.input1])
    if parsed.output1 is not None:
        params.extend(["-o", parsed.output1])
    if parsed.input2 is not None:
        params.extend(["-r", parsed.input2])
    if parsed.output2 is not None:
        params.extend(["-s", parsed.output2])
    if parsed.classindex is not None:
        params.extend(["-c", parsed.classindex])

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        flter = Filter(parsed.filter)
        if len(parsed.option) > 0:
            flter.options = parsed.option
        loader = Loader(classname="weka.core.converters.ArffLoader")
        in1 = loader.load_file(parsed.input1)
        cls = parsed.classindex
        if str(parsed.classindex) == "first":
            cls = "0"
        if str(parsed.classindex) == "last":
            cls = str(in1.num_attributes - 1)
        in1.class_index = int(cls)
        flter.inputformat(in1)
        out1 = flter.filter(in1)
        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(out1, parsed.output1)
        if parsed.input2 is not None:
            in2 = loader.load_file(parsed.input2)
            in2.class_index = int(cls)
            out2 = flter.filter(in2)
            saver.save_file(out2, parsed.output2)
    except Exception, e:
        print(e)